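Add initial support for Ethos-U85

Build the Ethos-U85 device backend (src/ethosu_device_u85.c) when
ETHOSU_TARGET_NPU_CONFIG selects a u85 configuration, and export the
ETHOSU_ARCH/ETHOS<ARCH> compile definitions as PUBLIC because
include/pmu_ethosu.h now selects its definitions from them.

For Ethos-U85 the PMU API exposes 8 event counters and a dedicated
event-type enum, and the register interface is added as
src/ethosu85_interface.h. The "uz" pattern is dropped from the accepted
target configuration names.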

Change-Id: I28d7cb72c149a636a4f1d8ca6931316486b1829a
Signed-off-by: Jonny Svärd <jonny.svaerd@arm.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 350ac40..7fed522 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,5 @@
 #
-# SPDX-FileCopyrightText: Copyright 2019-2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
-#
+# SPDX-FileCopyrightText: Copyright 2019-2021, 2023-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the License); you may
@@ -59,19 +58,21 @@
 target_sources(ethosu_core_driver PRIVATE src/ethosu_driver.c src/ethosu_pmu.c)
 
 string(TOLOWER ${ETHOSU_TARGET_NPU_CONFIG} ETHOSU_TARGET_NPU_CONFIG)
-if(ETHOSU_TARGET_NPU_CONFIG MATCHES "^ethos-(u[0-9]+|uz)-([0-9]+$)")
+if(ETHOSU_TARGET_NPU_CONFIG MATCHES "^ethos-(u[0-9]+)-([0-9]+$)") # group 1 -> ETHOSU_ARCH, group 2 -> ETHOSU_MACS; "uz" is no longer accepted
     set(ETHOSU_ARCH ${CMAKE_MATCH_1})
     set(ETHOSU_MACS ${CMAKE_MATCH_2})
 else()
     message(FATAL_ERROR "Invalid Ethos-U target configuration '${ETHOSU_TARGET_NPU_CONFIG}")
 endif()
 
-target_compile_definitions(ethosu_core_driver PRIVATE
+target_compile_definitions(ethosu_core_driver PUBLIC # PUBLIC: include/pmu_ethosu.h now tests ETHOSU55/ETHOSU65/ETHOSU85, so code including it needs these definitions too
     ETHOSU_ARCH=${ETHOSU_ARCH}
     ETHOS$<UPPER_CASE:${ETHOSU_ARCH}>)
 
 if (ETHOSU_ARCH STREQUAL "u55" OR ETHOSU_ARCH STREQUAL "u65")
     target_sources(ethosu_core_driver PRIVATE src/ethosu_device_u55_u65.c)
+elseif (ETHOSU_ARCH STREQUAL "u85") # Ethos-U85 gets its own device source instead of the shared u55/u65 one
+    target_sources(ethosu_core_driver PRIVATE src/ethosu_device_u85.c)
 else()
     message(FATAL_ERROR "Invalid NPU configuration")
 endif()
@@ -96,10 +97,7 @@
 # Define ETHOSU macro
 target_compile_definitions(ethosu_core_driver PUBLIC ETHOSU)
 
-#
 # Print build status
-#
-
 message(STATUS "*******************************************************")
 message(STATUS "PROJECT_NAME                           : ${PROJECT_NAME}")
 message(STATUS "ETHOSU_TARGET_NPU_CONFIG               : ${ETHOSU_TARGET_NPU_CONFIG}")
diff --git a/include/pmu_ethosu.h b/include/pmu_ethosu.h
index b717130..a271114 100644
--- a/include/pmu_ethosu.h
+++ b/include/pmu_ethosu.h
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2019-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
- *
+ * SPDX-FileCopyrightText: Copyright 2019-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the License); you may
@@ -22,7 +21,6 @@
 /*****************************************************************************
  * Includes
  *****************************************************************************/
-
 #include <stdint.h>
 
 #include "ethosu_driver.h"
@@ -34,13 +32,23 @@
 /*****************************************************************************
  * Defines
  *****************************************************************************/
-
+#ifdef ETHOSU85 /* Set by the build: CMake defines ETHOS<ARCH> in upper case, i.e. ETHOSU85 for a u85 target */
+#define ETHOSU_PMU_NCOUNTERS 8 /* Ethos-U85: 8 PMU event counters (CNT1..CNT8 masks below) */
+#else /* Any other target keeps the original 4 counters */
 #define ETHOSU_PMU_NCOUNTERS 4
+#endif /* ETHOSU85 */
 
 #define ETHOSU_PMU_CNT1_Msk (1UL << 0)
 #define ETHOSU_PMU_CNT2_Msk (1UL << 1)
 #define ETHOSU_PMU_CNT3_Msk (1UL << 2)
 #define ETHOSU_PMU_CNT4_Msk (1UL << 3)
+#ifdef ETHOSU85 /* Mask bits for counters 5-8 exist only in Ethos-U85 builds */
+#define ETHOSU_PMU_CNT5_Msk (1UL << 4)
+#define ETHOSU_PMU_CNT6_Msk (1UL << 5)
+#define ETHOSU_PMU_CNT7_Msk (1UL << 6)
+#define ETHOSU_PMU_CNT8_Msk (1UL << 7)
+#endif /* ETHOSU85 */
+
 #define ETHOSU_PMU_CCNT_Msk (1UL << 31)
 
 /*****************************************************************************
@@ -52,6 +60,7 @@
  * Note: These values are symbolic. Actual HW-values may change. I.e. always use API
  *       to set/get actual event-type value.
  * */
+#if defined(ETHOSU55) || defined(ETHOSU65)
 enum ethosu_pmu_event_type
 {
     ETHOSU_PMU_NO_EVENT = 0,
@@ -131,6 +140,187 @@
 
     ETHOSU_PMU_SENTINEL // End-marker (not event)
 };
+#elif defined(ETHOSU85)
+enum ethosu_pmu_event_type /* Ethos-U85 event set; selected by the ETHOSU85 branch of the surrounding #if chain */
+{
+    ETHOSU_PMU_NO_EVENT = 0, // Symbolic index, not the HW event number; every following member counts up by one
+    ETHOSU_PMU_CYCLE,
+    ETHOSU_PMU_NPU_IDLE,
+    ETHOSU_PMU_CC_STALLED_ON_BLOCKDEP,
+    ETHOSU_PMU_CC_STALLED_ON_SHRAM_RECONFIG,
+    ETHOSU_PMU_NPU_ACTIVE,
+    ETHOSU_PMU_MAC_ACTIVE,
+    ETHOSU_PMU_MAC_DPU_ACTIVE,
+    ETHOSU_PMU_MAC_STALLED_BY_W_OR_ACC,
+    ETHOSU_PMU_MAC_STALLED_BY_W,
+    ETHOSU_PMU_MAC_STALLED_BY_ACC,
+    ETHOSU_PMU_MAC_STALLED_BY_IB,
+    ETHOSU_PMU_AO_ACTIVE,
+    ETHOSU_PMU_AO_STALLED_BY_BS_OR_OB,
+    ETHOSU_PMU_AO_STALLED_BY_BS,
+    ETHOSU_PMU_AO_STALLED_BY_OB,
+    ETHOSU_PMU_AO_STALLED_BY_AB_OR_CB,
+    ETHOSU_PMU_AO_STALLED_BY_AB,
+    ETHOSU_PMU_AO_STALLED_BY_CB,
+    ETHOSU_PMU_WD_ACTIVE,
+    ETHOSU_PMU_WD_STALLED,
+    ETHOSU_PMU_WD_STALLED_BY_WD_BUF,
+    ETHOSU_PMU_WD_STALLED_BY_WS_FC,
+    ETHOSU_PMU_WD_STALLED_BY_WS_TC,
+    ETHOSU_PMU_WD_TRANS_WBLK,
+    ETHOSU_PMU_WD_TRANS_WS_FC,
+    ETHOSU_PMU_WD_TRANS_WS_TC,
+    ETHOSU_PMU_WD_STALLED_BY_WS_SC0,
+    ETHOSU_PMU_WD_STALLED_BY_WS_SC1,
+    ETHOSU_PMU_WD_STALLED_BY_WS_SC2,
+    ETHOSU_PMU_WD_STALLED_BY_WS_SC3,
+    ETHOSU_PMU_WD_PARSE_ACTIVE_SC0,
+    ETHOSU_PMU_WD_PARSE_ACTIVE_SC1,
+    ETHOSU_PMU_WD_PARSE_ACTIVE_SC2,
+    ETHOSU_PMU_WD_PARSE_ACTIVE_SC3,
+    ETHOSU_PMU_WD_PARSE_STALL_SC0,
+    ETHOSU_PMU_WD_PARSE_STALL_SC1,
+    ETHOSU_PMU_WD_PARSE_STALL_SC2,
+    ETHOSU_PMU_WD_PARSE_STALL_SC3,
+    ETHOSU_PMU_WD_PARSE_STALL_IN_SC0,
+    ETHOSU_PMU_WD_PARSE_STALL_IN_SC1,
+    ETHOSU_PMU_WD_PARSE_STALL_IN_SC2,
+    ETHOSU_PMU_WD_PARSE_STALL_IN_SC3,
+    ETHOSU_PMU_WD_PARSE_STALL_OUT_SC0,
+    ETHOSU_PMU_WD_PARSE_STALL_OUT_SC1,
+    ETHOSU_PMU_WD_PARSE_STALL_OUT_SC2,
+    ETHOSU_PMU_WD_PARSE_STALL_OUT_SC3,
+    ETHOSU_PMU_WD_TRANS_WS_SC0,
+    ETHOSU_PMU_WD_TRANS_WS_SC1,
+    ETHOSU_PMU_WD_TRANS_WS_SC2,
+    ETHOSU_PMU_WD_TRANS_WS_SC3,
+    ETHOSU_PMU_WD_TRANS_WB0,
+    ETHOSU_PMU_WD_TRANS_WB1,
+    ETHOSU_PMU_WD_TRANS_WB2,
+    ETHOSU_PMU_WD_TRANS_WB3,
+    ETHOSU_PMU_SRAM_RD_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM_RD_TRANS_COMPLETED,
+    ETHOSU_PMU_SRAM_RD_DATA_BEAT_RECEIVED,
+    ETHOSU_PMU_SRAM_RD_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM_WR_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM_WR_TRANS_COMPLETED_M,
+    ETHOSU_PMU_SRAM_WR_TRANS_COMPLETED_S,
+    ETHOSU_PMU_SRAM_WR_DATA_BEAT_WRITTEN,
+    ETHOSU_PMU_SRAM_WR_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM_WR_DATA_BEAT_STALLED,
+    ETHOSU_PMU_SRAM_ENABLED_CYCLES,
+    ETHOSU_PMU_SRAM_RD_STALL_LIMIT,
+    ETHOSU_PMU_SRAM_WR_STALL_LIMIT,
+    ETHOSU_PMU_AXI_LATENCY_ANY,
+    ETHOSU_PMU_AXI_LATENCY_32,
+    ETHOSU_PMU_AXI_LATENCY_64,
+    ETHOSU_PMU_AXI_LATENCY_128,
+    ETHOSU_PMU_AXI_LATENCY_256,
+    ETHOSU_PMU_AXI_LATENCY_512,
+    ETHOSU_PMU_AXI_LATENCY_1024,
+    ETHOSU_PMU_ECC_DMA,
+    ETHOSU_PMU_ECC_MAC_IB,
+    ETHOSU_PMU_ECC_MAC_AB,
+    ETHOSU_PMU_ECC_AO_CB,
+    ETHOSU_PMU_ECC_AO_OB,
+    ETHOSU_PMU_ECC_AO_LUT,
+    ETHOSU_PMU_EXT_RD_TRANS_ACCEPTED,
+    ETHOSU_PMU_EXT_RD_TRANS_COMPLETED,
+    ETHOSU_PMU_EXT_RD_DATA_BEAT_RECEIVED,
+    ETHOSU_PMU_EXT_RD_TRAN_REQ_STALLED,
+    ETHOSU_PMU_EXT_WR_TRANS_ACCEPTED,
+    ETHOSU_PMU_EXT_WR_TRANS_COMPLETED_M,
+    ETHOSU_PMU_EXT_WR_TRANS_COMPLETED_S,
+    ETHOSU_PMU_EXT_WR_DATA_BEAT_WRITTEN,
+    ETHOSU_PMU_EXT_WR_TRAN_REQ_STALLED,
+    ETHOSU_PMU_EXT_WR_DATA_BEAT_STALLED,
+    ETHOSU_PMU_EXT_ENABLED_CYCLES,
+    ETHOSU_PMU_EXT_RD_STALL_LIMIT,
+    ETHOSU_PMU_EXT_WR_STALL_LIMIT,
+    ETHOSU_PMU_SRAM0_RD_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM0_RD_TRANS_COMPLETED,
+    ETHOSU_PMU_SRAM0_RD_DATA_BEAT_RECEIVED,
+    ETHOSU_PMU_SRAM0_RD_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM0_WR_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM0_WR_TRANS_COMPLETED_M,
+    ETHOSU_PMU_SRAM0_WR_TRANS_COMPLETED_S,
+    ETHOSU_PMU_SRAM0_WR_DATA_BEAT_WRITTEN,
+    ETHOSU_PMU_SRAM0_WR_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM0_WR_DATA_BEAT_STALLED,
+    ETHOSU_PMU_SRAM0_ENABLED_CYCLES,
+    ETHOSU_PMU_SRAM0_RD_STALL_LIMIT,
+    ETHOSU_PMU_SRAM0_WR_STALL_LIMIT,
+    ETHOSU_PMU_SRAM1_RD_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM1_RD_TRANS_COMPLETED,
+    ETHOSU_PMU_SRAM1_RD_DATA_BEAT_RECEIVED,
+    ETHOSU_PMU_SRAM1_RD_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM1_WR_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM1_WR_TRANS_COMPLETED_M,
+    ETHOSU_PMU_SRAM1_WR_TRANS_COMPLETED_S,
+    ETHOSU_PMU_SRAM1_WR_DATA_BEAT_WRITTEN,
+    ETHOSU_PMU_SRAM1_WR_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM1_WR_DATA_BEAT_STALLED,
+    ETHOSU_PMU_SRAM1_ENABLED_CYCLES,
+    ETHOSU_PMU_SRAM1_RD_STALL_LIMIT,
+    ETHOSU_PMU_SRAM1_WR_STALL_LIMIT,
+    ETHOSU_PMU_SRAM2_RD_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM2_RD_TRANS_COMPLETED,
+    ETHOSU_PMU_SRAM2_RD_DATA_BEAT_RECEIVED,
+    ETHOSU_PMU_SRAM2_RD_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM2_WR_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM2_WR_TRANS_COMPLETED_M,
+    ETHOSU_PMU_SRAM2_WR_TRANS_COMPLETED_S,
+    ETHOSU_PMU_SRAM2_WR_DATA_BEAT_WRITTEN,
+    ETHOSU_PMU_SRAM2_WR_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM2_WR_DATA_BEAT_STALLED,
+    ETHOSU_PMU_SRAM2_ENABLED_CYCLES,
+    ETHOSU_PMU_SRAM2_RD_STALL_LIMIT,
+    ETHOSU_PMU_SRAM2_WR_STALL_LIMIT,
+    ETHOSU_PMU_SRAM3_RD_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM3_RD_TRANS_COMPLETED,
+    ETHOSU_PMU_SRAM3_RD_DATA_BEAT_RECEIVED,
+    ETHOSU_PMU_SRAM3_RD_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM3_WR_TRANS_ACCEPTED,
+    ETHOSU_PMU_SRAM3_WR_TRANS_COMPLETED_M,
+    ETHOSU_PMU_SRAM3_WR_TRANS_COMPLETED_S,
+    ETHOSU_PMU_SRAM3_WR_DATA_BEAT_WRITTEN,
+    ETHOSU_PMU_SRAM3_WR_TRAN_REQ_STALLED,
+    ETHOSU_PMU_SRAM3_WR_DATA_BEAT_STALLED,
+    ETHOSU_PMU_SRAM3_ENABLED_CYCLES,
+    ETHOSU_PMU_SRAM3_RD_STALL_LIMIT,
+    ETHOSU_PMU_SRAM3_WR_STALL_LIMIT,
+    ETHOSU_PMU_EXT0_RD_TRANS_ACCEPTED,
+    ETHOSU_PMU_EXT0_RD_TRANS_COMPLETED,
+    ETHOSU_PMU_EXT0_RD_DATA_BEAT_RECEIVED,
+    ETHOSU_PMU_EXT0_RD_TRAN_REQ_STALLED,
+    ETHOSU_PMU_EXT0_WR_TRANS_ACCEPTED,
+    ETHOSU_PMU_EXT0_WR_TRANS_COMPLETED_M,
+    ETHOSU_PMU_EXT0_WR_TRANS_COMPLETED_S,
+    ETHOSU_PMU_EXT0_WR_DATA_BEAT_WRITTEN,
+    ETHOSU_PMU_EXT0_WR_TRAN_REQ_STALLED,
+    ETHOSU_PMU_EXT0_WR_DATA_BEAT_STALLED,
+    ETHOSU_PMU_EXT0_ENABLED_CYCLES,
+    ETHOSU_PMU_EXT0_RD_STALL_LIMIT,
+    ETHOSU_PMU_EXT0_WR_STALL_LIMIT,
+    ETHOSU_PMU_EXT1_RD_TRANS_ACCEPTED,
+    ETHOSU_PMU_EXT1_RD_TRANS_COMPLETED,
+    ETHOSU_PMU_EXT1_RD_DATA_BEAT_RECEIVED,
+    ETHOSU_PMU_EXT1_RD_TRAN_REQ_STALLED,
+    ETHOSU_PMU_EXT1_WR_TRANS_ACCEPTED,
+    ETHOSU_PMU_EXT1_WR_TRANS_COMPLETED_M,
+    ETHOSU_PMU_EXT1_WR_TRANS_COMPLETED_S,
+    ETHOSU_PMU_EXT1_WR_DATA_BEAT_WRITTEN,
+    ETHOSU_PMU_EXT1_WR_TRAN_REQ_STALLED,
+    ETHOSU_PMU_EXT1_WR_DATA_BEAT_STALLED,
+    ETHOSU_PMU_EXT1_ENABLED_CYCLES,
+    ETHOSU_PMU_EXT1_RD_STALL_LIMIT,
+    ETHOSU_PMU_EXT1_WR_STALL_LIMIT,
+
+    ETHOSU_PMU_SENTINEL // End-marker (not event); its value equals the number of entries listed above it
+};
+#else
+#error No NPU target defined
+#endif
 
 /*****************************************************************************
  * Functions
diff --git a/src/ethosu85_interface.h b/src/ethosu85_interface.h
new file mode 100644
index 0000000..a0040c3
--- /dev/null
+++ b/src/ethosu85_interface.h
@@ -0,0 +1,24705 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// clang-format off
+
+#ifndef ETHOSU85_INTERFACE_H
+#define ETHOSU85_INTERFACE_H
+
+#ifdef __KERNEL__ // __KERNEL__ defined: take the fixed-width integer types from linux/types.h
+#include <linux/types.h>
+#else // otherwise use the standard C header
+#include <stdint.h>
+#endif // __KERNEL__
+
+#if !defined(__cplusplus) || __cplusplus < 201402L // C, or C++ older than C++14
+#define CONSTEXPR // expands to nothing
+#else // C++14 or newer
+#define CONSTEXPR constexpr
+#endif // CONSTEXPR selection
+
+#ifndef __cplusplus // C: the 'struct' keyword is required when naming a struct type
+#define STRUCT struct
+#else // C++: the type name can be used on its own, so STRUCT expands to nothing
+#define STRUCT
+#endif // STRUCT selection
+
+#if defined(__cplusplus) && defined(NPU_DISASSEMBLE)
+#include <iomanip>
+#include <vector>
+#include <sstream>
+#endif
+
+#if defined(__cplusplus) && !defined(NPU_NAMESPACE) // C++ only; a caller-provided NPU_NAMESPACE takes precedence
+#define NPU_NAMESPACE npu // default namespace name used by the 'namespace NPU_NAMESPACE' block in this header
+#endif // default NPU_NAMESPACE
+
+#ifdef __cplusplus
+#include <cassert>
+#include <cstring>
+#include <limits>
+#endif
+
+#ifdef __cplusplus // C++ only: the declarations that follow are placed inside NPU_NAMESPACE
+namespace NPU_NAMESPACE
+{
+#endif // __cplusplus
+#define NNX_ARCH_VERSION_MAJOR 2 // MAJOR.MINOR.PATCH = 2.0.0; macros are not scoped by the namespace above
+#define NNX_ARCH_VERSION_MINOR 0
+#define NNX_ARCH_VERSION_PATCH 0
+
+// Register offsets
+//
+// Register subpage BASE
+//
+#define NPU_REG_ID 0x0000
+#define NPU_REG_STATUS 0x0004
+#define NPU_REG_CMD 0x0008
+#define NPU_REG_RESET 0x000C
+#define NPU_REG_QBASE 0x0010
+#define NPU_REG_QBASE_HI 0x0014
+#define NPU_REG_QREAD 0x0018
+#define NPU_REG_QCONFIG 0x001C
+#define NPU_REG_QSIZE 0x0020
+#define NPU_REG_PROT 0x0024
+#define NPU_REG_CONFIG 0x0028
+#define NPU_REG_COND_STATUS 0x0030
+#define NPU_REG_POWER_CTRL 0x0038
+#define NPU_REG_REGIONCFG 0x003C
+#define NPU_REG_MEM_ATTR_BASE 0x0040
+#define NPU_REG_MEM_ATTR_ARRLEN 0x0004
+#define NPU_REG_AXI_SRAM 0x0050
+#define NPU_REG_AXI_EXT 0x0054
+#define NPU_REG_CFG_SRAM_CAP 0x0060
+#define NPU_REG_CFG_EXT_CAP 0x0064
+#define NPU_REG_CFG_SRAM_HASH0 0x0068
+#define NPU_REG_CFG_SRAM_HASH0_HI 0x006C
+#define NPU_REG_CFG_SRAM_HASH1 0x0070
+#define NPU_REG_CFG_SRAM_HASH1_HI 0x0074
+#define NPU_REG_CFG_EXT_HASH0 0x0078
+#define NPU_REG_CFG_EXT_HASH0_HI 0x007C
+#define BASE_REGISTERS_SIZE 0x0080
+
+//
+// Register subpage BASE_POINTERS
+//
+#define NPU_REG_BASEP_BASE 0x0080
+#define NPU_REG_BASEP_ARRLEN 0x0008
+#define BASE_POINTERS_REGISTERS_SIZE 0x0100
+
+//
+// Register subpage DEBUG
+//
+#define NPU_REG_CLKFORCE 0x0140
+#define NPU_REG_DEBUG_ADDRESS 0x0144
+#define NPU_REG_DEBUG_MISC 0x0148
+#define DEBUG_REGISTERS_SIZE 0x0180
+
+//
+// Register subpage TSU_DEBUG
+//
+#define NPU_REG_DMA_IFM_SRC 0x0240
+#define NPU_REG_DMA_IFM_SRC_HI 0x0244
+#define NPU_REG_DMA_IFM_DST 0x0248
+#define NPU_REG_DMA_OFM_SRC 0x024C
+#define NPU_REG_DMA_OFM_DST 0x0250
+#define NPU_REG_DMA_OFM_DST_HI 0x0254
+#define NPU_REG_DMA_WEIGHT_SRC 0x0258
+#define NPU_REG_DMA_WEIGHT_SRC_HI 0x025C
+#define NPU_REG_DMA_CMD_SRC 0x0260
+#define NPU_REG_DMA_CMD_SRC_HI 0x0264
+#define NPU_REG_DMA_CMD_SIZE 0x0268
+#define NPU_REG_DMA_M2M_SRC 0x026C
+#define NPU_REG_DMA_M2M_SRC_HI 0x0270
+#define NPU_REG_DMA_M2M_DST 0x0274
+#define NPU_REG_DMA_M2M_DST_HI 0x0278
+#define NPU_REG_CURRENT_QREAD 0x027C
+#define NPU_REG_DMA_SCALE_SRC 0x0280
+#define NPU_REG_DMA_SCALE_SRC_HI 0x0284
+#define NPU_REG_DMA_WEIGHT1_SRC 0x0288
+#define NPU_REG_DMA_WEIGHT1_SRC_HI 0x028C
+#define NPU_REG_DMA_WEIGHT2_SRC 0x0290
+#define NPU_REG_DMA_WEIGHT2_SRC_HI 0x0294
+#define NPU_REG_DMA_WEIGHT3_SRC 0x0298
+#define NPU_REG_DMA_WEIGHT3_SRC_HI 0x029C
+#define NPU_REG_CURRENT_OP 0x02B8
+#define NPU_REG_CURRENT_CMD 0x02BC
+#define TSU_DEBUG_REGISTERS_SIZE 0x0300
+
+//
+// Register subpage INTERNAL_MEMORY
+//
+#define NPU_REG_INTERNAL_MEMORY_BASE 0x0400
+#define NPU_REG_INTERNAL_MEMORY_ARRLEN 0x0100
+#define INTERNAL_MEMORY_REGISTERS_SIZE 0x0800
+
+//
+// Register subpage TSU_IFM
+//
+#define NPU_REG_IFM_PAD_TOP 0x0800
+#define NPU_REG_IFM_PAD_LEFT 0x0804
+#define NPU_REG_IFM_PAD_RIGHT 0x0808
+#define NPU_REG_IFM_PAD_BOTTOM 0x080C
+#define NPU_REG_IFM_DEPTH_M1 0x0810
+#define NPU_REG_IFM_PRECISION 0x0814
+#define NPU_REG_IFM_UPSCALE 0x081C
+#define NPU_REG_IFM_BROADCAST 0x0820
+#define NPU_REG_IFM_ZERO_POINT 0x0824
+#define NPU_REG_IFM_WIDTH0_M1 0x0828
+#define NPU_REG_IFM_HEIGHT0_M1 0x082C
+#define NPU_REG_IFM_HEIGHT1_M1 0x0830
+#define NPU_REG_IFM_REGION 0x083C
+#define TSU_IFM_REGISTERS_SIZE 0x0840
+
+//
+// Register subpage TSU_OFM
+//
+#define NPU_REG_OFM_WIDTH_M1 0x0844
+#define NPU_REG_OFM_HEIGHT_M1 0x0848
+#define NPU_REG_OFM_DEPTH_M1 0x084C
+#define NPU_REG_OFM_PRECISION 0x0850
+#define NPU_REG_OFM_BLK_WIDTH_M1 0x0854
+#define NPU_REG_OFM_BLK_HEIGHT_M1 0x0858
+#define NPU_REG_OFM_BLK_DEPTH_M1 0x085C
+#define NPU_REG_OFM_ZERO_POINT 0x0860
+#define NPU_REG_OFM_WIDTH0_M1 0x0868
+#define NPU_REG_OFM_HEIGHT0_M1 0x086C
+#define NPU_REG_OFM_HEIGHT1_M1 0x0870
+#define NPU_REG_OFM_REGION 0x087C
+#define TSU_OFM_REGISTERS_SIZE 0x0880
+
+//
+// Register subpage TSU_KERNEL
+//
+#define NPU_REG_KERNEL_WIDTH_M1 0x0880
+#define NPU_REG_KERNEL_HEIGHT_M1 0x0884
+#define NPU_REG_KERNEL_STRIDE 0x0888
+#define NPU_REG_ACC_FORMAT 0x0890
+#define NPU_REG_ACTIVATION 0x0894
+#define NPU_REG_ACTIVATION_MIN 0x0898
+#define NPU_REG_ACTIVATION_MAX 0x089C
+#define NPU_REG_WEIGHT_REGION 0x08A0
+#define NPU_REG_SCALE_REGION 0x08A4
+#define NPU_REG_RESIZE_X_SCALE_N_M1 0x08A8
+#define NPU_REG_RESIZE_Y_SCALE_N_M1 0x08AC
+#define NPU_REG_RESIZE_X_OFFSET 0x08B0
+#define NPU_REG_RESIZE_Y_OFFSET 0x08B4
+#define NPU_REG_WEIGHT_FORMAT 0x08B8
+#define NPU_REG_BLOCKDEP 0x08BC
+#define TSU_KERNEL_REGISTERS_SIZE 0x08C0
+
+//
+// Register subpage TSU_DMA
+//
+#define NPU_REG_DMA0_SRC_REGION 0x08C0
+#define NPU_REG_DMA0_DST_REGION 0x08C4
+#define NPU_REG_DMA0_SIZE0 0x08C8
+#define NPU_REG_DMA0_SIZE1 0x08CC
+#define NPU_REG_DMA0_IDX_REGION 0x08D0
+#define TSU_DMA_REGISTERS_SIZE 0x0900
+
+//
+// Register subpage TSU_IFM2
+//
+#define NPU_REG_IFM2_BROADCAST 0x0900
+#define NPU_REG_IFM2_PRECISION 0x0914
+#define NPU_REG_IFM2_ZERO_POINT 0x0924
+#define NPU_REG_IFM2_WIDTH0_M1 0x0928
+#define NPU_REG_IFM2_HEIGHT0_M1 0x092C
+#define NPU_REG_IFM2_HEIGHT1_M1 0x0930
+#define NPU_REG_IFM2_REGION 0x093C
+#define TSU_IFM2_REGISTERS_SIZE 0x0940
+
+//
+// Register subpage TSU_IFM_BASE
+//
+#define NPU_REG_IFM_BASE0 0x0A00
+#define NPU_REG_IFM_BASE0_HI 0x0A04
+#define NPU_REG_IFM_BASE1 0x0A08
+#define NPU_REG_IFM_BASE1_HI 0x0A0C
+#define NPU_REG_IFM_BASE2 0x0A10
+#define NPU_REG_IFM_BASE2_HI 0x0A14
+#define NPU_REG_IFM_BASE3 0x0A18
+#define NPU_REG_IFM_BASE3_HI 0x0A1C
+#define NPU_REG_IFM_STRIDE_X 0x0A20
+#define NPU_REG_IFM_STRIDE_X_HI 0x0A24
+#define NPU_REG_IFM_STRIDE_Y 0x0A28
+#define NPU_REG_IFM_STRIDE_Y_HI 0x0A2C
+#define NPU_REG_IFM_STRIDE_C 0x0A30
+#define NPU_REG_IFM_STRIDE_C_HI 0x0A34
+#define TSU_IFM_BASE_REGISTERS_SIZE 0x0A40
+
+//
+// Register subpage TSU_OFM_BASE
+//
+#define NPU_REG_OFM_BASE0 0x0A40
+#define NPU_REG_OFM_BASE0_HI 0x0A44
+#define NPU_REG_OFM_BASE1 0x0A48
+#define NPU_REG_OFM_BASE1_HI 0x0A4C
+#define NPU_REG_OFM_BASE2 0x0A50
+#define NPU_REG_OFM_BASE2_HI 0x0A54
+#define NPU_REG_OFM_BASE3 0x0A58
+#define NPU_REG_OFM_BASE3_HI 0x0A5C
+#define NPU_REG_OFM_STRIDE_X 0x0A60
+#define NPU_REG_OFM_STRIDE_X_HI 0x0A64
+#define NPU_REG_OFM_STRIDE_Y 0x0A68
+#define NPU_REG_OFM_STRIDE_Y_HI 0x0A6C
+#define NPU_REG_OFM_STRIDE_C 0x0A70
+#define NPU_REG_OFM_STRIDE_C_HI 0x0A74
+#define TSU_OFM_BASE_REGISTERS_SIZE 0x0A80
+
+//
+// Register subpage TSU_WS_BASE
+//
+#define NPU_REG_WEIGHT_BASE 0x0A80
+#define NPU_REG_WEIGHT_BASE_HI 0x0A84
+#define NPU_REG_WEIGHT_LENGTH 0x0A88
+#define NPU_REG_WEIGHT_LENGTH_HI 0x0A8C
+#define NPU_REG_SCALE_BASE 0x0A90
+#define NPU_REG_SCALE_BASE_HI 0x0A94
+#define NPU_REG_SCALE_LENGTH 0x0A98
+#define NPU_REG_SCALE_LENGTH_HI 0x0A9C
+#define NPU_REG_OFM_SCALE 0x0AA0
+#define NPU_REG_OFM_SCALE_HI 0x0AA4
+#define NPU_REG_IFM_SCALE 0x0AA8
+#define NPU_REG_IFM_SCALE_HI 0x0AAC
+#define NPU_REG_IFM2_SCALE 0x0AB0
+#define NPU_REG_IFM2_SCALE_HI 0x0AB4
+#define NPU_REG_OP_SCALAR 0x0AB8
+#define NPU_REG_OP_SCALAR_HI 0x0ABC
+#define TSU_WS_BASE_REGISTERS_SIZE 0x0AC0
+
+//
+// Register subpage TSU_DMA_BASE
+//
+#define NPU_REG_DMA0_SRC 0x0AC0
+#define NPU_REG_DMA0_SRC_HI 0x0AC4
+#define NPU_REG_DMA0_DST 0x0AC8
+#define NPU_REG_DMA0_DST_HI 0x0ACC
+#define NPU_REG_DMA0_LEN 0x0AD0
+#define NPU_REG_DMA0_LEN_HI 0x0AD4
+#define NPU_REG_DMA0_SRC_STRIDE0 0x0AD8
+#define NPU_REG_DMA0_SRC_STRIDE0_HI 0x0ADC
+#define NPU_REG_DMA0_SRC_STRIDE1 0x0AE0
+#define NPU_REG_DMA0_SRC_STRIDE1_HI 0x0AE4
+#define NPU_REG_DMA0_DST_STRIDE0 0x0AE8
+#define NPU_REG_DMA0_DST_STRIDE0_HI 0x0AEC
+#define NPU_REG_DMA0_DST_STRIDE1 0x0AF0
+#define NPU_REG_DMA0_DST_STRIDE1_HI 0x0AF4
+#define NPU_REG_DMA0_IDX 0x0AF8
+#define NPU_REG_DMA0_IDX_HI 0x0AFC
+#define TSU_DMA_BASE_REGISTERS_SIZE 0x0B00
+
+//
+// Register subpage TSU_IFM2_BASE
+//
+#define NPU_REG_IFM2_BASE0 0x0B00
+#define NPU_REG_IFM2_BASE0_HI 0x0B04
+#define NPU_REG_IFM2_BASE1 0x0B08
+#define NPU_REG_IFM2_BASE1_HI 0x0B0C
+#define NPU_REG_IFM2_BASE2 0x0B10
+#define NPU_REG_IFM2_BASE2_HI 0x0B14
+#define NPU_REG_IFM2_BASE3 0x0B18
+#define NPU_REG_IFM2_BASE3_HI 0x0B1C
+#define NPU_REG_IFM2_STRIDE_X 0x0B20
+#define NPU_REG_IFM2_STRIDE_X_HI 0x0B24
+#define NPU_REG_IFM2_STRIDE_Y 0x0B28
+#define NPU_REG_IFM2_STRIDE_Y_HI 0x0B2C
+#define NPU_REG_IFM2_STRIDE_C 0x0B30
+#define NPU_REG_IFM2_STRIDE_C_HI 0x0B34
+#define TSU_IFM2_BASE_REGISTERS_SIZE 0x0B40
+
+//
+// Register subpage TSU_WS1_BASE
+//
+#define NPU_REG_WEIGHT1_BASE 0x0B40
+#define NPU_REG_WEIGHT1_BASE_HI 0x0B44
+#define NPU_REG_WEIGHT1_LENGTH 0x0B48
+#define NPU_REG_WEIGHT1_LENGTH_HI 0x0B4C
+#define NPU_REG_WEIGHT2_BASE 0x0B50
+#define NPU_REG_WEIGHT2_BASE_HI 0x0B54
+#define NPU_REG_WEIGHT2_LENGTH 0x0B58
+#define NPU_REG_WEIGHT2_LENGTH_HI 0x0B5C
+#define NPU_REG_WEIGHT3_BASE 0x0B60
+#define NPU_REG_WEIGHT3_BASE_HI 0x0B64
+#define NPU_REG_WEIGHT3_LENGTH 0x0B68
+#define NPU_REG_WEIGHT3_LENGTH_HI 0x0B6C
+#define NPU_REG_RESIZE_X_STEP 0x0B70
+#define NPU_REG_RESIZE_X_STEP_HI 0x0B74
+#define NPU_REG_RESIZE_Y_STEP 0x0B78
+#define NPU_REG_RESIZE_Y_STEP_HI 0x0B7C
+#define TSU_WS1_BASE_REGISTERS_SIZE 0x0B80
+
+//
+// Register subpage TSU_USER_BASE
+//
+#define TSU_USER_BASE_REGISTERS_SIZE 0x0BC0
+
+//
+// Register subpage TSU_DMA_EBASE
+//
+#define NPU_REG_DMA0_IDX_MAX 0x0BC0
+#define NPU_REG_DMA0_IDX_MAX_HI 0x0BC4
+#define NPU_REG_DMA0_IDX_SKIP1 0x0BC8
+#define NPU_REG_DMA0_IDX_SKIP1_HI 0x0BCC
+#define TSU_DMA_EBASE_REGISTERS_SIZE 0x0C00
+
+//
+// Register subpage ID
+//
+#define NPU_REG_REVISION 0x0FC0
+#define NPU_REG_PID4 0x0FD0
+#define NPU_REG_PID5 0x0FD4
+#define NPU_REG_PID6 0x0FD8
+#define NPU_REG_PID7 0x0FDC
+#define NPU_REG_PID0 0x0FE0
+#define NPU_REG_PID1 0x0FE4
+#define NPU_REG_PID2 0x0FE8
+#define NPU_REG_PID3 0x0FEC
+#define NPU_REG_CID0 0x0FF0
+#define NPU_REG_CID1 0x0FF4
+#define NPU_REG_CID2 0x0FF8
+#define NPU_REG_CID3 0x0FFC
+#define ID_REGISTERS_SIZE 0x1000
+
+//
+// Register subpage DEBUG_STATUS
+//
+#define NPU_REG_WD_STATUS 0x1100
+#define NPU_REG_MAC_STATUS 0x1104
+#define NPU_REG_AO_STATUS 0x1108
+#define NPU_REG_DMA_STATUS0 0x1110
+#define NPU_REG_DMA_STATUS1 0x1114
+#define DEBUG_STATUS_REGISTERS_SIZE 0x1180
+
+//
+// Register subpage PMU
+//
+#define NPU_REG_PMCR 0x1180
+#define NPU_REG_PMCNTENSET 0x1184
+#define NPU_REG_PMCNTENCLR 0x1188
+#define NPU_REG_PMOVSSET 0x118C
+#define NPU_REG_PMOVSCLR 0x1190
+#define NPU_REG_PMINTSET 0x1194
+#define NPU_REG_PMINTCLR 0x1198
+#define NPU_REG_PMCCNTR 0x11A0
+#define NPU_REG_PMCCNTR_HI 0x11A4
+#define NPU_REG_PMCCNTR_CFG 0x11A8
+#define NPU_REG_PMCAXI_CHAN 0x11AC
+#define NPU_REG_PMCLUT 0x11B0
+#define PMU_REGISTERS_SIZE 0x1200
+
+//
+// Register subpage PMU_COUNTERS
+//
+#define NPU_REG_PMEVCNTR_BASE 0x1300 // offset of the first PMEVCNTR register
+#define NPU_REG_PMEVCNTR_ARRLEN 0x0008 // presumably the element count; agrees with ETHOSU_PMU_NCOUNTERS == 8 for U85 - TODO confirm
+#define NPU_REG_PMEVTYPER_BASE 0x1380 // offset of the first PMEVTYPER register
+#define NPU_REG_PMEVTYPER_ARRLEN 0x0008 // same length as the PMEVCNTR array
+#define PMU_COUNTERS_REGISTERS_SIZE 0x1400 // NOTE(review): looks like the end offset of this subpage, not its byte length - confirm
+
+#ifdef __cplusplus
+// Enum types
+enum class acc_format : uint8_t
+{
+    I32 = 0,
+    I48 = 1,
+};
+
+enum class acc_input : uint8_t
+{
+    RESET = 0,
+    KEEP = 1,
+    IFM2 = 2,
+};
+
+enum class acc_output : uint8_t
+{
+    ENABLE = 0,
+    DISABLE = 1,
+};
+
+enum class activation_clip_range : uint8_t
+{
+    B16 = 0,
+    NONE = 1,
+};
+
+enum class activation_format : uint8_t
+{
+    NHWC = 0,
+    NHCWB16 = 1,
+};
+
+enum class activation_function : uint8_t
+{
+    LUT_NONE = 0,
+    LUT_U8_U8 = 1,
+    LUT_S8_S8 = 4,
+    LUT_S8_S16 = 5,
+    LUT_S8_S32 = 7,
+    LUT_S16_S16 = 8,
+    LUT_S16_S32 = 9,
+    LUT_TANH = 10,
+    LUT_SIGMOID = 11,
+};
+
+enum class activation_precision : uint8_t
+{
+    B8 = 0,
+    B16 = 1,
+    B32 = 2,
+    B64 = 3,
+};
+
+enum class activation_reverse : uint8_t
+{
+    NONE = 0,
+    H = 1,
+    W = 2,
+    C = 3,
+};
+
+enum class activation_storage : uint8_t
+{
+    TILE2X2 = 0,
+    TILE3X1 = 1,
+    CHAINED = 2,
+    NONE = 3,
+};
+
+enum class activation_transpose : uint8_t
+{
+    HWC = 0,
+    WHC = 1,
+    HCW = 2,
+    WCH = 3,
+    CHW = 6,
+    CWH = 7,
+};
+
+enum class activation_type : uint8_t
+{
+    UNSIGNED = 0,
+    SIGNED = 1,
+};
+
+enum class axi_mem_domain : uint8_t
+{
+    NON_SHARABLE = 0,
+    INNER_SHARABLE = 1,
+    OUTER_SHARABLE = 2,
+    SYSTEM = 3,
+};
+
+enum class axi_mem_encoding : uint8_t
+{
+    DEVICE_NON_BUFFERABLE = 0,
+    DEVICE_BUFFERABLE = 1,
+    NORMAL_NON_CACHEABLE_NON_BUFFERABLE = 2,
+    NORMAL_NON_CACHEABLE_BUFFERABLE = 3,
+    WRITE_THROUGH_NO_ALLOCATE = 4,
+    WRITE_THROUGH_READ_ALLOCATE = 5,
+    WRITE_THROUGH_WRITE_ALLOCATE = 6,
+    WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 7,
+    WRITE_BACK_NO_ALLOCATE = 8,
+    WRITE_BACK_READ_ALLOCATE = 9,
+    WRITE_BACK_WRITE_ALLOCATE = 10,
+    WRITE_BACK_READ_AND_WRITE_ALLOCATE = 11,
+};
+
+enum class axi_port : uint8_t
+{
+    SRAM = 0,
+    EXT = 1,
+};
+
+enum class branch_cond : uint8_t
+{
+    ALWAYS = 0,
+    RF_TRUE = 1,
+};
+
+enum class broadcast_mode : uint8_t
+{
+    NONE = 0,
+    H = 1,
+    W = 2,
+    HW = 3,
+    C = 4,
+    CH = 5,
+    CW = 6,
+    CWH = 7,
+    SCALAR = 8,
+};
+
+enum class cmd0_opcode : uint16_t
+{
+    NPU_OP_STOP = 0,
+    NPU_OP_IRQ = 1,
+    NPU_OP_CONV = 2,
+    NPU_OP_DEPTHWISE = 3,
+    NPU_OP_POOL = 5,
+    NPU_OP_ELEMENTWISE = 6,
+    NPU_OP_RESIZE = 7,
+    NPU_OP_DMA_START = 16,
+    NPU_OP_DMA_WAIT = 17,
+    NPU_OP_KERNEL_WAIT = 18,
+    NPU_OP_PMU_MASK = 19,
+    NPU_SET_IFM_PAD_TOP = 256,
+    NPU_SET_IFM_PAD_LEFT = 257,
+    NPU_SET_IFM_PAD_RIGHT = 258,
+    NPU_SET_IFM_PAD_BOTTOM = 259,
+    NPU_SET_IFM_DEPTH_M1 = 260,
+    NPU_SET_IFM_PRECISION = 261,
+    NPU_SET_IFM_UPSCALE = 263,
+    NPU_SET_IFM_BROADCAST = 264,
+    NPU_SET_IFM_ZERO_POINT = 265,
+    NPU_SET_IFM_WIDTH0_M1 = 266,
+    NPU_SET_IFM_HEIGHT0_M1 = 267,
+    NPU_SET_IFM_HEIGHT1_M1 = 268,
+    NPU_SET_IFM_REGION = 271,
+    NPU_SET_OFM_WIDTH_M1 = 273,
+    NPU_SET_OFM_HEIGHT_M1 = 274,
+    NPU_SET_OFM_DEPTH_M1 = 275,
+    NPU_SET_OFM_PRECISION = 276,
+    NPU_SET_OFM_BLK_WIDTH_M1 = 277,
+    NPU_SET_OFM_BLK_HEIGHT_M1 = 278,
+    NPU_SET_OFM_BLK_DEPTH_M1 = 279,
+    NPU_SET_OFM_ZERO_POINT = 280,
+    NPU_SET_OFM_WIDTH0_M1 = 282,
+    NPU_SET_OFM_HEIGHT0_M1 = 283,
+    NPU_SET_OFM_HEIGHT1_M1 = 284,
+    NPU_SET_OFM_REGION = 287,
+    NPU_SET_KERNEL_WIDTH_M1 = 288,
+    NPU_SET_KERNEL_HEIGHT_M1 = 289,
+    NPU_SET_KERNEL_STRIDE = 290,
+    NPU_SET_ACC_FORMAT = 292,
+    NPU_SET_ACTIVATION = 293,
+    NPU_SET_ACTIVATION_MIN = 294,
+    NPU_SET_ACTIVATION_MAX = 295,
+    NPU_SET_WEIGHT_REGION = 296,
+    NPU_SET_SCALE_REGION = 297,
+    NPU_SET_RESIZE_X_SCALE_N_M1 = 298,
+    NPU_SET_RESIZE_Y_SCALE_N_M1 = 299,
+    NPU_SET_RESIZE_X_OFFSET = 300,
+    NPU_SET_RESIZE_Y_OFFSET = 301,
+    NPU_SET_WEIGHT_FORMAT = 302,
+    NPU_SET_BLOCKDEP = 303,
+    NPU_SET_DMA0_SRC_REGION = 304,
+    NPU_SET_DMA0_DST_REGION = 305,
+    NPU_SET_DMA0_SIZE0 = 306,
+    NPU_SET_DMA0_SIZE1 = 307,
+    NPU_SET_DMA0_IDX_REGION = 308,
+    NPU_SET_IFM2_BROADCAST = 384,
+    NPU_SET_IFM2_PRECISION = 389,
+    NPU_SET_IFM2_ZERO_POINT = 393,
+    NPU_SET_IFM2_WIDTH0_M1 = 394,
+    NPU_SET_IFM2_HEIGHT0_M1 = 395,
+    NPU_SET_IFM2_HEIGHT1_M1 = 396,
+    NPU_SET_IFM2_REGION = 399,
+};
+
+enum class cmd1_opcode : uint16_t
+{
+    NPU_SET_IFM_BASE0 = 0,
+    NPU_SET_IFM_BASE1 = 1,
+    NPU_SET_IFM_BASE2 = 2,
+    NPU_SET_IFM_BASE3 = 3,
+    NPU_SET_IFM_STRIDE_X = 4,
+    NPU_SET_IFM_STRIDE_Y = 5,
+    NPU_SET_IFM_STRIDE_C = 6,
+    NPU_SET_OFM_BASE0 = 16,
+    NPU_SET_OFM_BASE1 = 17,
+    NPU_SET_OFM_BASE2 = 18,
+    NPU_SET_OFM_BASE3 = 19,
+    NPU_SET_OFM_STRIDE_X = 20,
+    NPU_SET_OFM_STRIDE_Y = 21,
+    NPU_SET_OFM_STRIDE_C = 22,
+    NPU_SET_WEIGHT_BASE = 32,
+    NPU_SET_WEIGHT_LENGTH = 33,
+    NPU_SET_SCALE_BASE = 34,
+    NPU_SET_SCALE_LENGTH = 35,
+    NPU_SET_OFM_SCALE = 36,
+    NPU_SET_IFM_SCALE = 37,
+    NPU_SET_IFM2_SCALE = 38,
+    NPU_SET_OP_SCALAR = 39,
+    NPU_SET_DMA0_SRC = 48,
+    NPU_SET_DMA0_DST = 49,
+    NPU_SET_DMA0_LEN = 50,
+    NPU_SET_DMA0_SRC_STRIDE0 = 51,
+    NPU_SET_DMA0_SRC_STRIDE1 = 52,
+    NPU_SET_DMA0_DST_STRIDE0 = 53,
+    NPU_SET_DMA0_DST_STRIDE1 = 54,
+    NPU_SET_DMA0_IDX = 55,
+    NPU_SET_DMA0_IDX_MAX = 56,
+    NPU_SET_DMA0_IDX_SKIP1 = 57,
+    NPU_SET_IFM2_BASE0 = 128,
+    NPU_SET_IFM2_BASE1 = 129,
+    NPU_SET_IFM2_BASE2 = 130,
+    NPU_SET_IFM2_BASE3 = 131,
+    NPU_SET_IFM2_STRIDE_X = 132,
+    NPU_SET_IFM2_STRIDE_Y = 133,
+    NPU_SET_IFM2_STRIDE_C = 134,
+    NPU_SET_WEIGHT1_BASE = 144,
+    NPU_SET_WEIGHT1_LENGTH = 145,
+    NPU_SET_WEIGHT2_BASE = 146,
+    NPU_SET_WEIGHT2_LENGTH = 147,
+    NPU_SET_WEIGHT3_BASE = 148,
+    NPU_SET_WEIGHT3_LENGTH = 149,
+    NPU_SET_RESIZE_X = 150,
+    NPU_SET_RESIZE_Y = 151,
+    NPU_OP_BRANCH = 256,
+};
+
+enum class cmd_ctrl : uint8_t
+{
+    CMD0_CTRL = 0,
+    CMD1_CTRL = 1,
+};
+
+enum class custom_dma : uint8_t
+{
+    NOT_IMPLEMENTED = 0,
+    IMPLEMENTED = 1,
+};
+
+enum class dma_fault_channel : uint8_t
+{
+    CMD_READ = 0,
+    IFM_READ = 1,
+    WEIGHT_READ = 2,
+    SBS_READ = 3,
+    MEM2MEM_READ = 4,
+    OFM_WRITE = 8,
+    MEM2MEM_WRITE = 9,
+};
+
+enum class dma_fault_src : uint8_t
+{
+    SRAM = 0,
+    EXT = 1,
+};
+
+enum class dma_idx_mode : uint8_t
+{
+    DISABLED = 0,
+    ENABLED = 1,
+};
+
+enum class dma_region_mode : uint8_t
+{
+    EXTERNAL = 0,
+    INTERNAL = 1,
+};
+
+enum class dma_stride_mode : uint8_t
+{
+    D1 = 0,
+    D2 = 1,
+    D3 = 2,
+};
+
+enum class elementwise_mode : uint8_t
+{
+    MUL = 0,
+    ADD = 1,
+    SUB = 2,
+    MIN = 3,
+    MAX = 4,
+    LRELU = 5,
+    ABS = 6,
+    CLZ = 7,
+    SHR = 8,
+    SHL = 9,
+    LSR = 10,
+    DIV = 11,
+    CMP_EQ = 16,
+    CMP_NE = 17,
+    CMP_GE = 18,
+    CMP_GT = 19,
+    AND = 33,
+    OR = 34,
+    XOR = 35,
+    NOT = 36,
+    AND_NOT = 42,
+};
+
+enum class ifm_upscale_mode : uint8_t
+{
+    NONE = 0,
+    NEAREST = 1,
+    ZEROS = 2,
+};
+
+enum class kernel_decomposition : uint8_t
+{
+    D8X8 = 0,
+    D4X4 = 1,
+};
+
+enum class kernel_dilation : uint8_t
+{
+    NONE = 0,
+    X2 = 1,
+};
+
+enum class max_beats : uint8_t
+{
+    B64 = 0,
+    B128 = 1,
+    B256 = 2,
+};
+
+enum class microblock : uint8_t
+{
+    U1X1 = 0,
+    U1X2 = 1,
+    U1X4 = 2,
+    U2X2 = 3,
+    U2X4 = 4,
+    U4X4 = 5,
+};
+
+enum class ofm_scale_mode : uint8_t
+{
+    PER_CHANNEL = 0,
+    GLOBAL = 1,
+};
+
+enum class pmu_axi_channel : uint8_t
+{
+    RD_CMD = 0,
+    RD_IFM = 1,
+    RD_WEIGHTS = 2,
+    RD_SCALE_BIAS = 3,
+    RD_MEM2MEM = 4,
+    RD_IFM_STREAM = 5,
+    RD_MEM2MEM_IDX = 6,
+    WR_OFM = 8,
+    WR_MEM2MEM = 9,
+};
+
+enum class pmu_event : uint16_t // Scoped enum with 16-bit storage (values reach 671, beyond 8 bits); numbering is sparse. Mirrors PMU_EVENT_* in the #else branch.
+{
+    NO_EVENT = 0,
+    CYCLE = 17,
+    NPU_IDLE = 32,
+    CC_STALLED_ON_BLOCKDEP = 33,
+    CC_STALLED_ON_SHRAM_RECONFIG = 34,
+    NPU_ACTIVE = 35,
+    MAC_ACTIVE = 48,
+    MAC_DPU_ACTIVE = 51,
+    MAC_STALLED_BY_W_OR_ACC = 52,
+    MAC_STALLED_BY_W = 53,
+    MAC_STALLED_BY_ACC = 54,
+    MAC_STALLED_BY_IB = 55,
+    AO_ACTIVE = 64,
+    AO_STALLED_BY_BS_OR_OB = 67,
+    AO_STALLED_BY_BS = 68,
+    AO_STALLED_BY_OB = 69,
+    AO_STALLED_BY_AB_OR_CB = 70,
+    AO_STALLED_BY_AB = 71,
+    AO_STALLED_BY_CB = 72,
+    WD_ACTIVE = 80,
+    WD_STALLED = 81,
+    WD_STALLED_BY_WD_BUF = 83,
+    WD_STALLED_BY_WS_FC = 84,
+    WD_STALLED_BY_WS_TC = 85,
+    WD_TRANS_WBLK = 89,
+    WD_TRANS_WS_FC = 90,
+    WD_TRANS_WS_TC = 91,
+    WD_STALLED_BY_WS_SC0 = 96,
+    WD_STALLED_BY_WS_SC1 = 97,
+    WD_STALLED_BY_WS_SC2 = 98,
+    WD_STALLED_BY_WS_SC3 = 99,
+    WD_PARSE_ACTIVE_SC0 = 100,
+    WD_PARSE_ACTIVE_SC1 = 101,
+    WD_PARSE_ACTIVE_SC2 = 102,
+    WD_PARSE_ACTIVE_SC3 = 103,
+    WD_PARSE_STALL_SC0 = 104,
+    WD_PARSE_STALL_SC1 = 105,
+    WD_PARSE_STALL_SC2 = 106,
+    WD_PARSE_STALL_SC3 = 107,
+    WD_PARSE_STALL_IN_SC0 = 108,
+    WD_PARSE_STALL_IN_SC1 = 109,
+    WD_PARSE_STALL_IN_SC2 = 110,
+    WD_PARSE_STALL_IN_SC3 = 111,
+    WD_PARSE_STALL_OUT_SC0 = 112,
+    WD_PARSE_STALL_OUT_SC1 = 113,
+    WD_PARSE_STALL_OUT_SC2 = 114,
+    WD_PARSE_STALL_OUT_SC3 = 115,
+    WD_TRANS_WS_SC0 = 116,
+    WD_TRANS_WS_SC1 = 117,
+    WD_TRANS_WS_SC2 = 118,
+    WD_TRANS_WS_SC3 = 119,
+    WD_TRANS_WB0 = 120,
+    WD_TRANS_WB1 = 121,
+    WD_TRANS_WB2 = 122,
+    WD_TRANS_WB3 = 123,
+    SRAM_RD_TRANS_ACCEPTED = 128,
+    SRAM_RD_TRANS_COMPLETED = 129,
+    SRAM_RD_DATA_BEAT_RECEIVED = 130,
+    SRAM_RD_TRAN_REQ_STALLED = 131,
+    SRAM_WR_TRANS_ACCEPTED = 132,
+    SRAM_WR_TRANS_COMPLETED_M = 133,
+    SRAM_WR_TRANS_COMPLETED_S = 134,
+    SRAM_WR_DATA_BEAT_WRITTEN = 135,
+    SRAM_WR_TRAN_REQ_STALLED = 136,
+    SRAM_WR_DATA_BEAT_STALLED = 137,
+    SRAM_ENABLED_CYCLES = 140,
+    SRAM_RD_STALL_LIMIT = 142,
+    SRAM_WR_STALL_LIMIT = 143,
+    AXI_LATENCY_ANY = 160,
+    AXI_LATENCY_32 = 161,
+    AXI_LATENCY_64 = 162,
+    AXI_LATENCY_128 = 163,
+    AXI_LATENCY_256 = 164,
+    AXI_LATENCY_512 = 165,
+    AXI_LATENCY_1024 = 166,
+    ECC_DMA = 176,
+    ECC_MAC_IB = 177,
+    ECC_MAC_AB = 178,
+    ECC_AO_CB = 179,
+    ECC_AO_OB = 180,
+    ECC_AO_LUT = 181,
+    EXT_RD_TRANS_ACCEPTED = 384,
+    EXT_RD_TRANS_COMPLETED = 385,
+    EXT_RD_DATA_BEAT_RECEIVED = 386,
+    EXT_RD_TRAN_REQ_STALLED = 387,
+    EXT_WR_TRANS_ACCEPTED = 388,
+    EXT_WR_TRANS_COMPLETED_M = 389,
+    EXT_WR_TRANS_COMPLETED_S = 390,
+    EXT_WR_DATA_BEAT_WRITTEN = 391,
+    EXT_WR_TRAN_REQ_STALLED = 392,
+    EXT_WR_DATA_BEAT_STALLED = 393,
+    EXT_ENABLED_CYCLES = 396,
+    EXT_RD_STALL_LIMIT = 398,
+    EXT_WR_STALL_LIMIT = 399,
+    SRAM0_RD_TRANS_ACCEPTED = 512,
+    SRAM0_RD_TRANS_COMPLETED = 513,
+    SRAM0_RD_DATA_BEAT_RECEIVED = 514,
+    SRAM0_RD_TRAN_REQ_STALLED = 515,
+    SRAM0_WR_TRANS_ACCEPTED = 516,
+    SRAM0_WR_TRANS_COMPLETED_M = 517,
+    SRAM0_WR_TRANS_COMPLETED_S = 518,
+    SRAM0_WR_DATA_BEAT_WRITTEN = 519,
+    SRAM0_WR_TRAN_REQ_STALLED = 520,
+    SRAM0_WR_DATA_BEAT_STALLED = 521,
+    SRAM0_ENABLED_CYCLES = 524,
+    SRAM0_RD_STALL_LIMIT = 526,
+    SRAM0_WR_STALL_LIMIT = 527,
+    SRAM1_RD_TRANS_ACCEPTED = 528,
+    SRAM1_RD_TRANS_COMPLETED = 529,
+    SRAM1_RD_DATA_BEAT_RECEIVED = 530,
+    SRAM1_RD_TRAN_REQ_STALLED = 531,
+    SRAM1_WR_TRANS_ACCEPTED = 532,
+    SRAM1_WR_TRANS_COMPLETED_M = 533,
+    SRAM1_WR_TRANS_COMPLETED_S = 534,
+    SRAM1_WR_DATA_BEAT_WRITTEN = 535,
+    SRAM1_WR_TRAN_REQ_STALLED = 536,
+    SRAM1_WR_DATA_BEAT_STALLED = 537,
+    SRAM1_ENABLED_CYCLES = 540,
+    SRAM1_RD_STALL_LIMIT = 542,
+    SRAM1_WR_STALL_LIMIT = 543,
+    SRAM2_RD_TRANS_ACCEPTED = 544,
+    SRAM2_RD_TRANS_COMPLETED = 545,
+    SRAM2_RD_DATA_BEAT_RECEIVED = 546,
+    SRAM2_RD_TRAN_REQ_STALLED = 547,
+    SRAM2_WR_TRANS_ACCEPTED = 548,
+    SRAM2_WR_TRANS_COMPLETED_M = 549,
+    SRAM2_WR_TRANS_COMPLETED_S = 550,
+    SRAM2_WR_DATA_BEAT_WRITTEN = 551,
+    SRAM2_WR_TRAN_REQ_STALLED = 552,
+    SRAM2_WR_DATA_BEAT_STALLED = 553,
+    SRAM2_ENABLED_CYCLES = 556,
+    SRAM2_RD_STALL_LIMIT = 558,
+    SRAM2_WR_STALL_LIMIT = 559,
+    SRAM3_RD_TRANS_ACCEPTED = 560,
+    SRAM3_RD_TRANS_COMPLETED = 561,
+    SRAM3_RD_DATA_BEAT_RECEIVED = 562,
+    SRAM3_RD_TRAN_REQ_STALLED = 563,
+    SRAM3_WR_TRANS_ACCEPTED = 564,
+    SRAM3_WR_TRANS_COMPLETED_M = 565,
+    SRAM3_WR_TRANS_COMPLETED_S = 566,
+    SRAM3_WR_DATA_BEAT_WRITTEN = 567,
+    SRAM3_WR_TRAN_REQ_STALLED = 568,
+    SRAM3_WR_DATA_BEAT_STALLED = 569,
+    SRAM3_ENABLED_CYCLES = 572,
+    SRAM3_RD_STALL_LIMIT = 574,
+    SRAM3_WR_STALL_LIMIT = 575,
+    EXT0_RD_TRANS_ACCEPTED = 640,
+    EXT0_RD_TRANS_COMPLETED = 641,
+    EXT0_RD_DATA_BEAT_RECEIVED = 642,
+    EXT0_RD_TRAN_REQ_STALLED = 643,
+    EXT0_WR_TRANS_ACCEPTED = 644,
+    EXT0_WR_TRANS_COMPLETED_M = 645,
+    EXT0_WR_TRANS_COMPLETED_S = 646,
+    EXT0_WR_DATA_BEAT_WRITTEN = 647,
+    EXT0_WR_TRAN_REQ_STALLED = 648,
+    EXT0_WR_DATA_BEAT_STALLED = 649,
+    EXT0_ENABLED_CYCLES = 652,
+    EXT0_RD_STALL_LIMIT = 654,
+    EXT0_WR_STALL_LIMIT = 655,
+    EXT1_RD_TRANS_ACCEPTED = 656,
+    EXT1_RD_TRANS_COMPLETED = 657,
+    EXT1_RD_DATA_BEAT_RECEIVED = 658,
+    EXT1_RD_TRAN_REQ_STALLED = 659,
+    EXT1_WR_TRANS_ACCEPTED = 660,
+    EXT1_WR_TRANS_COMPLETED_M = 661,
+    EXT1_WR_TRANS_COMPLETED_S = 662,
+    EXT1_WR_DATA_BEAT_WRITTEN = 663,
+    EXT1_WR_TRAN_REQ_STALLED = 664,
+    EXT1_WR_DATA_BEAT_STALLED = 665,
+    EXT1_ENABLED_CYCLES = 668,
+    EXT1_RD_STALL_LIMIT = 670,
+    EXT1_WR_STALL_LIMIT = 671,
+};
+
+enum class pmu_port_disable : uint8_t // Scoped 8-bit enum; note ENABLE is 0 and DISABLE is 1. Mirrors PMU_PORT_DISABLE_* in the #else branch.
+{
+    ENABLE = 0,
+    DISABLE = 1,
+};
+
+enum class pooling_mode : uint8_t // Scoped 8-bit enum, contiguous 0..7; same values as the unscoped POOLING_MODE_* enum in the #else branch.
+{
+    MAX = 0,
+    AVERAGE = 1,
+    REDUCE_SUM = 2,
+    SUM = 3,
+    NONE = 4,
+    MIN = 5,
+    ARGMAX_X = 6,
+    ARGMAX_Y = 7,
+};
+
+enum class privilege_level : uint8_t // Scoped 8-bit enum; same values as the unscoped PRIVILEGE_LEVEL_* enum in the #else branch.
+{
+    USER = 0,
+    PRIVILEGED = 1,
+};
+
+enum class ram_id : uint8_t // Scoped 8-bit enum, contiguous 0..4; same values as the unscoped RAM_ID_* enum in the #else branch.
+{
+    LUT = 0,
+    IB = 1,
+    AB = 2,
+    CB = 3,
+    OB = 4,
+};
+
+enum class resize_mode : uint8_t // Scoped 8-bit enum; same values as the unscoped RESIZE_MODE_* enum in the #else branch.
+{
+    BILINEAR = 0,
+    REPLICATE = 1,
+    NEAREST = 2,
+};
+
+enum class round_mode_ifm : uint8_t // Scoped 8-bit enum; its two entries match values 0..1 of round_mode_ofm. Mirrors ROUND_MODE_IFM_* in the #else branch.
+{
+    DOUBLE_SYMMETRIC = 0,
+    NATURAL = 1,
+};
+
+enum class round_mode_ofm : uint8_t // Scoped 8-bit enum, contiguous 0..5; values 0..1 coincide with round_mode_ifm. Mirrors ROUND_MODE_OFM_* in the #else branch.
+{
+    DOUBLE_SYMMETRIC = 0,
+    NATURAL = 1,
+    DOUBLE_ASYMMETRIC = 2,
+    SYMMETRIC = 3,
+    TRUNCATE_TO_ZERO = 4,
+    TRUNCATE_TO_LOWER = 5,
+};
+
+enum class security_level : uint8_t // Scoped 8-bit enum; note SECURE is 0 and NON_SECURE is 1. Mirrors SECURITY_LEVEL_* in the #else branch.
+{
+    SECURE = 0,
+    NON_SECURE = 1,
+};
+
+enum class state : uint8_t // Scoped 8-bit enum; same values as the unscoped STATE_* enum in the #else branch.
+{
+    STOPPED = 0,
+    RUNNING = 1,
+};
+
+enum class wd_active_core : uint8_t // Scoped 8-bit enum, contiguous 0..3; same values as the unscoped WD_ACTIVE_CORE_* enum in the #else branch.
+{
+    NONE = 0,
+    STANDARD = 1,
+    FAST = 2,
+    TENSOR = 3,
+};
+
+enum class weight_format : uint8_t // Scoped 8-bit enum; same values as the unscoped WEIGHT_FORMAT_* enum in the #else branch.
+{
+    SWD = 0,
+    FWD = 1,
+};
+
+enum class weight_order : uint8_t // Scoped 8-bit enum; same values as the unscoped WEIGHT_ORDER_* enum in the #else branch.
+{
+    DEPTH_FIRST = 0,
+    PART_KERNEL_FIRST = 1,
+};
+
+enum class weight_sparsity : uint8_t // Scoped 8-bit enum; same values as the unscoped WEIGHT_SPARSITY_* enum in the #else branch.
+{
+    NONE = 0,
+    SPARSE_2_4 = 1,
+};
+
+#else
+
+enum acc_format /* Unscoped enum (no fixed underlying type), ACC_FORMAT_ prefix; acc_format_str[] lists the names in value order. */
+{
+    ACC_FORMAT_I32 = 0,
+    ACC_FORMAT_I48 = 1,
+};
+
+enum acc_input /* Unscoped enum, ACC_INPUT_ prefix, contiguous 0..2; acc_input_str[] lists the names in value order. */
+{
+    ACC_INPUT_RESET = 0,
+    ACC_INPUT_KEEP = 1,
+    ACC_INPUT_IFM2 = 2,
+};
+
+enum acc_output /* Unscoped enum, ACC_OUTPUT_ prefix; note ENABLE is 0 and DISABLE is 1. Names are in acc_output_str[]. */
+{
+    ACC_OUTPUT_ENABLE = 0,
+    ACC_OUTPUT_DISABLE = 1,
+};
+
+enum activation_clip_range /* Unscoped enum, ACTIVATION_CLIP_RANGE_ prefix; note NONE is 1, not 0. Names are in activation_clip_range_str[]. */
+{
+    ACTIVATION_CLIP_RANGE_B16 = 0,
+    ACTIVATION_CLIP_RANGE_NONE = 1,
+};
+
+enum activation_format /* Unscoped enum, ACTIVATION_FORMAT_ prefix; activation_format_str[] lists the names in value order. */
+{
+    ACTIVATION_FORMAT_NHWC = 0,
+    ACTIVATION_FORMAT_NHCWB16 = 1,
+};
+
+enum activation_function /* Unscoped enum, ACTIVATION_FUNCTION_ prefix; values 2, 3 and 6 are not assigned (placeholders in activation_function_str[]). */
+{
+    ACTIVATION_FUNCTION_LUT_NONE = 0,
+    ACTIVATION_FUNCTION_LUT_U8_U8 = 1,
+    ACTIVATION_FUNCTION_LUT_S8_S8 = 4,
+    ACTIVATION_FUNCTION_LUT_S8_S16 = 5,
+    ACTIVATION_FUNCTION_LUT_S8_S32 = 7,
+    ACTIVATION_FUNCTION_LUT_S16_S16 = 8,
+    ACTIVATION_FUNCTION_LUT_S16_S32 = 9,
+    ACTIVATION_FUNCTION_LUT_TANH = 10,
+    ACTIVATION_FUNCTION_LUT_SIGMOID = 11,
+};
+
+enum activation_precision /* Unscoped enum, ACTIVATION_PRECISION_ prefix; values are encodings 0..3, not the bit widths in the names. */
+{
+    ACTIVATION_PRECISION_B8 = 0,
+    ACTIVATION_PRECISION_B16 = 1,
+    ACTIVATION_PRECISION_B32 = 2,
+    ACTIVATION_PRECISION_B64 = 3,
+};
+
+enum activation_reverse /* Unscoped enum, ACTIVATION_REVERSE_ prefix, contiguous 0..3; names are in activation_reverse_str[]. */
+{
+    ACTIVATION_REVERSE_NONE = 0,
+    ACTIVATION_REVERSE_H = 1,
+    ACTIVATION_REVERSE_W = 2,
+    ACTIVATION_REVERSE_C = 3,
+};
+
+enum activation_storage /* Unscoped enum, ACTIVATION_STORAGE_ prefix; note NONE is the last value (3), not 0. Names are in activation_storage_str[]. */
+{
+    ACTIVATION_STORAGE_TILE2X2 = 0,
+    ACTIVATION_STORAGE_TILE3X1 = 1,
+    ACTIVATION_STORAGE_CHAINED = 2,
+    ACTIVATION_STORAGE_NONE = 3,
+};
+
+enum activation_transpose /* Unscoped enum, ACTIVATION_TRANSPOSE_ prefix; values 4 and 5 are not assigned (placeholders in activation_transpose_str[]). */
+{
+    ACTIVATION_TRANSPOSE_HWC = 0,
+    ACTIVATION_TRANSPOSE_WHC = 1,
+    ACTIVATION_TRANSPOSE_HCW = 2,
+    ACTIVATION_TRANSPOSE_WCH = 3,
+    ACTIVATION_TRANSPOSE_CHW = 6,
+    ACTIVATION_TRANSPOSE_CWH = 7,
+};
+
+enum activation_type /* Unscoped enum, ACTIVATION_TYPE_ prefix; activation_type_str[] lists the names in value order. */
+{
+    ACTIVATION_TYPE_UNSIGNED = 0,
+    ACTIVATION_TYPE_SIGNED = 1,
+};
+
+enum axi_mem_domain /* Unscoped enum, AXI_MEM_DOMAIN_ prefix, contiguous 0..3; names are in axi_mem_domain_str[]. */
+{
+    AXI_MEM_DOMAIN_NON_SHARABLE = 0,
+    AXI_MEM_DOMAIN_INNER_SHARABLE = 1,
+    AXI_MEM_DOMAIN_OUTER_SHARABLE = 2,
+    AXI_MEM_DOMAIN_SYSTEM = 3,
+};
+
+enum axi_mem_encoding /* Unscoped enum, AXI_MEM_ENCODING_ prefix, contiguous 0..11; names are in axi_mem_encoding_str[]. */
+{
+    AXI_MEM_ENCODING_DEVICE_NON_BUFFERABLE = 0,
+    AXI_MEM_ENCODING_DEVICE_BUFFERABLE = 1,
+    AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_NON_BUFFERABLE = 2,
+    AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_BUFFERABLE = 3,
+    AXI_MEM_ENCODING_WRITE_THROUGH_NO_ALLOCATE = 4,
+    AXI_MEM_ENCODING_WRITE_THROUGH_READ_ALLOCATE = 5,
+    AXI_MEM_ENCODING_WRITE_THROUGH_WRITE_ALLOCATE = 6,
+    AXI_MEM_ENCODING_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 7,
+    AXI_MEM_ENCODING_WRITE_BACK_NO_ALLOCATE = 8,
+    AXI_MEM_ENCODING_WRITE_BACK_READ_ALLOCATE = 9,
+    AXI_MEM_ENCODING_WRITE_BACK_WRITE_ALLOCATE = 10,
+    AXI_MEM_ENCODING_WRITE_BACK_READ_AND_WRITE_ALLOCATE = 11,
+};
+
+enum axi_port /* Unscoped enum, AXI_PORT_ prefix; axi_port_str[] lists the names in value order. */
+{
+    AXI_PORT_SRAM = 0,
+    AXI_PORT_EXT = 1,
+};
+
+enum branch_cond /* Unscoped enum, BRANCH_COND_ prefix; branch_cond_str[] lists the names in value order. */
+{
+    BRANCH_COND_ALWAYS = 0,
+    BRANCH_COND_RF_TRUE = 1,
+};
+
+enum broadcast_mode /* Unscoped enum, BROADCAST_MODE_ prefix, contiguous 0..8; names are in broadcast_mode_str[]. */
+{
+    BROADCAST_MODE_NONE = 0,
+    BROADCAST_MODE_H = 1,
+    BROADCAST_MODE_W = 2,
+    BROADCAST_MODE_HW = 3,
+    BROADCAST_MODE_C = 4,
+    BROADCAST_MODE_CH = 5,
+    BROADCAST_MODE_CW = 6,
+    BROADCAST_MODE_CWH = 7,
+    BROADCAST_MODE_SCALAR = 8,
+};
+
+enum cmd0_opcode /* Unscoped enum, CMD0_OPCODE_ prefix; sparse numbering: NPU_OP_* use 0..19, NPU_SET_* use 256..399, both with gaps. */
+{
+    CMD0_OPCODE_NPU_OP_STOP = 0,
+    CMD0_OPCODE_NPU_OP_IRQ = 1,
+    CMD0_OPCODE_NPU_OP_CONV = 2,
+    CMD0_OPCODE_NPU_OP_DEPTHWISE = 3,
+    CMD0_OPCODE_NPU_OP_POOL = 5,
+    CMD0_OPCODE_NPU_OP_ELEMENTWISE = 6,
+    CMD0_OPCODE_NPU_OP_RESIZE = 7,
+    CMD0_OPCODE_NPU_OP_DMA_START = 16,
+    CMD0_OPCODE_NPU_OP_DMA_WAIT = 17,
+    CMD0_OPCODE_NPU_OP_KERNEL_WAIT = 18,
+    CMD0_OPCODE_NPU_OP_PMU_MASK = 19,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_TOP = 256,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_LEFT = 257,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_RIGHT = 258,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_BOTTOM = 259,
+    CMD0_OPCODE_NPU_SET_IFM_DEPTH_M1 = 260,
+    CMD0_OPCODE_NPU_SET_IFM_PRECISION = 261,
+    CMD0_OPCODE_NPU_SET_IFM_UPSCALE = 263,
+    CMD0_OPCODE_NPU_SET_IFM_BROADCAST = 264,
+    CMD0_OPCODE_NPU_SET_IFM_ZERO_POINT = 265,
+    CMD0_OPCODE_NPU_SET_IFM_WIDTH0_M1 = 266,
+    CMD0_OPCODE_NPU_SET_IFM_HEIGHT0_M1 = 267,
+    CMD0_OPCODE_NPU_SET_IFM_HEIGHT1_M1 = 268,
+    CMD0_OPCODE_NPU_SET_IFM_REGION = 271,
+    CMD0_OPCODE_NPU_SET_OFM_WIDTH_M1 = 273,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT_M1 = 274,
+    CMD0_OPCODE_NPU_SET_OFM_DEPTH_M1 = 275,
+    CMD0_OPCODE_NPU_SET_OFM_PRECISION = 276,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_WIDTH_M1 = 277,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_HEIGHT_M1 = 278,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_DEPTH_M1 = 279,
+    CMD0_OPCODE_NPU_SET_OFM_ZERO_POINT = 280,
+    CMD0_OPCODE_NPU_SET_OFM_WIDTH0_M1 = 282,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT0_M1 = 283,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT1_M1 = 284,
+    CMD0_OPCODE_NPU_SET_OFM_REGION = 287,
+    CMD0_OPCODE_NPU_SET_KERNEL_WIDTH_M1 = 288,
+    CMD0_OPCODE_NPU_SET_KERNEL_HEIGHT_M1 = 289,
+    CMD0_OPCODE_NPU_SET_KERNEL_STRIDE = 290,
+    CMD0_OPCODE_NPU_SET_ACC_FORMAT = 292,
+    CMD0_OPCODE_NPU_SET_ACTIVATION = 293,
+    CMD0_OPCODE_NPU_SET_ACTIVATION_MIN = 294,
+    CMD0_OPCODE_NPU_SET_ACTIVATION_MAX = 295,
+    CMD0_OPCODE_NPU_SET_WEIGHT_REGION = 296,
+    CMD0_OPCODE_NPU_SET_SCALE_REGION = 297,
+    CMD0_OPCODE_NPU_SET_RESIZE_X_SCALE_N_M1 = 298,
+    CMD0_OPCODE_NPU_SET_RESIZE_Y_SCALE_N_M1 = 299,
+    CMD0_OPCODE_NPU_SET_RESIZE_X_OFFSET = 300,
+    CMD0_OPCODE_NPU_SET_RESIZE_Y_OFFSET = 301,
+    CMD0_OPCODE_NPU_SET_WEIGHT_FORMAT = 302,
+    CMD0_OPCODE_NPU_SET_BLOCKDEP = 303,
+    CMD0_OPCODE_NPU_SET_DMA0_SRC_REGION = 304,
+    CMD0_OPCODE_NPU_SET_DMA0_DST_REGION = 305,
+    CMD0_OPCODE_NPU_SET_DMA0_SIZE0 = 306,
+    CMD0_OPCODE_NPU_SET_DMA0_SIZE1 = 307,
+    CMD0_OPCODE_NPU_SET_DMA0_IDX_REGION = 308,
+    CMD0_OPCODE_NPU_SET_IFM2_BROADCAST = 384,
+    CMD0_OPCODE_NPU_SET_IFM2_PRECISION = 389,
+    CMD0_OPCODE_NPU_SET_IFM2_ZERO_POINT = 393,
+    CMD0_OPCODE_NPU_SET_IFM2_WIDTH0_M1 = 394,
+    CMD0_OPCODE_NPU_SET_IFM2_HEIGHT0_M1 = 395,
+    CMD0_OPCODE_NPU_SET_IFM2_HEIGHT1_M1 = 396,
+    CMD0_OPCODE_NPU_SET_IFM2_REGION = 399,
+};
+
+enum cmd1_opcode /* Unscoped enum, CMD1_OPCODE_ prefix; assigned ranges: 0-6, 16-22, 32-39, 48-57, 128-134, 144-151, plus NPU_OP_BRANCH at 256. */
+{
+    CMD1_OPCODE_NPU_SET_IFM_BASE0 = 0,
+    CMD1_OPCODE_NPU_SET_IFM_BASE1 = 1,
+    CMD1_OPCODE_NPU_SET_IFM_BASE2 = 2,
+    CMD1_OPCODE_NPU_SET_IFM_BASE3 = 3,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_X = 4,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_Y = 5,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_C = 6,
+    CMD1_OPCODE_NPU_SET_OFM_BASE0 = 16,
+    CMD1_OPCODE_NPU_SET_OFM_BASE1 = 17,
+    CMD1_OPCODE_NPU_SET_OFM_BASE2 = 18,
+    CMD1_OPCODE_NPU_SET_OFM_BASE3 = 19,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_X = 20,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_Y = 21,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_C = 22,
+    CMD1_OPCODE_NPU_SET_WEIGHT_BASE = 32,
+    CMD1_OPCODE_NPU_SET_WEIGHT_LENGTH = 33,
+    CMD1_OPCODE_NPU_SET_SCALE_BASE = 34,
+    CMD1_OPCODE_NPU_SET_SCALE_LENGTH = 35,
+    CMD1_OPCODE_NPU_SET_OFM_SCALE = 36,
+    CMD1_OPCODE_NPU_SET_IFM_SCALE = 37,
+    CMD1_OPCODE_NPU_SET_IFM2_SCALE = 38,
+    CMD1_OPCODE_NPU_SET_OP_SCALAR = 39,
+    CMD1_OPCODE_NPU_SET_DMA0_SRC = 48,
+    CMD1_OPCODE_NPU_SET_DMA0_DST = 49,
+    CMD1_OPCODE_NPU_SET_DMA0_LEN = 50,
+    CMD1_OPCODE_NPU_SET_DMA0_SRC_STRIDE0 = 51,
+    CMD1_OPCODE_NPU_SET_DMA0_SRC_STRIDE1 = 52,
+    CMD1_OPCODE_NPU_SET_DMA0_DST_STRIDE0 = 53,
+    CMD1_OPCODE_NPU_SET_DMA0_DST_STRIDE1 = 54,
+    CMD1_OPCODE_NPU_SET_DMA0_IDX = 55,
+    CMD1_OPCODE_NPU_SET_DMA0_IDX_MAX = 56,
+    CMD1_OPCODE_NPU_SET_DMA0_IDX_SKIP1 = 57,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE0 = 128,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE1 = 129,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE2 = 130,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE3 = 131,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_X = 132,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_Y = 133,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_C = 134,
+    CMD1_OPCODE_NPU_SET_WEIGHT1_BASE = 144,
+    CMD1_OPCODE_NPU_SET_WEIGHT1_LENGTH = 145,
+    CMD1_OPCODE_NPU_SET_WEIGHT2_BASE = 146,
+    CMD1_OPCODE_NPU_SET_WEIGHT2_LENGTH = 147,
+    CMD1_OPCODE_NPU_SET_WEIGHT3_BASE = 148,
+    CMD1_OPCODE_NPU_SET_WEIGHT3_LENGTH = 149,
+    CMD1_OPCODE_NPU_SET_RESIZE_X = 150,
+    CMD1_OPCODE_NPU_SET_RESIZE_Y = 151,
+    CMD1_OPCODE_NPU_OP_BRANCH = 256,
+};
+
+enum cmd_ctrl /* Unscoped enum, CMD_CTRL_ prefix; two values, CMD0_CTRL = 0 and CMD1_CTRL = 1. */
+{
+    CMD_CTRL_CMD0_CTRL = 0,
+    CMD_CTRL_CMD1_CTRL = 1,
+};
+
+enum custom_dma /* Unscoped enum, CUSTOM_DMA_ prefix; NOT_IMPLEMENTED = 0, IMPLEMENTED = 1. */
+{
+    CUSTOM_DMA_NOT_IMPLEMENTED = 0,
+    CUSTOM_DMA_IMPLEMENTED = 1,
+};
+
+enum dma_fault_channel /* Unscoped enum, DMA_FAULT_CHANNEL_ prefix; *_READ use 0..4, *_WRITE use 8..9, values 5..7 are not assigned. */
+{
+    DMA_FAULT_CHANNEL_CMD_READ = 0,
+    DMA_FAULT_CHANNEL_IFM_READ = 1,
+    DMA_FAULT_CHANNEL_WEIGHT_READ = 2,
+    DMA_FAULT_CHANNEL_SBS_READ = 3,
+    DMA_FAULT_CHANNEL_MEM2MEM_READ = 4,
+    DMA_FAULT_CHANNEL_OFM_WRITE = 8,
+    DMA_FAULT_CHANNEL_MEM2MEM_WRITE = 9,
+};
+
+enum dma_fault_src /* Unscoped enum, DMA_FAULT_SRC_ prefix; SRAM = 0 and EXT = 1, the same numbering as enum axi_port. */
+{
+    DMA_FAULT_SRC_SRAM = 0,
+    DMA_FAULT_SRC_EXT = 1,
+};
+
+enum dma_idx_mode /* Unscoped enum, DMA_IDX_MODE_ prefix; DISABLED = 0, ENABLED = 1. */
+{
+    DMA_IDX_MODE_DISABLED = 0,
+    DMA_IDX_MODE_ENABLED = 1,
+};
+
+enum dma_region_mode /* Unscoped enum, DMA_REGION_MODE_ prefix; EXTERNAL = 0, INTERNAL = 1. */
+{
+    DMA_REGION_MODE_EXTERNAL = 0,
+    DMA_REGION_MODE_INTERNAL = 1,
+};
+
+enum dma_stride_mode /* Unscoped enum, DMA_STRIDE_MODE_ prefix; values are encodings 0..2, one less than the digit in each name. */
+{
+    DMA_STRIDE_MODE_D1 = 0,
+    DMA_STRIDE_MODE_D2 = 1,
+    DMA_STRIDE_MODE_D3 = 2,
+};
+
+enum elementwise_mode /* Unscoped enum, ELEMENTWISE_MODE_ prefix; assigned values: 0-11, CMP_* 16-19, 33-36 and 42; all others are not assigned. */
+{
+    ELEMENTWISE_MODE_MUL = 0,
+    ELEMENTWISE_MODE_ADD = 1,
+    ELEMENTWISE_MODE_SUB = 2,
+    ELEMENTWISE_MODE_MIN = 3,
+    ELEMENTWISE_MODE_MAX = 4,
+    ELEMENTWISE_MODE_LRELU = 5,
+    ELEMENTWISE_MODE_ABS = 6,
+    ELEMENTWISE_MODE_CLZ = 7,
+    ELEMENTWISE_MODE_SHR = 8,
+    ELEMENTWISE_MODE_SHL = 9,
+    ELEMENTWISE_MODE_LSR = 10,
+    ELEMENTWISE_MODE_DIV = 11,
+    ELEMENTWISE_MODE_CMP_EQ = 16,
+    ELEMENTWISE_MODE_CMP_NE = 17,
+    ELEMENTWISE_MODE_CMP_GE = 18,
+    ELEMENTWISE_MODE_CMP_GT = 19,
+    ELEMENTWISE_MODE_AND = 33,
+    ELEMENTWISE_MODE_OR = 34,
+    ELEMENTWISE_MODE_XOR = 35,
+    ELEMENTWISE_MODE_NOT = 36,
+    ELEMENTWISE_MODE_AND_NOT = 42,
+};
+
+enum ifm_upscale_mode /* Unscoped counterpart of `enum class ifm_upscale_mode` in the preceding branch; same values, IFM_UPSCALE_MODE_ prefix. */
+{
+    IFM_UPSCALE_MODE_NONE = 0,
+    IFM_UPSCALE_MODE_NEAREST = 1,
+    IFM_UPSCALE_MODE_ZEROS = 2,
+};
+
+enum kernel_decomposition /* Unscoped counterpart of `enum class kernel_decomposition` in the preceding branch; same values, KERNEL_DECOMPOSITION_ prefix. */
+{
+    KERNEL_DECOMPOSITION_D8X8 = 0,
+    KERNEL_DECOMPOSITION_D4X4 = 1,
+};
+
+enum kernel_dilation /* Unscoped counterpart of `enum class kernel_dilation` in the preceding branch; same values, KERNEL_DILATION_ prefix. */
+{
+    KERNEL_DILATION_NONE = 0,
+    KERNEL_DILATION_X2 = 1,
+};
+
+enum max_beats /* Unscoped counterpart of `enum class max_beats` in the preceding branch; same values, MAX_BEATS_ prefix. */
+{
+    MAX_BEATS_B64 = 0,
+    MAX_BEATS_B128 = 1,
+    MAX_BEATS_B256 = 2,
+};
+
+enum microblock /* Unscoped counterpart of `enum class microblock` in the preceding branch; same values, MICROBLOCK_ prefix. */
+{
+    MICROBLOCK_U1X1 = 0,
+    MICROBLOCK_U1X2 = 1,
+    MICROBLOCK_U1X4 = 2,
+    MICROBLOCK_U2X2 = 3,
+    MICROBLOCK_U2X4 = 4,
+    MICROBLOCK_U4X4 = 5,
+};
+
+enum ofm_scale_mode /* Unscoped counterpart of `enum class ofm_scale_mode` in the preceding branch; same values, OFM_SCALE_MODE_ prefix. */
+{
+    OFM_SCALE_MODE_PER_CHANNEL = 0,
+    OFM_SCALE_MODE_GLOBAL = 1,
+};
+
+enum pmu_axi_channel /* Unscoped counterpart of `enum class pmu_axi_channel`; same values, PMU_AXI_CHANNEL_ prefix; value 7 is not assigned. */
+{
+    PMU_AXI_CHANNEL_RD_CMD = 0,
+    PMU_AXI_CHANNEL_RD_IFM = 1,
+    PMU_AXI_CHANNEL_RD_WEIGHTS = 2,
+    PMU_AXI_CHANNEL_RD_SCALE_BIAS = 3,
+    PMU_AXI_CHANNEL_RD_MEM2MEM = 4,
+    PMU_AXI_CHANNEL_RD_IFM_STREAM = 5,
+    PMU_AXI_CHANNEL_RD_MEM2MEM_IDX = 6,
+    PMU_AXI_CHANNEL_WR_OFM = 8,
+    PMU_AXI_CHANNEL_WR_MEM2MEM = 9,
+};
+
+enum pmu_event /* Unscoped counterpart of `enum class pmu_event` in the preceding branch; PMU_EVENT_ prefix; sparse numbering from 0 up to 671. */
+{
+    PMU_EVENT_NO_EVENT = 0,
+    PMU_EVENT_CYCLE = 17,
+    PMU_EVENT_NPU_IDLE = 32,
+    PMU_EVENT_CC_STALLED_ON_BLOCKDEP = 33,
+    PMU_EVENT_CC_STALLED_ON_SHRAM_RECONFIG = 34,
+    PMU_EVENT_NPU_ACTIVE = 35,
+    PMU_EVENT_MAC_ACTIVE = 48,
+    PMU_EVENT_MAC_DPU_ACTIVE = 51,
+    PMU_EVENT_MAC_STALLED_BY_W_OR_ACC = 52,
+    PMU_EVENT_MAC_STALLED_BY_W = 53,
+    PMU_EVENT_MAC_STALLED_BY_ACC = 54,
+    PMU_EVENT_MAC_STALLED_BY_IB = 55,
+    PMU_EVENT_AO_ACTIVE = 64,
+    PMU_EVENT_AO_STALLED_BY_BS_OR_OB = 67,
+    PMU_EVENT_AO_STALLED_BY_BS = 68,
+    PMU_EVENT_AO_STALLED_BY_OB = 69,
+    PMU_EVENT_AO_STALLED_BY_AB_OR_CB = 70,
+    PMU_EVENT_AO_STALLED_BY_AB = 71,
+    PMU_EVENT_AO_STALLED_BY_CB = 72,
+    PMU_EVENT_WD_ACTIVE = 80,
+    PMU_EVENT_WD_STALLED = 81,
+    PMU_EVENT_WD_STALLED_BY_WD_BUF = 83,
+    PMU_EVENT_WD_STALLED_BY_WS_FC = 84,
+    PMU_EVENT_WD_STALLED_BY_WS_TC = 85,
+    PMU_EVENT_WD_TRANS_WBLK = 89,
+    PMU_EVENT_WD_TRANS_WS_FC = 90,
+    PMU_EVENT_WD_TRANS_WS_TC = 91,
+    PMU_EVENT_WD_STALLED_BY_WS_SC0 = 96,
+    PMU_EVENT_WD_STALLED_BY_WS_SC1 = 97,
+    PMU_EVENT_WD_STALLED_BY_WS_SC2 = 98,
+    PMU_EVENT_WD_STALLED_BY_WS_SC3 = 99,
+    PMU_EVENT_WD_PARSE_ACTIVE_SC0 = 100,
+    PMU_EVENT_WD_PARSE_ACTIVE_SC1 = 101,
+    PMU_EVENT_WD_PARSE_ACTIVE_SC2 = 102,
+    PMU_EVENT_WD_PARSE_ACTIVE_SC3 = 103,
+    PMU_EVENT_WD_PARSE_STALL_SC0 = 104,
+    PMU_EVENT_WD_PARSE_STALL_SC1 = 105,
+    PMU_EVENT_WD_PARSE_STALL_SC2 = 106,
+    PMU_EVENT_WD_PARSE_STALL_SC3 = 107,
+    PMU_EVENT_WD_PARSE_STALL_IN_SC0 = 108,
+    PMU_EVENT_WD_PARSE_STALL_IN_SC1 = 109,
+    PMU_EVENT_WD_PARSE_STALL_IN_SC2 = 110,
+    PMU_EVENT_WD_PARSE_STALL_IN_SC3 = 111,
+    PMU_EVENT_WD_PARSE_STALL_OUT_SC0 = 112,
+    PMU_EVENT_WD_PARSE_STALL_OUT_SC1 = 113,
+    PMU_EVENT_WD_PARSE_STALL_OUT_SC2 = 114,
+    PMU_EVENT_WD_PARSE_STALL_OUT_SC3 = 115,
+    PMU_EVENT_WD_TRANS_WS_SC0 = 116,
+    PMU_EVENT_WD_TRANS_WS_SC1 = 117,
+    PMU_EVENT_WD_TRANS_WS_SC2 = 118,
+    PMU_EVENT_WD_TRANS_WS_SC3 = 119,
+    PMU_EVENT_WD_TRANS_WB0 = 120,
+    PMU_EVENT_WD_TRANS_WB1 = 121,
+    PMU_EVENT_WD_TRANS_WB2 = 122,
+    PMU_EVENT_WD_TRANS_WB3 = 123,
+    PMU_EVENT_SRAM_RD_TRANS_ACCEPTED = 128,
+    PMU_EVENT_SRAM_RD_TRANS_COMPLETED = 129,
+    PMU_EVENT_SRAM_RD_DATA_BEAT_RECEIVED = 130,
+    PMU_EVENT_SRAM_RD_TRAN_REQ_STALLED = 131,
+    PMU_EVENT_SRAM_WR_TRANS_ACCEPTED = 132,
+    PMU_EVENT_SRAM_WR_TRANS_COMPLETED_M = 133,
+    PMU_EVENT_SRAM_WR_TRANS_COMPLETED_S = 134,
+    PMU_EVENT_SRAM_WR_DATA_BEAT_WRITTEN = 135,
+    PMU_EVENT_SRAM_WR_TRAN_REQ_STALLED = 136,
+    PMU_EVENT_SRAM_WR_DATA_BEAT_STALLED = 137,
+    PMU_EVENT_SRAM_ENABLED_CYCLES = 140,
+    PMU_EVENT_SRAM_RD_STALL_LIMIT = 142,
+    PMU_EVENT_SRAM_WR_STALL_LIMIT = 143,
+    PMU_EVENT_AXI_LATENCY_ANY = 160,
+    PMU_EVENT_AXI_LATENCY_32 = 161,
+    PMU_EVENT_AXI_LATENCY_64 = 162,
+    PMU_EVENT_AXI_LATENCY_128 = 163,
+    PMU_EVENT_AXI_LATENCY_256 = 164,
+    PMU_EVENT_AXI_LATENCY_512 = 165,
+    PMU_EVENT_AXI_LATENCY_1024 = 166,
+    PMU_EVENT_ECC_DMA = 176,
+    PMU_EVENT_ECC_MAC_IB = 177,
+    PMU_EVENT_ECC_MAC_AB = 178,
+    PMU_EVENT_ECC_AO_CB = 179,
+    PMU_EVENT_ECC_AO_OB = 180,
+    PMU_EVENT_ECC_AO_LUT = 181,
+    PMU_EVENT_EXT_RD_TRANS_ACCEPTED = 384,
+    PMU_EVENT_EXT_RD_TRANS_COMPLETED = 385,
+    PMU_EVENT_EXT_RD_DATA_BEAT_RECEIVED = 386,
+    PMU_EVENT_EXT_RD_TRAN_REQ_STALLED = 387,
+    PMU_EVENT_EXT_WR_TRANS_ACCEPTED = 388,
+    PMU_EVENT_EXT_WR_TRANS_COMPLETED_M = 389,
+    PMU_EVENT_EXT_WR_TRANS_COMPLETED_S = 390,
+    PMU_EVENT_EXT_WR_DATA_BEAT_WRITTEN = 391,
+    PMU_EVENT_EXT_WR_TRAN_REQ_STALLED = 392,
+    PMU_EVENT_EXT_WR_DATA_BEAT_STALLED = 393,
+    PMU_EVENT_EXT_ENABLED_CYCLES = 396,
+    PMU_EVENT_EXT_RD_STALL_LIMIT = 398,
+    PMU_EVENT_EXT_WR_STALL_LIMIT = 399,
+    PMU_EVENT_SRAM0_RD_TRANS_ACCEPTED = 512,
+    PMU_EVENT_SRAM0_RD_TRANS_COMPLETED = 513,
+    PMU_EVENT_SRAM0_RD_DATA_BEAT_RECEIVED = 514,
+    PMU_EVENT_SRAM0_RD_TRAN_REQ_STALLED = 515,
+    PMU_EVENT_SRAM0_WR_TRANS_ACCEPTED = 516,
+    PMU_EVENT_SRAM0_WR_TRANS_COMPLETED_M = 517,
+    PMU_EVENT_SRAM0_WR_TRANS_COMPLETED_S = 518,
+    PMU_EVENT_SRAM0_WR_DATA_BEAT_WRITTEN = 519,
+    PMU_EVENT_SRAM0_WR_TRAN_REQ_STALLED = 520,
+    PMU_EVENT_SRAM0_WR_DATA_BEAT_STALLED = 521,
+    PMU_EVENT_SRAM0_ENABLED_CYCLES = 524,
+    PMU_EVENT_SRAM0_RD_STALL_LIMIT = 526,
+    PMU_EVENT_SRAM0_WR_STALL_LIMIT = 527,
+    PMU_EVENT_SRAM1_RD_TRANS_ACCEPTED = 528,
+    PMU_EVENT_SRAM1_RD_TRANS_COMPLETED = 529,
+    PMU_EVENT_SRAM1_RD_DATA_BEAT_RECEIVED = 530,
+    PMU_EVENT_SRAM1_RD_TRAN_REQ_STALLED = 531,
+    PMU_EVENT_SRAM1_WR_TRANS_ACCEPTED = 532,
+    PMU_EVENT_SRAM1_WR_TRANS_COMPLETED_M = 533,
+    PMU_EVENT_SRAM1_WR_TRANS_COMPLETED_S = 534,
+    PMU_EVENT_SRAM1_WR_DATA_BEAT_WRITTEN = 535,
+    PMU_EVENT_SRAM1_WR_TRAN_REQ_STALLED = 536,
+    PMU_EVENT_SRAM1_WR_DATA_BEAT_STALLED = 537,
+    PMU_EVENT_SRAM1_ENABLED_CYCLES = 540,
+    PMU_EVENT_SRAM1_RD_STALL_LIMIT = 542,
+    PMU_EVENT_SRAM1_WR_STALL_LIMIT = 543,
+    PMU_EVENT_SRAM2_RD_TRANS_ACCEPTED = 544,
+    PMU_EVENT_SRAM2_RD_TRANS_COMPLETED = 545,
+    PMU_EVENT_SRAM2_RD_DATA_BEAT_RECEIVED = 546,
+    PMU_EVENT_SRAM2_RD_TRAN_REQ_STALLED = 547,
+    PMU_EVENT_SRAM2_WR_TRANS_ACCEPTED = 548,
+    PMU_EVENT_SRAM2_WR_TRANS_COMPLETED_M = 549,
+    PMU_EVENT_SRAM2_WR_TRANS_COMPLETED_S = 550,
+    PMU_EVENT_SRAM2_WR_DATA_BEAT_WRITTEN = 551,
+    PMU_EVENT_SRAM2_WR_TRAN_REQ_STALLED = 552,
+    PMU_EVENT_SRAM2_WR_DATA_BEAT_STALLED = 553,
+    PMU_EVENT_SRAM2_ENABLED_CYCLES = 556,
+    PMU_EVENT_SRAM2_RD_STALL_LIMIT = 558,
+    PMU_EVENT_SRAM2_WR_STALL_LIMIT = 559,
+    PMU_EVENT_SRAM3_RD_TRANS_ACCEPTED = 560,
+    PMU_EVENT_SRAM3_RD_TRANS_COMPLETED = 561,
+    PMU_EVENT_SRAM3_RD_DATA_BEAT_RECEIVED = 562,
+    PMU_EVENT_SRAM3_RD_TRAN_REQ_STALLED = 563,
+    PMU_EVENT_SRAM3_WR_TRANS_ACCEPTED = 564,
+    PMU_EVENT_SRAM3_WR_TRANS_COMPLETED_M = 565,
+    PMU_EVENT_SRAM3_WR_TRANS_COMPLETED_S = 566,
+    PMU_EVENT_SRAM3_WR_DATA_BEAT_WRITTEN = 567,
+    PMU_EVENT_SRAM3_WR_TRAN_REQ_STALLED = 568,
+    PMU_EVENT_SRAM3_WR_DATA_BEAT_STALLED = 569,
+    PMU_EVENT_SRAM3_ENABLED_CYCLES = 572,
+    PMU_EVENT_SRAM3_RD_STALL_LIMIT = 574,
+    PMU_EVENT_SRAM3_WR_STALL_LIMIT = 575,
+    PMU_EVENT_EXT0_RD_TRANS_ACCEPTED = 640,
+    PMU_EVENT_EXT0_RD_TRANS_COMPLETED = 641,
+    PMU_EVENT_EXT0_RD_DATA_BEAT_RECEIVED = 642,
+    PMU_EVENT_EXT0_RD_TRAN_REQ_STALLED = 643,
+    PMU_EVENT_EXT0_WR_TRANS_ACCEPTED = 644,
+    PMU_EVENT_EXT0_WR_TRANS_COMPLETED_M = 645,
+    PMU_EVENT_EXT0_WR_TRANS_COMPLETED_S = 646,
+    PMU_EVENT_EXT0_WR_DATA_BEAT_WRITTEN = 647,
+    PMU_EVENT_EXT0_WR_TRAN_REQ_STALLED = 648,
+    PMU_EVENT_EXT0_WR_DATA_BEAT_STALLED = 649,
+    PMU_EVENT_EXT0_ENABLED_CYCLES = 652,
+    PMU_EVENT_EXT0_RD_STALL_LIMIT = 654,
+    PMU_EVENT_EXT0_WR_STALL_LIMIT = 655,
+    PMU_EVENT_EXT1_RD_TRANS_ACCEPTED = 656,
+    PMU_EVENT_EXT1_RD_TRANS_COMPLETED = 657,
+    PMU_EVENT_EXT1_RD_DATA_BEAT_RECEIVED = 658,
+    PMU_EVENT_EXT1_RD_TRAN_REQ_STALLED = 659,
+    PMU_EVENT_EXT1_WR_TRANS_ACCEPTED = 660,
+    PMU_EVENT_EXT1_WR_TRANS_COMPLETED_M = 661,
+    PMU_EVENT_EXT1_WR_TRANS_COMPLETED_S = 662,
+    PMU_EVENT_EXT1_WR_DATA_BEAT_WRITTEN = 663,
+    PMU_EVENT_EXT1_WR_TRAN_REQ_STALLED = 664,
+    PMU_EVENT_EXT1_WR_DATA_BEAT_STALLED = 665,
+    PMU_EVENT_EXT1_ENABLED_CYCLES = 668,
+    PMU_EVENT_EXT1_RD_STALL_LIMIT = 670,
+    PMU_EVENT_EXT1_WR_STALL_LIMIT = 671,
+};
+
+enum pmu_port_disable /* Unscoped counterpart of `enum class pmu_port_disable`; same values, PMU_PORT_DISABLE_ prefix; ENABLE is 0, DISABLE is 1. */
+{
+    PMU_PORT_DISABLE_ENABLE = 0,
+    PMU_PORT_DISABLE_DISABLE = 1,
+};
+
+enum pooling_mode /* Unscoped counterpart of `enum class pooling_mode` in the preceding branch; same values, POOLING_MODE_ prefix. */
+{
+    POOLING_MODE_MAX = 0,
+    POOLING_MODE_AVERAGE = 1,
+    POOLING_MODE_REDUCE_SUM = 2,
+    POOLING_MODE_SUM = 3,
+    POOLING_MODE_NONE = 4,
+    POOLING_MODE_MIN = 5,
+    POOLING_MODE_ARGMAX_X = 6,
+    POOLING_MODE_ARGMAX_Y = 7,
+};
+
+enum privilege_level /* Unscoped counterpart of `enum class privilege_level` in the preceding branch; same values, PRIVILEGE_LEVEL_ prefix. */
+{
+    PRIVILEGE_LEVEL_USER = 0,
+    PRIVILEGE_LEVEL_PRIVILEGED = 1,
+};
+
+enum ram_id /* Unscoped counterpart of `enum class ram_id` in the preceding branch; same values, RAM_ID_ prefix. */
+{
+    RAM_ID_LUT = 0,
+    RAM_ID_IB = 1,
+    RAM_ID_AB = 2,
+    RAM_ID_CB = 3,
+    RAM_ID_OB = 4,
+};
+
+enum resize_mode /* Unscoped counterpart of `enum class resize_mode` in the preceding branch; same values, RESIZE_MODE_ prefix. */
+{
+    RESIZE_MODE_BILINEAR = 0,
+    RESIZE_MODE_REPLICATE = 1,
+    RESIZE_MODE_NEAREST = 2,
+};
+
+enum round_mode_ifm /* Unscoped counterpart of `enum class round_mode_ifm` in the preceding branch; same values, ROUND_MODE_IFM_ prefix. */
+{
+    ROUND_MODE_IFM_DOUBLE_SYMMETRIC = 0,
+    ROUND_MODE_IFM_NATURAL = 1,
+};
+
+enum round_mode_ofm /* Unscoped counterpart of `enum class round_mode_ofm` in the preceding branch; same values, ROUND_MODE_OFM_ prefix. */
+{
+    ROUND_MODE_OFM_DOUBLE_SYMMETRIC = 0,
+    ROUND_MODE_OFM_NATURAL = 1,
+    ROUND_MODE_OFM_DOUBLE_ASYMMETRIC = 2,
+    ROUND_MODE_OFM_SYMMETRIC = 3,
+    ROUND_MODE_OFM_TRUNCATE_TO_ZERO = 4,
+    ROUND_MODE_OFM_TRUNCATE_TO_LOWER = 5,
+};
+
+enum security_level /* Unscoped counterpart of `enum class security_level`; same values, SECURITY_LEVEL_ prefix; SECURE is 0, NON_SECURE is 1. */
+{
+    SECURITY_LEVEL_SECURE = 0,
+    SECURITY_LEVEL_NON_SECURE = 1,
+};
+
+enum state /* Unscoped counterpart of `enum class state` in the preceding branch; same values, STATE_ prefix. */
+{
+    STATE_STOPPED = 0,
+    STATE_RUNNING = 1,
+};
+
+enum wd_active_core /* Unscoped counterpart of `enum class wd_active_core` in the preceding branch; same values, WD_ACTIVE_CORE_ prefix. */
+{
+    WD_ACTIVE_CORE_NONE = 0,
+    WD_ACTIVE_CORE_STANDARD = 1,
+    WD_ACTIVE_CORE_FAST = 2,
+    WD_ACTIVE_CORE_TENSOR = 3,
+};
+
+enum weight_format /* Unscoped counterpart of `enum class weight_format` in the preceding branch; same values, WEIGHT_FORMAT_ prefix. */
+{
+    WEIGHT_FORMAT_SWD = 0,
+    WEIGHT_FORMAT_FWD = 1,
+};
+
+enum weight_order /* Unscoped counterpart of `enum class weight_order` in the preceding branch; same values, WEIGHT_ORDER_ prefix. */
+{
+    WEIGHT_ORDER_DEPTH_FIRST = 0,
+    WEIGHT_ORDER_PART_KERNEL_FIRST = 1,
+};
+
+enum weight_sparsity /* Unscoped counterpart of `enum class weight_sparsity` in the preceding branch; same values, WEIGHT_SPARSITY_ prefix. */
+{
+    WEIGHT_SPARSITY_NONE = 0,
+    WEIGHT_SPARSITY_SPARSE_2_4 = 1,
+};
+
+#endif
+
+#ifdef NPU_DISASSEMBLE
+
+static const char* acc_format_str[] = /* Name table for enum acc_format: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACC_FORMAT_I32",
+    "ACC_FORMAT_I48",
+};
+
+static const char* acc_input_str[] = /* Name table for enum acc_input: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACC_INPUT_RESET",
+    "ACC_INPUT_KEEP",
+    "ACC_INPUT_IFM2",
+};
+
+static const char* acc_output_str[] = /* Name table for enum acc_output: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACC_OUTPUT_ENABLE",
+    "ACC_OUTPUT_DISABLE",
+};
+
+static const char* activation_clip_range_str[] = /* Name table for enum activation_clip_range: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACTIVATION_CLIP_RANGE_B16",
+    "ACTIVATION_CLIP_RANGE_NONE",
+};
+
+static const char* activation_format_str[] = /* Name table for enum activation_format: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACTIVATION_FORMAT_NHWC",
+    "ACTIVATION_FORMAT_NHCWB16",
+};
+
+static const char* activation_function_str[] = /* Name table for enum activation_function, in value order; the "****" entries sit at 2, 3 and 6, which the enum does not assign. */
+{
+    "ACTIVATION_FUNCTION_LUT_NONE",
+    "ACTIVATION_FUNCTION_LUT_U8_U8",
+    "****",
+    "****",
+    "ACTIVATION_FUNCTION_LUT_S8_S8",
+    "ACTIVATION_FUNCTION_LUT_S8_S16",
+    "****",
+    "ACTIVATION_FUNCTION_LUT_S8_S32",
+    "ACTIVATION_FUNCTION_LUT_S16_S16",
+    "ACTIVATION_FUNCTION_LUT_S16_S32",
+    "ACTIVATION_FUNCTION_LUT_TANH",
+    "ACTIVATION_FUNCTION_LUT_SIGMOID",
+};
+
+static const char* activation_precision_str[] = /* Name table for enum activation_precision: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACTIVATION_PRECISION_B8",
+    "ACTIVATION_PRECISION_B16",
+    "ACTIVATION_PRECISION_B32",
+    "ACTIVATION_PRECISION_B64",
+};
+
+static const char* activation_reverse_str[] = /* Name table for enum activation_reverse: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACTIVATION_REVERSE_NONE",
+    "ACTIVATION_REVERSE_H",
+    "ACTIVATION_REVERSE_W",
+    "ACTIVATION_REVERSE_C",
+};
+
+static const char* activation_storage_str[] = /* Name table for enum activation_storage: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACTIVATION_STORAGE_TILE2X2",
+    "ACTIVATION_STORAGE_TILE3X1",
+    "ACTIVATION_STORAGE_CHAINED",
+    "ACTIVATION_STORAGE_NONE",
+};
+
+static const char* activation_transpose_str[] = /* Name table for enum activation_transpose, in value order; the "****" entries sit at 4 and 5, which the enum does not assign. */
+{
+    "ACTIVATION_TRANSPOSE_HWC",
+    "ACTIVATION_TRANSPOSE_WHC",
+    "ACTIVATION_TRANSPOSE_HCW",
+    "ACTIVATION_TRANSPOSE_WCH",
+    "****",
+    "****",
+    "ACTIVATION_TRANSPOSE_CHW",
+    "ACTIVATION_TRANSPOSE_CWH",
+};
+
+static const char* activation_type_str[] = /* Name table for enum activation_type: entry i holds the name of the enumerator whose value is i. */
+{
+    "ACTIVATION_TYPE_UNSIGNED",
+    "ACTIVATION_TYPE_SIGNED",
+};
+
+static const char* axi_mem_domain_str[] = /* Name table for enum axi_mem_domain: entry i holds the name of the enumerator whose value is i. */
+{
+    "AXI_MEM_DOMAIN_NON_SHARABLE",
+    "AXI_MEM_DOMAIN_INNER_SHARABLE",
+    "AXI_MEM_DOMAIN_OUTER_SHARABLE",
+    "AXI_MEM_DOMAIN_SYSTEM",
+};
+
+static const char* axi_mem_encoding_str[] = /* Name table for enum axi_mem_encoding: entry i holds the name of the enumerator whose value is i (0..11). */
+{
+    "AXI_MEM_ENCODING_DEVICE_NON_BUFFERABLE",
+    "AXI_MEM_ENCODING_DEVICE_BUFFERABLE",
+    "AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_NON_BUFFERABLE",
+    "AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_BUFFERABLE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_NO_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_READ_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_NO_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_READ_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_READ_AND_WRITE_ALLOCATE",
+};
+
+static const char* axi_port_str[] = /* Name table for enum axi_port: entry i holds the name of the enumerator whose value is i. */
+{
+    "AXI_PORT_SRAM",
+    "AXI_PORT_EXT",
+};
+
+static const char* branch_cond_str[] = /* Name table for enum branch_cond: entry i holds the name of the enumerator whose value is i. */
+{
+    "BRANCH_COND_ALWAYS",
+    "BRANCH_COND_RF_TRUE",
+};
+
+static const char* broadcast_mode_str[] = /* Name table for enum broadcast_mode: entry i holds the name of the enumerator whose value is i (0..8). */
+{
+    "BROADCAST_MODE_NONE",
+    "BROADCAST_MODE_H",
+    "BROADCAST_MODE_W",
+    "BROADCAST_MODE_HW",
+    "BROADCAST_MODE_C",
+    "BROADCAST_MODE_CH",
+    "BROADCAST_MODE_CW",
+    "BROADCAST_MODE_CWH",
+    "BROADCAST_MODE_SCALAR",
+};
+
+static const char* cmd0_opcode_str[] =
+{
+    "CMD0_OPCODE_NPU_OP_STOP",
+    "CMD0_OPCODE_NPU_OP_IRQ",
+    "CMD0_OPCODE_NPU_OP_CONV",
+    "CMD0_OPCODE_NPU_OP_DEPTHWISE",
+    "****",
+    "CMD0_OPCODE_NPU_OP_POOL",
+    "CMD0_OPCODE_NPU_OP_ELEMENTWISE",
+    "CMD0_OPCODE_NPU_OP_RESIZE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_OP_DMA_START",
+    "CMD0_OPCODE_NPU_OP_DMA_WAIT",
+    "CMD0_OPCODE_NPU_OP_KERNEL_WAIT",
+    "CMD0_OPCODE_NPU_OP_PMU_MASK",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_TOP",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_LEFT",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_RIGHT",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_BOTTOM",
+    "CMD0_OPCODE_NPU_SET_IFM_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_PRECISION",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_UPSCALE",
+    "CMD0_OPCODE_NPU_SET_IFM_BROADCAST",
+    "CMD0_OPCODE_NPU_SET_IFM_ZERO_POINT",
+    "CMD0_OPCODE_NPU_SET_IFM_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_HEIGHT1_M1",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_REGION",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_PRECISION",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_ZERO_POINT",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT1_M1",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_REGION",
+    "CMD0_OPCODE_NPU_SET_KERNEL_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_KERNEL_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_KERNEL_STRIDE",
+    "****",
+    "CMD0_OPCODE_NPU_SET_ACC_FORMAT",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION_MIN",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION_MAX",
+    "CMD0_OPCODE_NPU_SET_WEIGHT_REGION",
+    "CMD0_OPCODE_NPU_SET_SCALE_REGION",
+    "CMD0_OPCODE_NPU_SET_RESIZE_X_SCALE_N_M1",
+    "CMD0_OPCODE_NPU_SET_RESIZE_Y_SCALE_N_M1",
+    "CMD0_OPCODE_NPU_SET_RESIZE_X_OFFSET",
+    "CMD0_OPCODE_NPU_SET_RESIZE_Y_OFFSET",
+    "CMD0_OPCODE_NPU_SET_WEIGHT_FORMAT",
+    "CMD0_OPCODE_NPU_SET_BLOCKDEP",
+    "CMD0_OPCODE_NPU_SET_DMA0_SRC_REGION",
+    "CMD0_OPCODE_NPU_SET_DMA0_DST_REGION",
+    "CMD0_OPCODE_NPU_SET_DMA0_SIZE0",
+    "CMD0_OPCODE_NPU_SET_DMA0_SIZE1",
+    "CMD0_OPCODE_NPU_SET_DMA0_IDX_REGION",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_BROADCAST",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_PRECISION",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_ZERO_POINT",
+    "CMD0_OPCODE_NPU_SET_IFM2_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM2_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM2_HEIGHT1_M1",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_REGION",
+};
+
+static const char* cmd1_opcode_str[] = // Name strings for CMD1_OPCODE_* values: 257 entries, indices 0..256
+{ // Sparse table: every index without a named opcode holds the **** placeholder; presumably indexed by opcode value - confirm against the cmd1 opcode enum
+    "CMD1_OPCODE_NPU_SET_IFM_BASE0",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE1",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE2",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE3",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE0",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE1",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE2",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE3",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_WEIGHT_BASE",
+    "CMD1_OPCODE_NPU_SET_WEIGHT_LENGTH",
+    "CMD1_OPCODE_NPU_SET_SCALE_BASE",
+    "CMD1_OPCODE_NPU_SET_SCALE_LENGTH",
+    "CMD1_OPCODE_NPU_SET_OFM_SCALE",
+    "CMD1_OPCODE_NPU_SET_IFM_SCALE",
+    "CMD1_OPCODE_NPU_SET_IFM2_SCALE",
+    "CMD1_OPCODE_NPU_SET_OP_SCALAR",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_DMA0_SRC",
+    "CMD1_OPCODE_NPU_SET_DMA0_DST",
+    "CMD1_OPCODE_NPU_SET_DMA0_LEN",
+    "CMD1_OPCODE_NPU_SET_DMA0_SRC_STRIDE0",
+    "CMD1_OPCODE_NPU_SET_DMA0_SRC_STRIDE1",
+    "CMD1_OPCODE_NPU_SET_DMA0_DST_STRIDE0",
+    "CMD1_OPCODE_NPU_SET_DMA0_DST_STRIDE1",
+    "CMD1_OPCODE_NPU_SET_DMA0_IDX",
+    "CMD1_OPCODE_NPU_SET_DMA0_IDX_MAX",
+    "CMD1_OPCODE_NPU_SET_DMA0_IDX_SKIP1",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE0",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE1",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE2",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE3",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_WEIGHT1_BASE",
+    "CMD1_OPCODE_NPU_SET_WEIGHT1_LENGTH",
+    "CMD1_OPCODE_NPU_SET_WEIGHT2_BASE",
+    "CMD1_OPCODE_NPU_SET_WEIGHT2_LENGTH",
+    "CMD1_OPCODE_NPU_SET_WEIGHT3_BASE",
+    "CMD1_OPCODE_NPU_SET_WEIGHT3_LENGTH",
+    "CMD1_OPCODE_NPU_SET_RESIZE_X",
+    "CMD1_OPCODE_NPU_SET_RESIZE_Y",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_OP_BRANCH",
+};
+
+static const char* cmd_ctrl_str[] = // Name strings for the two CMD_CTRL_* values (assumed to mirror the cmd_ctrl enum values - TODO confirm)
+{
+    "CMD_CTRL_CMD0_CTRL",
+    "CMD_CTRL_CMD1_CTRL",
+};
+
+static const char* custom_dma_str[] = // Name strings for the two CUSTOM_DMA_* values (assumed to mirror the custom_dma enum values - TODO confirm)
+{
+    "CUSTOM_DMA_NOT_IMPLEMENTED",
+    "CUSTOM_DMA_IMPLEMENTED",
+};
+
+static const char* dma_fault_channel_str[] = // Name strings for DMA_FAULT_CHANNEL_* values, 10 entries (assumed indexed by enum value - TODO confirm)
+{ // Indices 0..4 carry the *_READ names, 5..7 hold the **** placeholder, 8..9 carry the *_WRITE names
+    "DMA_FAULT_CHANNEL_CMD_READ",
+    "DMA_FAULT_CHANNEL_IFM_READ",
+    "DMA_FAULT_CHANNEL_WEIGHT_READ",
+    "DMA_FAULT_CHANNEL_SBS_READ",
+    "DMA_FAULT_CHANNEL_MEM2MEM_READ",
+    "****",
+    "****",
+    "****",
+    "DMA_FAULT_CHANNEL_OFM_WRITE",
+    "DMA_FAULT_CHANNEL_MEM2MEM_WRITE",
+};
+
+static const char* dma_fault_src_str[] = // Name strings for the two DMA_FAULT_SRC_* values (assumed to mirror the dma_fault_src enum values - TODO confirm)
+{
+    "DMA_FAULT_SRC_SRAM",
+    "DMA_FAULT_SRC_EXT",
+};
+
+static const char* dma_idx_mode_str[] = // Name strings for the two DMA_IDX_MODE_* values (assumed to mirror the dma_idx_mode enum values - TODO confirm)
+{
+    "DMA_IDX_MODE_DISABLED",
+    "DMA_IDX_MODE_ENABLED",
+};
+
+static const char* dma_region_mode_str[] = // Name strings for the two DMA_REGION_MODE_* values (assumed to mirror the dma_region_mode enum values - TODO confirm)
+{
+    "DMA_REGION_MODE_EXTERNAL",
+    "DMA_REGION_MODE_INTERNAL",
+};
+
+static const char* dma_stride_mode_str[] = // Name strings for the three DMA_STRIDE_MODE_* values (assumed to mirror the dma_stride_mode enum values - TODO confirm)
+{
+    "DMA_STRIDE_MODE_D1",
+    "DMA_STRIDE_MODE_D2",
+    "DMA_STRIDE_MODE_D3",
+};
+
+static const char* elementwise_mode_str[] = // Name strings for ELEMENTWISE_MODE_* values, 43 entries (assumed indexed by enum value - TODO confirm)
+{ // Named entries sit at indices 0..11, 16..19, 33..36 and 42; the gaps 12..15, 20..32 and 37..41 hold the **** placeholder
+    "ELEMENTWISE_MODE_MUL",
+    "ELEMENTWISE_MODE_ADD",
+    "ELEMENTWISE_MODE_SUB",
+    "ELEMENTWISE_MODE_MIN",
+    "ELEMENTWISE_MODE_MAX",
+    "ELEMENTWISE_MODE_LRELU",
+    "ELEMENTWISE_MODE_ABS",
+    "ELEMENTWISE_MODE_CLZ",
+    "ELEMENTWISE_MODE_SHR",
+    "ELEMENTWISE_MODE_SHL",
+    "ELEMENTWISE_MODE_LSR",
+    "ELEMENTWISE_MODE_DIV",
+    "****",
+    "****",
+    "****",
+    "****",
+    "ELEMENTWISE_MODE_CMP_EQ",
+    "ELEMENTWISE_MODE_CMP_NE",
+    "ELEMENTWISE_MODE_CMP_GE",
+    "ELEMENTWISE_MODE_CMP_GT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "ELEMENTWISE_MODE_AND",
+    "ELEMENTWISE_MODE_OR",
+    "ELEMENTWISE_MODE_XOR",
+    "ELEMENTWISE_MODE_NOT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "ELEMENTWISE_MODE_AND_NOT",
+};
+
+static const char* ifm_upscale_mode_str[] = // Name strings for the three IFM_UPSCALE_MODE_* values (assumed to mirror the ifm_upscale_mode enum values - TODO confirm)
+{
+    "IFM_UPSCALE_MODE_NONE",
+    "IFM_UPSCALE_MODE_NEAREST",
+    "IFM_UPSCALE_MODE_ZEROS",
+};
+
+static const char* kernel_decomposition_str[] = // Name strings for the two KERNEL_DECOMPOSITION_* values (assumed to mirror the kernel_decomposition enum values - TODO confirm)
+{
+    "KERNEL_DECOMPOSITION_D8X8",
+    "KERNEL_DECOMPOSITION_D4X4",
+};
+
+static const char* kernel_dilation_str[] = // Name strings for the two KERNEL_DILATION_* values (assumed to mirror the kernel_dilation enum values - TODO confirm)
+{
+    "KERNEL_DILATION_NONE",
+    "KERNEL_DILATION_X2",
+};
+
+static const char* max_beats_str[] = // Name strings for the three MAX_BEATS_* values (assumed to mirror the max_beats enum values - TODO confirm)
+{
+    "MAX_BEATS_B64",
+    "MAX_BEATS_B128",
+    "MAX_BEATS_B256",
+};
+
+static const char* microblock_str[] = // Name strings for the six MICROBLOCK_* values, no gaps (assumed to mirror the microblock enum values - TODO confirm)
+{
+    "MICROBLOCK_U1X1",
+    "MICROBLOCK_U1X2",
+    "MICROBLOCK_U1X4",
+    "MICROBLOCK_U2X2",
+    "MICROBLOCK_U2X4",
+    "MICROBLOCK_U4X4",
+};
+
+static const char* ofm_scale_mode_str[] = // Name strings for the two OFM_SCALE_MODE_* values (assumed to mirror the ofm_scale_mode enum values - TODO confirm)
+{
+    "OFM_SCALE_MODE_PER_CHANNEL",
+    "OFM_SCALE_MODE_GLOBAL",
+};
+
+static const char* pmu_axi_channel_str[] = // Name strings for PMU_AXI_CHANNEL_* values, 10 entries (assumed indexed by enum value - TODO confirm)
+{ // Indices 0..6 carry the RD_* names, index 7 holds the **** placeholder, 8..9 carry the WR_* names
+    "PMU_AXI_CHANNEL_RD_CMD",
+    "PMU_AXI_CHANNEL_RD_IFM",
+    "PMU_AXI_CHANNEL_RD_WEIGHTS",
+    "PMU_AXI_CHANNEL_RD_SCALE_BIAS",
+    "PMU_AXI_CHANNEL_RD_MEM2MEM",
+    "PMU_AXI_CHANNEL_RD_IFM_STREAM",
+    "PMU_AXI_CHANNEL_RD_MEM2MEM_IDX",
+    "****",
+    "PMU_AXI_CHANNEL_WR_OFM",
+    "PMU_AXI_CHANNEL_WR_MEM2MEM",
+};
+
+static const char* pmu_event_str[] = // Name strings for PMU_EVENT_* values: 672 entries, indices 0..671
+{ // Sparse table: every index without a named event holds the **** placeholder; presumably indexed by PMU event number - confirm against the pmu_event enum
+    "PMU_EVENT_NO_EVENT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_CYCLE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_NPU_IDLE",
+    "PMU_EVENT_CC_STALLED_ON_BLOCKDEP",
+    "PMU_EVENT_CC_STALLED_ON_SHRAM_RECONFIG",
+    "PMU_EVENT_NPU_ACTIVE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_MAC_ACTIVE",
+    "****",
+    "****",
+    "PMU_EVENT_MAC_DPU_ACTIVE",
+    "PMU_EVENT_MAC_STALLED_BY_W_OR_ACC",
+    "PMU_EVENT_MAC_STALLED_BY_W",
+    "PMU_EVENT_MAC_STALLED_BY_ACC",
+    "PMU_EVENT_MAC_STALLED_BY_IB",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AO_ACTIVE",
+    "****",
+    "****",
+    "PMU_EVENT_AO_STALLED_BY_BS_OR_OB",
+    "PMU_EVENT_AO_STALLED_BY_BS",
+    "PMU_EVENT_AO_STALLED_BY_OB",
+    "PMU_EVENT_AO_STALLED_BY_AB_OR_CB",
+    "PMU_EVENT_AO_STALLED_BY_AB",
+    "PMU_EVENT_AO_STALLED_BY_CB",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_WD_ACTIVE",
+    "PMU_EVENT_WD_STALLED",
+    "****",
+    "PMU_EVENT_WD_STALLED_BY_WD_BUF",
+    "PMU_EVENT_WD_STALLED_BY_WS_FC",
+    "PMU_EVENT_WD_STALLED_BY_WS_TC",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_WD_TRANS_WBLK",
+    "PMU_EVENT_WD_TRANS_WS_FC",
+    "PMU_EVENT_WD_TRANS_WS_TC",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_WD_STALLED_BY_WS_SC0",
+    "PMU_EVENT_WD_STALLED_BY_WS_SC1",
+    "PMU_EVENT_WD_STALLED_BY_WS_SC2",
+    "PMU_EVENT_WD_STALLED_BY_WS_SC3",
+    "PMU_EVENT_WD_PARSE_ACTIVE_SC0",
+    "PMU_EVENT_WD_PARSE_ACTIVE_SC1",
+    "PMU_EVENT_WD_PARSE_ACTIVE_SC2",
+    "PMU_EVENT_WD_PARSE_ACTIVE_SC3",
+    "PMU_EVENT_WD_PARSE_STALL_SC0",
+    "PMU_EVENT_WD_PARSE_STALL_SC1",
+    "PMU_EVENT_WD_PARSE_STALL_SC2",
+    "PMU_EVENT_WD_PARSE_STALL_SC3",
+    "PMU_EVENT_WD_PARSE_STALL_IN_SC0",
+    "PMU_EVENT_WD_PARSE_STALL_IN_SC1",
+    "PMU_EVENT_WD_PARSE_STALL_IN_SC2",
+    "PMU_EVENT_WD_PARSE_STALL_IN_SC3",
+    "PMU_EVENT_WD_PARSE_STALL_OUT_SC0",
+    "PMU_EVENT_WD_PARSE_STALL_OUT_SC1",
+    "PMU_EVENT_WD_PARSE_STALL_OUT_SC2",
+    "PMU_EVENT_WD_PARSE_STALL_OUT_SC3",
+    "PMU_EVENT_WD_TRANS_WS_SC0",
+    "PMU_EVENT_WD_TRANS_WS_SC1",
+    "PMU_EVENT_WD_TRANS_WS_SC2",
+    "PMU_EVENT_WD_TRANS_WS_SC3",
+    "PMU_EVENT_WD_TRANS_WB0",
+    "PMU_EVENT_WD_TRANS_WB1",
+    "PMU_EVENT_WD_TRANS_WB2",
+    "PMU_EVENT_WD_TRANS_WB3",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_SRAM_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM_RD_TRANS_COMPLETED",
+    "PMU_EVENT_SRAM_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_SRAM_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_SRAM_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_SRAM_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_SRAM_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_SRAM_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_SRAM_RD_STALL_LIMIT",
+    "PMU_EVENT_SRAM_WR_STALL_LIMIT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AXI_LATENCY_ANY",
+    "PMU_EVENT_AXI_LATENCY_32",
+    "PMU_EVENT_AXI_LATENCY_64",
+    "PMU_EVENT_AXI_LATENCY_128",
+    "PMU_EVENT_AXI_LATENCY_256",
+    "PMU_EVENT_AXI_LATENCY_512",
+    "PMU_EVENT_AXI_LATENCY_1024",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_ECC_DMA",
+    "PMU_EVENT_ECC_MAC_IB",
+    "PMU_EVENT_ECC_MAC_AB",
+    "PMU_EVENT_ECC_AO_CB",
+    "PMU_EVENT_ECC_AO_OB",
+    "PMU_EVENT_ECC_AO_LUT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_EXT_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_EXT_RD_TRANS_COMPLETED",
+    "PMU_EVENT_EXT_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_EXT_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_EXT_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_EXT_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_EXT_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_EXT_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_EXT_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_EXT_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_EXT_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_EXT_RD_STALL_LIMIT",
+    "PMU_EVENT_EXT_WR_STALL_LIMIT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_SRAM0_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM0_RD_TRANS_COMPLETED",
+    "PMU_EVENT_SRAM0_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_SRAM0_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM0_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM0_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_SRAM0_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_SRAM0_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_SRAM0_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM0_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_SRAM0_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_SRAM0_RD_STALL_LIMIT",
+    "PMU_EVENT_SRAM0_WR_STALL_LIMIT",
+    "PMU_EVENT_SRAM1_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM1_RD_TRANS_COMPLETED",
+    "PMU_EVENT_SRAM1_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_SRAM1_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM1_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM1_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_SRAM1_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_SRAM1_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_SRAM1_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM1_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_SRAM1_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_SRAM1_RD_STALL_LIMIT",
+    "PMU_EVENT_SRAM1_WR_STALL_LIMIT",
+    "PMU_EVENT_SRAM2_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM2_RD_TRANS_COMPLETED",
+    "PMU_EVENT_SRAM2_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_SRAM2_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM2_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM2_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_SRAM2_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_SRAM2_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_SRAM2_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM2_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_SRAM2_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_SRAM2_RD_STALL_LIMIT",
+    "PMU_EVENT_SRAM2_WR_STALL_LIMIT",
+    "PMU_EVENT_SRAM3_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM3_RD_TRANS_COMPLETED",
+    "PMU_EVENT_SRAM3_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_SRAM3_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM3_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_SRAM3_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_SRAM3_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_SRAM3_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_SRAM3_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_SRAM3_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_SRAM3_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_SRAM3_RD_STALL_LIMIT",
+    "PMU_EVENT_SRAM3_WR_STALL_LIMIT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_EXT0_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_EXT0_RD_TRANS_COMPLETED",
+    "PMU_EVENT_EXT0_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_EXT0_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_EXT0_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_EXT0_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_EXT0_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_EXT0_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_EXT0_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_EXT0_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_EXT0_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_EXT0_RD_STALL_LIMIT",
+    "PMU_EVENT_EXT0_WR_STALL_LIMIT",
+    "PMU_EVENT_EXT1_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_EXT1_RD_TRANS_COMPLETED",
+    "PMU_EVENT_EXT1_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_EXT1_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_EXT1_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_EXT1_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_EXT1_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_EXT1_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_EXT1_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_EXT1_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_EXT1_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_EXT1_RD_STALL_LIMIT",
+    "PMU_EVENT_EXT1_WR_STALL_LIMIT",
+};
+
+static const char* pmu_port_disable_str[] = // Name strings for the two PMU_PORT_DISABLE_* values; index 0 is ..._ENABLE, index 1 is ..._DISABLE (assumed to mirror the enum values - TODO confirm)
+{
+    "PMU_PORT_DISABLE_ENABLE",
+    "PMU_PORT_DISABLE_DISABLE",
+};
+
+static const char* pooling_mode_str[] = // Name strings for the eight POOLING_MODE_* values, no gaps (assumed to mirror the pooling_mode enum values - TODO confirm)
+{
+    "POOLING_MODE_MAX",
+    "POOLING_MODE_AVERAGE",
+    "POOLING_MODE_REDUCE_SUM",
+    "POOLING_MODE_SUM",
+    "POOLING_MODE_NONE",
+    "POOLING_MODE_MIN",
+    "POOLING_MODE_ARGMAX_X",
+    "POOLING_MODE_ARGMAX_Y",
+};
+
+static const char* privilege_level_str[] = // Name strings for the two PRIVILEGE_LEVEL_* values (assumed to mirror the privilege_level enum values - TODO confirm)
+{
+    "PRIVILEGE_LEVEL_USER",
+    "PRIVILEGE_LEVEL_PRIVILEGED",
+};
+
+static const char* ram_id_str[] = // Name strings for the five RAM_ID_* values, no gaps (assumed to mirror the ram_id enum values - TODO confirm)
+{
+    "RAM_ID_LUT",
+    "RAM_ID_IB",
+    "RAM_ID_AB",
+    "RAM_ID_CB",
+    "RAM_ID_OB",
+};
+
+static const char* resize_mode_str[] = // Name strings for the three RESIZE_MODE_* values (assumed to mirror the resize_mode enum values - TODO confirm)
+{
+    "RESIZE_MODE_BILINEAR",
+    "RESIZE_MODE_REPLICATE",
+    "RESIZE_MODE_NEAREST",
+};
+
+static const char* round_mode_ifm_str[] = // Name strings for the two ROUND_MODE_IFM_* values (assumed to mirror the round_mode_ifm enum values - TODO confirm)
+{
+    "ROUND_MODE_IFM_DOUBLE_SYMMETRIC",
+    "ROUND_MODE_IFM_NATURAL",
+};
+
+static const char* round_mode_ofm_str[] = // Name strings for the six ROUND_MODE_OFM_* values, no gaps (assumed to mirror the round_mode_ofm enum values - TODO confirm)
+{ // The first two names parallel round_mode_ifm_str; the OFM table adds four more
+    "ROUND_MODE_OFM_DOUBLE_SYMMETRIC",
+    "ROUND_MODE_OFM_NATURAL",
+    "ROUND_MODE_OFM_DOUBLE_ASYMMETRIC",
+    "ROUND_MODE_OFM_SYMMETRIC",
+    "ROUND_MODE_OFM_TRUNCATE_TO_ZERO",
+    "ROUND_MODE_OFM_TRUNCATE_TO_LOWER",
+};
+
+static const char* security_level_str[] = // Name strings for the two SECURITY_LEVEL_* values; index 0 is SECURE, index 1 is NON_SECURE (assumed to mirror the enum values - TODO confirm)
+{
+    "SECURITY_LEVEL_SECURE",
+    "SECURITY_LEVEL_NON_SECURE",
+};
+
+static const char* state_str[] = // Name strings for the two STATE_* values; index 0 is STOPPED, index 1 is RUNNING (assumed to mirror the state enum values - TODO confirm)
+{
+    "STATE_STOPPED",
+    "STATE_RUNNING",
+};
+
+static const char* wd_active_core_str[] = // Name strings for the four WD_ACTIVE_CORE_* values, no gaps (assumed to mirror the wd_active_core enum values - TODO confirm)
+{
+    "WD_ACTIVE_CORE_NONE",
+    "WD_ACTIVE_CORE_STANDARD",
+    "WD_ACTIVE_CORE_FAST",
+    "WD_ACTIVE_CORE_TENSOR",
+};
+
+static const char* weight_format_str[] = // Name strings for the two WEIGHT_FORMAT_* values (assumed to mirror the weight_format enum values - TODO confirm)
+{
+    "WEIGHT_FORMAT_SWD",
+    "WEIGHT_FORMAT_FWD",
+};
+
+static const char* weight_order_str[] = // Name strings for the two WEIGHT_ORDER_* values (assumed to mirror the weight_order enum values - TODO confirm)
+{
+    "WEIGHT_ORDER_DEPTH_FIRST",
+    "WEIGHT_ORDER_PART_KERNEL_FIRST",
+};
+
+static const char* weight_sparsity_str[] = // Name strings for the two WEIGHT_SPARSITY_* values (assumed to mirror the weight_sparsity enum values - TODO confirm)
+{
+    "WEIGHT_SPARSITY_NONE",
+    "WEIGHT_SPARSITY_SPARSE_2_4",
+};
+
+#endif
+
+// Register type structs
+// id_r - ID register: product, release and architecture version fields packed into one 32-bit word (C: bit-field union, C++: accessor class)
+struct id_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t version_status : 4; // Version of the product; bits [3:0] (bit positions quoted here are the ones the C++ accessors below use)
+            uint32_t version_minor : 4; // The n for the P part of an RnPn release number; bits [7:4]
+            uint32_t version_major : 4; // The n for the R part of an RnPn release number; bits [11:8]
+            uint32_t product_major : 4; // Product major ID number (unique per base product); bits [15:12]
+            uint32_t arch_patch_rev : 4; // Patch number of the architecture version a.b; bits [19:16]
+            uint32_t arch_minor_rev : 8; // Minor architecture version number, b in the architecture version a.b; bits [27:20]
+            uint32_t arch_major_rev : 4; // Major architecture version number, a in the architecture version a.b; bits [31:28]
+        };
+        uint32_t word; // The same 32 bits viewed as one raw value
+    };
+#else
+private:
+    uint32_t word0; // Raw register value; every accessor below masks and shifts this word
+public:
+    CONSTEXPR id_r() : // Default construction loads a fixed constant rather than zero
+        word0(536899584) // 0x20007000: arch_major_rev = 2, product_major = 7, every other field 0
+    {}
+    CONSTEXPR id_r(uint32_t init) : // Wrap an existing raw register value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Implicit conversion back to the raw value (not const-qualified)
+    {
+        return word0;
+    }
+    id_r copy() // Return a by-value copy of this register object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_version_status() const // Read the 4-bit field at bits [3:0]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR id_r& set_version_status(uint32_t value) // Replace bits [3:0] with the low 4 bits of value; returns *this for chaining
+    {
+        word0 = (~(((1U << 4) - 1)<<0) & word0) | ((((1U << 4) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_version_minor() const // Read the 4-bit field at bits [7:4]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR id_r& set_version_minor(uint32_t value) // Replace bits [7:4] with the low 4 bits of value; returns *this for chaining
+    {
+        word0 = (~(((1U << 4) - 1)<<4) & word0) | ((((1U << 4) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_version_major() const // Read the 4-bit field at bits [11:8]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR id_r& set_version_major(uint32_t value) // Replace bits [11:8] with the low 4 bits of value; returns *this for chaining
+    {
+        word0 = (~(((1U << 4) - 1)<<8) & word0) | ((((1U << 4) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_product_major() const // Read the 4-bit field at bits [15:12]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 12);
+        return v;
+    }
+    CONSTEXPR id_r& set_product_major(uint32_t value) // Replace bits [15:12] with the low 4 bits of value; returns *this for chaining
+    {
+        word0 = (~(((1U << 4) - 1)<<12) & word0) | ((((1U << 4) - 1) & value) << 12);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_arch_patch_rev() const // Read the 4-bit field at bits [19:16]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 16);
+        return v;
+    }
+    CONSTEXPR id_r& set_arch_patch_rev(uint32_t value) // Replace bits [19:16] with the low 4 bits of value; returns *this for chaining
+    {
+        word0 = (~(((1U << 4) - 1)<<16) & word0) | ((((1U << 4) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_arch_minor_rev() const // Read the 8-bit field at bits [27:20]
+    {
+        auto v = ((1U << 8) - 1) & (word0 >> 20);
+        return v;
+    }
+    CONSTEXPR id_r& set_arch_minor_rev(uint32_t value) // Replace bits [27:20] with the low 8 bits of value; returns *this for chaining
+    {
+        word0 = (~(((1U << 8) - 1)<<20) & word0) | ((((1U << 8) - 1) & value) << 20);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_arch_major_rev() const // Read the 4-bit field at bits [31:28]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 28);
+        return v;
+    }
+    CONSTEXPR id_r& set_arch_major_rev(uint32_t value) // Replace bits [31:28] with the low 4 bits of value; returns *this for chaining
+    {
+        word0 = (~(((1U << 4) - 1)<<28) & word0) | ((((1U << 4) - 1) & value) << 28);
+        return *this;
+    }
+#endif
+};
+
+// status_r - Register describing the current operating status of the NPU
+struct status_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t state : 1; // 0 = NPU is in stopped state. 1 = NPU is in running state
+            uint32_t irq_raised : 1; // 0 = IRQ not raised. 1 = IRQ raised
+            uint32_t bus_status : 1; // 0 = No bus fault. 1 = Bus abort detected and processing halted
+            uint32_t reset_status : 1; // 0 = No reset in progress. 1 = Reset in progress
+            uint32_t cmd_parse_error : 1; // 0 = No parsing error. 1 = Command stream parsing error detected
+            uint32_t cmd_end_reached : 1; // 0 = Command stream end is not reached. 1 = Command stream end is reached
+            uint32_t pmu_irq_raised : 1; // 0 = No PMU IRQ raised. 1 = PMU IRQ raised
+            uint32_t reserved0 : 1; // reserved; the C++ view has no accessor for it
+            uint32_t ecc_fault : 1; // 0 = No ECC fault detected. 1 = ECC fault detected
+            uint32_t branch_fault : 1; // 0 = No branch fault. 1 = Branch fault detected
+            uint32_t reserved1 : 1; // reserved; the C++ view has no accessor for it
+            uint32_t faulting_interface : 1; // The faulting interface on bus abort
+            uint32_t faulting_channel : 4; // The faulting channel on a bus abort
+            uint32_t irq_history_mask : 16; // The IRQ History mask
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR status_r() :
+        word0(8) // 0x00000008: only bit 3 (reset_status) is set
+    {}
+    CONSTEXPR status_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    status_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::state get_state() const // word0[0]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0);
+        assert(v <= 1); // v comes from a 1-bit mask, so it is always 0 or 1
+        return static_cast<NPU_NAMESPACE::state>(v);
+    }
+    CONSTEXPR status_r& set_state(NPU_NAMESPACE::state value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_irq_raised() const // word0[1]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 1);
+        return v;
+    }
+    CONSTEXPR status_r& set_irq_raised(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bus_status() const // word0[2]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 2);
+        return v;
+    }
+    CONSTEXPR status_r& set_bus_status(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_reset_status() const // word0[3]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 3);
+        return v;
+    }
+    CONSTEXPR status_r& set_reset_status(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_parse_error() const // word0[4]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR status_r& set_cmd_parse_error(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_end_reached() const // word0[5]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 5);
+        return v;
+    }
+    CONSTEXPR status_r& set_cmd_end_reached(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_pmu_irq_raised() const // word0[6]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 6);
+        return v;
+    }
+    CONSTEXPR status_r& set_pmu_irq_raised(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ecc_fault() const // word0[8]; bit 7 has no accessor
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR status_r& set_ecc_fault(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<8) & word0) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_branch_fault() const // word0[9]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 9);
+        return v;
+    }
+    CONSTEXPR status_r& set_branch_fault(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<9) & word0) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::dma_fault_src get_faulting_interface() const // word0[11]; bit 10 has no accessor
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 11);
+        assert(v <= 1); // v comes from a 1-bit mask, so it is always 0 or 1
+        return static_cast<NPU_NAMESPACE::dma_fault_src>(v);
+    }
+    CONSTEXPR status_r& set_faulting_interface(NPU_NAMESPACE::dma_fault_src value)
+    {
+        word0 = (~(((1U << 1) - 1)<<11) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 11);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::dma_fault_channel get_faulting_channel() const // word0[15:12]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 12);
+        assert(v <= 9); // the 4-bit field can hold 0..15; raw values above 9 trip this assert before the enum cast
+        return static_cast<NPU_NAMESPACE::dma_fault_channel>(v);
+    }
+    CONSTEXPR status_r& set_faulting_channel(NPU_NAMESPACE::dma_fault_channel value)
+    {
+        word0 = (~(((1U << 4) - 1)<<12) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 12);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_irq_history_mask() const // word0[31:16]
+    {
+        auto v = ((1U << 16) - 1) & (word0 >> 16);
+        return v;
+    }
+    CONSTEXPR status_r& set_irq_history_mask(uint32_t value)
+    {
+        word0 = (~(((1U << 16) - 1)<<16) & word0) | ((((1U << 16) - 1) & value) << 16);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// cmd_r - The command register. Reads back as the last written command
+struct cmd_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t transition_to_running_state : 1; // Write 1 to transition the NPU to running state. Writing 0 has no effect
+            uint32_t clear_irq : 1; // Write 1 to clear the IRQ status in the STATUS register. Writing 0 has no effect
+            uint32_t clock_q_enable : 1; // Write 1 to this bit to enable clock off using clock q-interface and enable the requester clock gate
+            uint32_t power_q_enable : 1; // Write 1 to this bit to enable power off using power q-interface
+            uint32_t stop_request : 1; // Write 1 to this bit to request STOP after completing any already-started commands
+            uint32_t reserved0 : 11; // reserved; the C++ view has no accessor for it
+            uint32_t clear_irq_history : 16; // Clears the IRQ history mask
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR cmd_r() :
+        word0(12) // 0x0000000C: bits 2 and 3 (clock_q_enable, power_q_enable) are set
+    {}
+    CONSTEXPR cmd_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    cmd_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_transition_to_running_state() const // word0[0]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR cmd_r& set_transition_to_running_state(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_clear_irq() const // word0[1]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 1);
+        return v;
+    }
+    CONSTEXPR cmd_r& set_clear_irq(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_clock_q_enable() const // word0[2]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 2);
+        return v;
+    }
+    CONSTEXPR cmd_r& set_clock_q_enable(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_power_q_enable() const // word0[3]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 3);
+        return v;
+    }
+    CONSTEXPR cmd_r& set_power_q_enable(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stop_request() const // word0[4]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR cmd_r& set_stop_request(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_clear_irq_history() const // word0[31:16]; bits 15:5 have no accessor
+    {
+        auto v = ((1U << 16) - 1) & (word0 >> 16);
+        return v;
+    }
+    CONSTEXPR cmd_r& set_clear_irq_history(uint32_t value)
+    {
+        word0 = (~(((1U << 16) - 1)<<16) & word0) | ((((1U << 16) - 1) & value) << 16);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// reset_r - Request reset and new security mode
+struct reset_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t pending_CPL : 1; // Current privilege level. 0 = User and 1 = Privileged
+            uint32_t pending_CSL : 1; // Current security level. 0 = Secure and 1 = Non-secure
+            uint32_t reserved0 : 30; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR reset_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR reset_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    reset_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::privilege_level get_pending_CPL() const // word0[0]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0);
+        assert(v <= 1); // v comes from a 1-bit mask, so it is always 0 or 1
+        return static_cast<NPU_NAMESPACE::privilege_level>(v);
+    }
+    CONSTEXPR reset_r& set_pending_CPL(NPU_NAMESPACE::privilege_level value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::security_level get_pending_CSL() const // word0[1]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 1);
+        assert(v <= 1); // v comes from a 1-bit mask, so it is always 0 or 1
+        return static_cast<NPU_NAMESPACE::security_level>(v);
+    }
+    CONSTEXPR reset_r& set_pending_CSL(NPU_NAMESPACE::security_level value)
+    {
+        word0 = (~(((1U << 1) - 1)<<1) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// qbase_r - The base address of the command stream in bytes
+struct qbase_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24; // reserved
+        };
+        uint32_t word[2]; // the register as two 32-bit values
+    };
+#else // C++ builds: two private raw words, read and written only as one 64-bit value (no per-field accessors)
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    CONSTEXPR qbase_r() :
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR qbase_r(uint64_t init) : // splits init into its low and high 32-bit halves
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words from a 64-bit value
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); // low half into word0, high half into word1
+    }
+    CONSTEXPR operator uint64_t() // recombines the two words into one 64-bit value
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    qbase_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// qread_r - The read offset in the command stream in bytes. Multiple of four in the range 0-16MB
+struct qread_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t QREAD : 32; // The read offset of the current command under execution
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private; the single field spans all 32 bits
+private:
+    uint32_t word0; // raw register contents
+public:
+    CONSTEXPR qread_r() :
+        word0(0) // offset defaults to zero
+    {}
+    CONSTEXPR qread_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    qread_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_QREAD() const // full word, no masking
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR qread_r& set_QREAD(uint32_t value) // stores value unmodified (no range or alignment check); returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// qconfig_r - The AXI configuration for the command stream in the range 0-3. Same encoding as for REGIONCFG
+struct qconfig_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t cmd_region0 : 2; // The command region configuration number
+            uint32_t reserved0 : 30; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // the setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR qconfig_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR qconfig_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    qconfig_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_region0() const // word0[1:0]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR qconfig_r& set_cmd_region0(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<0) & word0) | ((((1U << 2) - 1) & value) << 0);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// qsize_r - The size of the command stream in bytes. Multiple of four in the range 0-16MB
+struct qsize_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t QSIZE : 32; // The size of the next command stream to be executed by the NPU
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private; the single field spans all 32 bits
+private:
+    uint32_t word0; // raw register contents
+public:
+    CONSTEXPR qsize_r() :
+        word0(0) // size defaults to zero
+    {}
+    CONSTEXPR qsize_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    qsize_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_QSIZE() const // full word, no masking
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR qsize_r& set_QSIZE(uint32_t value) // stores value unmodified (no range or alignment check); returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// prot_r - The protection level configured for the NPU when acting as an AXI Requester
+struct prot_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t active_CPL : 1; // The current privilege level. 0 = User and 1 = Privileged
+            uint32_t active_CSL : 1; // The current security level. 0 = Secure and 1 = Non-secure
+            uint32_t reserved0 : 30; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR prot_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR prot_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    prot_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::privilege_level get_active_CPL() const // word0[0]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0);
+        assert(v <= 1); // v comes from a 1-bit mask, so it is always 0 or 1
+        return static_cast<NPU_NAMESPACE::privilege_level>(v);
+    }
+    CONSTEXPR prot_r& set_active_CPL(NPU_NAMESPACE::privilege_level value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::security_level get_active_CSL() const // word0[1]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 1);
+        assert(v <= 1); // v comes from a 1-bit mask, so it is always 0 or 1
+        return static_cast<NPU_NAMESPACE::security_level>(v);
+    }
+    CONSTEXPR prot_r& set_active_CSL(NPU_NAMESPACE::security_level value)
+    {
+        word0 = (~(((1U << 1) - 1)<<1) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// config_r - RTL configuration
+struct config_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t macs_per_cc : 4; // The log2(macs/clock cycle)
+            uint32_t cmd_stream_version : 4; // The command stream version accepted by this NPU
+            uint32_t num_axi_sram : 2; // The log2 of the number of AXI SRAM interfaces
+            uint32_t num_axi_ext : 1; // The log2 of the number of AXI External memory interfaces
+            uint32_t reserved0 : 1; // reserved; the C++ view has no accessor for it
+            uint32_t num_wd : 2; // The log2 of the number of standard weight decoders
+            uint32_t reserved1 : 13; // reserved; the C++ view has no accessor for it
+            uint32_t custom_dma : 1; // The custom DMA configuration
+            uint32_t product : 4; // The product configuration
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR config_r() :
+        word0(536870928) // 0x20000010: product (bits 31:28) = 2, cmd_stream_version (bits 7:4) = 1, all else 0
+    {}
+    CONSTEXPR config_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    config_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_macs_per_cc() const // word0[3:0]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR config_r& set_macs_per_cc(uint32_t value)
+    {
+        word0 = (~(((1U << 4) - 1)<<0) & word0) | ((((1U << 4) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_stream_version() const // word0[7:4]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR config_r& set_cmd_stream_version(uint32_t value)
+    {
+        word0 = (~(((1U << 4) - 1)<<4) & word0) | ((((1U << 4) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_num_axi_sram() const // word0[9:8]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR config_r& set_num_axi_sram(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<8) & word0) | ((((1U << 2) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_num_axi_ext() const // word0[10]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 10);
+        return v;
+    }
+    CONSTEXPR config_r& set_num_axi_ext(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<10) & word0) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_num_wd() const // word0[13:12]; bit 11 has no accessor
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 12);
+        return v;
+    }
+    CONSTEXPR config_r& set_num_wd(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<12) & word0) | ((((1U << 2) - 1) & value) << 12);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::custom_dma get_custom_dma() const // word0[27]; bits 26:14 have no accessor
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 27);
+        assert(v <= 1); // v comes from a 1-bit mask, so it is always 0 or 1
+        return static_cast<NPU_NAMESPACE::custom_dma>(v);
+    }
+    CONSTEXPR config_r& set_custom_dma(NPU_NAMESPACE::custom_dma value)
+    {
+        word0 = (~(((1U << 1) - 1)<<27) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 27);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_product() const // word0[31:28]
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 28);
+        return v;
+    }
+    CONSTEXPR config_r& set_product(uint32_t value)
+    {
+        word0 = (~(((1U << 4) - 1)<<28) & word0) | ((((1U << 4) - 1) & value) << 28);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// cond_status_r - Condition status of the NPU
+struct cond_status_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t result_flag : 1; // The tensor result flag. For OFM with a single element, this is bit 0 of the value. Otherwise UNPREDICTABLE
+            uint32_t reserved0 : 31; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // the setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR cond_status_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR cond_status_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    cond_status_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_result_flag() const // word0[0]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR cond_status_r& set_result_flag(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// power_ctrl_r - Power control register
+struct power_ctrl_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t mac_step_cycles : 6; // MAC power ramping up/down control
+            uint32_t reserved0 : 26; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // the setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR power_ctrl_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR power_ctrl_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    power_ctrl_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mac_step_cycles() const // word0[5:0]
+    {
+        auto v = ((1U << 6) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR power_ctrl_r& set_mac_step_cycles(uint32_t value)
+    {
+        word0 = (~(((1U << 6) - 1)<<0) & word0) | ((((1U << 6) - 1) & value) << 0);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// regioncfg_r - Specify which MEM_ATTR register applies to each region
+struct regioncfg_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t region0 : 2; // Bits for Region0 Configuration
+            uint32_t region1 : 2; // Bits for Region1 Configuration
+            uint32_t region2 : 2; // Bits for Region2 Configuration
+            uint32_t region3 : 2; // Bits for Region3 Configuration
+            uint32_t region4 : 2; // Bits for Region4 Configuration
+            uint32_t region5 : 2; // Bits for Region5 Configuration
+            uint32_t region6 : 2; // Bits for Region6 Configuration
+            uint32_t region7 : 2; // Bits for Region7 Configuration
+            uint32_t reserved0 : 16; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private; region N occupies the 2-bit field at word0[2N+1:2N]
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to 2 bits and returns *this for chaining
+    CONSTEXPR regioncfg_r() :
+        word0(0) // all regions default to configuration 0
+    {}
+    CONSTEXPR regioncfg_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    regioncfg_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region0() const // word0[1:0]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR regioncfg_r& set_region0(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<0) & word0) | ((((1U << 2) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region1() const // word0[3:2]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 2);
+        return v;
+    }
+    CONSTEXPR regioncfg_r& set_region1(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<2) & word0) | ((((1U << 2) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region2() const // word0[5:4]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR regioncfg_r& set_region2(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<4) & word0) | ((((1U << 2) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region3() const // word0[7:6]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 6);
+        return v;
+    }
+    CONSTEXPR regioncfg_r& set_region3(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<6) & word0) | ((((1U << 2) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region4() const // word0[9:8]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR regioncfg_r& set_region4(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<8) & word0) | ((((1U << 2) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region5() const // word0[11:10]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 10);
+        return v;
+    }
+    CONSTEXPR regioncfg_r& set_region5(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<10) & word0) | ((((1U << 2) - 1) & value) << 10);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region6() const // word0[13:12]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 12);
+        return v;
+    }
+    CONSTEXPR regioncfg_r& set_region6(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<12) & word0) | ((((1U << 2) - 1) & value) << 12);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region7() const // word0[15:14]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 14);
+        return v;
+    }
+    CONSTEXPR regioncfg_r& set_region7(uint32_t value)
+    {
+        word0 = (~(((1U << 2) - 1)<<14) & word0) | ((((1U << 2) - 1) & value) << 14);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// mem_attr_r - Memory attributes 0
+struct mem_attr_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t mem_domain : 2; // Memory domain
+            uint32_t axi_port : 1; // AXI port select
+            uint32_t reserved0 : 1; // reserved; the C++ view has no accessor for it
+            uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
+            uint32_t reserved1 : 24; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR mem_attr_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR mem_attr_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    mem_attr_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_mem_domain get_mem_domain() const // word0[1:0]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 0);
+        assert(v <= 3); // v comes from a 2-bit mask, so it is always within 0..3
+        return static_cast<NPU_NAMESPACE::axi_mem_domain>(v);
+    }
+    CONSTEXPR mem_attr_r& set_mem_domain(NPU_NAMESPACE::axi_mem_domain value)
+    {
+        word0 = (~(((1U << 2) - 1)<<0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_port get_axi_port() const // word0[2]
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 2);
+        assert(v <= 1); // v comes from a 1-bit mask, so it is always 0 or 1
+        return static_cast<NPU_NAMESPACE::axi_port>(v);
+    }
+    CONSTEXPR mem_attr_r& set_axi_port(NPU_NAMESPACE::axi_port value)
+    {
+        word0 = (~(((1U << 1) - 1)<<2) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 2);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const // word0[7:4]; bit 3 has no accessor
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 4);
+        assert(v <= 11); // the 4-bit field can hold 0..15; raw values above 11 trip this assert before the enum cast
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(v);
+    }
+    CONSTEXPR mem_attr_r& set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    {
+        word0 = (~(((1U << 4) - 1)<<4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// axi_sram_r - The AXI configuration for SRAM ports
+struct axi_sram_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t max_outstanding_read_m1 : 6; // Maximum number of outstanding AXI read transactions per port - 1
+            uint32_t reserved0 : 2; // reserved; the C++ view has no accessor for it
+            uint32_t max_outstanding_write_m1 : 5; // Maximum number of outstanding AXI write transactions per port - 1
+            uint32_t reserved1 : 3; // reserved; the C++ view has no accessor for it
+            uint32_t max_beats : 2; // Burst split alignment
+            uint32_t reserved2 : 14; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR axi_sram_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR axi_sram_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    axi_sram_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_read_m1() const // word0[5:0]
+    {
+        auto v = ((1U << 6) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR axi_sram_r& set_max_outstanding_read_m1(uint32_t value)
+    {
+        word0 = (~(((1U << 6) - 1)<<0) & word0) | ((((1U << 6) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_write_m1() const // word0[12:8]
+    {
+        auto v = ((1U << 5) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR axi_sram_r& set_max_outstanding_write_m1(uint32_t value)
+    {
+        word0 = (~(((1U << 5) - 1)<<8) & word0) | ((((1U << 5) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const // word0[17:16]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 16);
+        assert(v <= 2); // the 2-bit field can hold 0..3; a raw value of 3 trips this assert before the enum cast
+        return static_cast<NPU_NAMESPACE::max_beats>(v);
+    }
+    CONSTEXPR axi_sram_r& set_max_beats(NPU_NAMESPACE::max_beats value)
+    {
+        word0 = (~(((1U << 2) - 1)<<16) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 16);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// axi_ext_r - The AXI configuration for EXT ports
+struct axi_ext_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t max_outstanding_read_m1 : 6; // Maximum number of outstanding AXI read transactions per port - 1
+            uint32_t reserved0 : 2; // reserved; the C++ view has no accessor for it
+            uint32_t max_outstanding_write_m1 : 5; // Maximum number of outstanding AXI write transactions per port - 1
+            uint32_t reserved1 : 3; // reserved; the C++ view has no accessor for it
+            uint32_t max_beats : 2; // Burst split alignment
+            uint32_t reserved2 : 14; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR axi_ext_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR axi_ext_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    axi_ext_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_read_m1() const // word0[5:0]
+    {
+        auto v = ((1U << 6) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR axi_ext_r& set_max_outstanding_read_m1(uint32_t value)
+    {
+        word0 = (~(((1U << 6) - 1)<<0) & word0) | ((((1U << 6) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_write_m1() const // word0[12:8]
+    {
+        auto v = ((1U << 5) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR axi_ext_r& set_max_outstanding_write_m1(uint32_t value)
+    {
+        word0 = (~(((1U << 5) - 1)<<8) & word0) | ((((1U << 5) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const // word0[17:16]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 16);
+        assert(v <= 2); // the 2-bit field can hold 0..3; a raw value of 3 trips this assert before the enum cast
+        return static_cast<NPU_NAMESPACE::max_beats>(v);
+    }
+    CONSTEXPR axi_ext_r& set_max_beats(NPU_NAMESPACE::max_beats value)
+    {
+        word0 = (~(((1U << 2) - 1)<<16) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 16);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// cfg_sram_cap_r - The value of the CFGSRAMCAP pins, SRAM AXI ports cap
+struct cfg_sram_cap_r
+{
+#ifndef __cplusplus // C builds: bitfield view overlaid on the raw register word
+    union
+    {
+        struct
+        {
+            uint32_t max_outstanding_read_m1 : 6; // Maximum number of outstanding AXI read transactions per port - 1
+            uint32_t reserved0 : 2; // reserved; the C++ view has no accessor for it
+            uint32_t max_outstanding_write_m1 : 5; // Maximum number of outstanding AXI write transactions per port - 1
+            uint32_t reserved1 : 3; // reserved; the C++ view has no accessor for it
+            uint32_t max_beats : 2; // Burst split alignment
+            uint32_t reserved2 : 14; // reserved; the C++ view has no accessor for it
+        };
+        uint32_t word; // the whole register as a single 32-bit value
+    };
+#else // C++ builds: raw word kept private, fields reached through masked accessors
+private:
+    uint32_t word0; // raw register contents
+public: // every setter truncates its argument to the field width and returns *this for chaining
+    CONSTEXPR cfg_sram_cap_r() :
+        word0(0) // all fields default to zero
+    {}
+    CONSTEXPR cfg_sram_cap_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole register word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw register word
+    {
+        return word0;
+    }
+    cfg_sram_cap_r copy() // returns a by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_read_m1() const // word0[5:0]
+    {
+        auto v = ((1U << 6) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR cfg_sram_cap_r& set_max_outstanding_read_m1(uint32_t value)
+    {
+        word0 = (~(((1U << 6) - 1)<<0) & word0) | ((((1U << 6) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_write_m1() const // word0[12:8]
+    {
+        auto v = ((1U << 5) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR cfg_sram_cap_r& set_max_outstanding_write_m1(uint32_t value)
+    {
+        word0 = (~(((1U << 5) - 1)<<8) & word0) | ((((1U << 5) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const // word0[17:16]
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 16);
+        assert(v <= 2); // the 2-bit field can hold 0..3; a raw value of 3 trips this assert before the enum cast
+        return static_cast<NPU_NAMESPACE::max_beats>(v);
+    }
+    CONSTEXPR cfg_sram_cap_r& set_max_beats(NPU_NAMESPACE::max_beats value)
+    {
+        word0 = (~(((1U << 2) - 1)<<16) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 16);
+        return *this;
+    }
+#endif // end of the C / C++ variants
+};
+
+// cfg_ext_cap_r - The value of the CFGEXTCAP pins, EXT AXI ports cap
+struct cfg_ext_cap_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on the raw 32-bit word
+    union
+    {
+        struct
+        {
+            uint32_t max_outstanding_read_m1 : 6; // Maximum number of outstanding AXI read transactions per port - 1
+            uint32_t reserved0 : 2;
+            uint32_t max_outstanding_write_m1 : 5; // Maximum number of outstanding AXI write transactions per port - 1
+            uint32_t reserved1 : 3;
+            uint32_t max_beats : 2; // Burst split alignment
+            uint32_t reserved2 : 14;
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR cfg_ext_cap_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR cfg_ext_cap_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    cfg_ext_cap_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_read_m1() const
+    {
+        auto v = ((1U << 6) - 1) & (word0 >> 0); // Extract bits [5:0]
+        return v;
+    }
+    CONSTEXPR cfg_ext_cap_r& set_max_outstanding_read_m1(uint32_t value)
+    {
+        word0 = (~(((1U << 6) - 1)<<0) & word0) | ((((1U << 6) - 1) & value) << 0); // Replace bits [5:0]; higher bits of value are masked off
+        return *this; // Allows chained setter calls
+    }
+    CONSTEXPR uint32_t get_max_outstanding_write_m1() const
+    {
+        auto v = ((1U << 5) - 1) & (word0 >> 8); // Extract bits [12:8]
+        return v;
+    }
+    CONSTEXPR cfg_ext_cap_r& set_max_outstanding_write_m1(uint32_t value)
+    {
+        word0 = (~(((1U << 5) - 1)<<8) & word0) | ((((1U << 5) - 1) & value) << 8); // Replace bits [12:8]; higher bits of value are masked off
+        return *this; // Allows chained setter calls
+    }
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 16); // Extract bits [17:16]
+        assert(v <= 2); // Values above 2 trip the assert (presumably not valid max_beats enumerators - confirm against the enum)
+        return static_cast<NPU_NAMESPACE::max_beats>(v);
+    }
+    CONSTEXPR cfg_ext_cap_r& set_max_beats(NPU_NAMESPACE::max_beats value)
+    {
+        word0 = (~(((1U << 2) - 1)<<16) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 16); // Replace bits [17:16] with the enum's numeric value
+        return *this; // Allows chained setter calls
+    }
+#endif // __cplusplus
+};
+
+// cfg_sram_hash0_r - The value of the CFGSRAMHASH0 pins, SRAM AXI port select bit 0 hash
+struct cfg_sram_hash0_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t zero : 6; // Must be zero
+            uint32_t hash_LO : 26; // Hash function - LSB
+            uint32_t hash_HI : 8; // Hash function - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR cfg_sram_hash0_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR cfg_sram_hash0_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    cfg_sram_hash0_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// cfg_sram_hash1_r - The value of the CFGSRAMHASH1 pins, SRAM AXI port select bit 1 hash
+struct cfg_sram_hash1_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t zero : 6; // Must be zero
+            uint32_t hash_LO : 26; // Hash function - LSB
+            uint32_t hash_HI : 8; // Hash function - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR cfg_sram_hash1_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR cfg_sram_hash1_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    cfg_sram_hash1_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// cfg_ext_hash0_r - The value of the CFGEXTHASH0 pins, EXT AXI port select bit 0 hash
+struct cfg_ext_hash0_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t zero : 6; // Must be zero
+            uint32_t hash_LO : 26; // Hash function - LSB
+            uint32_t hash_HI : 8; // Hash function - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR cfg_ext_hash0_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR cfg_ext_hash0_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    cfg_ext_hash0_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// basep_r - AXI base address of the respective region number 0 - 7
+struct basep_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR basep_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR basep_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    basep_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// clkforce_r - Force clocks on for clock gating
+struct clkforce_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on the raw 32-bit word
+    union
+    {
+        struct
+        {
+            uint32_t top_level_clk : 1; // set to 1 to force on TOP level clock
+            uint32_t cc_clk : 1; // set to 1 to force on CC clock
+            uint32_t dma_clk : 1; // set to 1 to force on DMA clock
+            uint32_t mac_clk : 1; // set to 1 to force on MAC clock
+            uint32_t ao_clk : 1; // set to 1 to force on AO clock
+            uint32_t wd_clk : 1; // set to 1 to force on WD clock
+            uint32_t reserved0 : 26;
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with one getter/setter pair per single-bit field
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR clkforce_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR clkforce_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    clkforce_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_top_level_clk() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0); // Extract bit 0
+        return v;
+    }
+    CONSTEXPR clkforce_r& set_top_level_clk(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0); // Replace bit 0 with the lowest bit of value
+        return *this; // Allows chained setter calls
+    }
+    CONSTEXPR uint32_t get_cc_clk() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 1); // Extract bit 1
+        return v;
+    }
+    CONSTEXPR clkforce_r& set_cc_clk(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<1) & word0) | ((((1U << 1) - 1) & value) << 1); // Replace bit 1 with the lowest bit of value
+        return *this; // Allows chained setter calls
+    }
+    CONSTEXPR uint32_t get_dma_clk() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 2); // Extract bit 2
+        return v;
+    }
+    CONSTEXPR clkforce_r& set_dma_clk(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<2) & word0) | ((((1U << 1) - 1) & value) << 2); // Replace bit 2 with the lowest bit of value
+        return *this; // Allows chained setter calls
+    }
+    CONSTEXPR uint32_t get_mac_clk() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 3); // Extract bit 3
+        return v;
+    }
+    CONSTEXPR clkforce_r& set_mac_clk(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<3) & word0) | ((((1U << 1) - 1) & value) << 3); // Replace bit 3 with the lowest bit of value
+        return *this; // Allows chained setter calls
+    }
+    CONSTEXPR uint32_t get_ao_clk() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 4); // Extract bit 4
+        return v;
+    }
+    CONSTEXPR clkforce_r& set_ao_clk(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<4) & word0) | ((((1U << 1) - 1) & value) << 4); // Replace bit 4 with the lowest bit of value
+        return *this; // Allows chained setter calls
+    }
+    CONSTEXPR uint32_t get_wd_clk() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 5); // Extract bit 5
+        return v;
+    }
+    CONSTEXPR clkforce_r& set_wd_clk(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<5) & word0) | ((((1U << 1) - 1) & value) << 5); // Replace bit 5 with the lowest bit of value
+        return *this; // Allows chained setter calls
+    }
+#endif // __cplusplus
+};
+
+// debug_address_r - Set debug address for register reads 0x400-0x7FF. The address must be 1KB aligned
+struct debug_address_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on the raw 32-bit word
+    union
+    {
+        struct
+        {
+            uint32_t addr : 28; // Address in the RAM, 1KB aligned
+            uint32_t ram_id : 4; // RAM to access
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR debug_address_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR debug_address_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    debug_address_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_addr() const
+    {
+        auto v = ((1U << 28) - 1) & (word0 >> 0); // Extract bits [27:0]
+        return v;
+    }
+    CONSTEXPR debug_address_r& set_addr(uint32_t value) // Stores value as given; the 1KB alignment is not checked here
+    {
+        word0 = (~(((1U << 28) - 1)<<0) & word0) | ((((1U << 28) - 1) & value) << 0); // Replace bits [27:0]; higher bits of value are masked off
+        return *this; // Allows chained setter calls
+    }
+    CONSTEXPR NPU_NAMESPACE::ram_id get_ram_id() const
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 28); // Extract bits [31:28]
+        assert(v <= 4); // Values above 4 trip the assert (presumably not valid ram_id enumerators - confirm against the enum)
+        return static_cast<NPU_NAMESPACE::ram_id>(v);
+    }
+    CONSTEXPR debug_address_r& set_ram_id(NPU_NAMESPACE::ram_id value)
+    {
+        word0 = (~(((1U << 4) - 1)<<28) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 28); // Replace bits [31:28] with the enum's numeric value
+        return *this; // Allows chained setter calls
+    }
+#endif // __cplusplus
+};
+
+// debug_misc_r - 32-bit read/write register for driver debug use. This does not affect NPU function
+struct debug_misc_r
+{
+#ifndef __cplusplus // C view: one 32-bit bit-field overlaid on the raw word
+    union
+    {
+        struct
+        {
+            uint32_t misc : 32; // Debug misc
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR debug_misc_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR debug_misc_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    debug_misc_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_misc() const // The field spans the full word, so no mask or shift is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR debug_misc_r& set_misc(uint32_t value) // Same effect on word0 as operator=, but returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_ifm_src_r - DMA IFM channel source position on AXI
+struct dma_ifm_src_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_ifm_src_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_ifm_src_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_ifm_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_ifm_dst_r - DMA IFM channel destination position in SHRAM
+struct dma_ifm_dst_r
+{
+#ifndef __cplusplus // C view: one 32-bit bit-field overlaid on the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR dma_ifm_dst_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR dma_ifm_dst_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    dma_ifm_dst_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // The field spans the full word, so no mask or shift is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR dma_ifm_dst_r& set_value(uint32_t value) // Same effect on word0 as operator=, but returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_ofm_src_r - DMA OFM channel source position in SHRAM
+struct dma_ofm_src_r
+{
+#ifndef __cplusplus // C view: one 32-bit bit-field overlaid on the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR dma_ofm_src_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR dma_ofm_src_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    dma_ofm_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // The field spans the full word, so no mask or shift is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR dma_ofm_src_r& set_value(uint32_t value) // Same effect on word0 as operator=, but returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_ofm_dst_r - DMA OFM channel destination position on AXI
+struct dma_ofm_dst_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_ofm_dst_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_ofm_dst_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_ofm_dst_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_weight_src_r - DMA weight channel source position on AXI
+struct dma_weight_src_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_weight_src_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_weight_src_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_weight_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_cmd_src_r - DMA command channel source position on AXI
+struct dma_cmd_src_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_cmd_src_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_cmd_src_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_cmd_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_cmd_size_r - DMA command channel number of bytes buffered
+struct dma_cmd_size_r
+{
+#ifndef __cplusplus // C view: one 32-bit bit-field overlaid on the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR dma_cmd_size_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR dma_cmd_size_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    dma_cmd_size_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // The field spans the full word, so no mask or shift is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR dma_cmd_size_r& set_value(uint32_t value) // Same effect on word0 as operator=, but returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_m2m_src_r - DMA memory to memory source position on AXI
+struct dma_m2m_src_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_m2m_src_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_m2m_src_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_m2m_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_m2m_dst_r - DMA memory to memory destination position on AXI
+struct dma_m2m_dst_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_m2m_dst_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_m2m_dst_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_m2m_dst_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// current_qread_r - QREAD position being issued (rather than completed)
+struct current_qread_r
+{
+#ifndef __cplusplus // C view: one 32-bit bit-field overlaid on the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR current_qread_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR current_qread_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    current_qread_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // The field spans the full word, so no mask or shift is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR current_qread_r& set_value(uint32_t value) // Same effect on word0 as operator=, but returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_scale_src_r - DMA scale and bias channel source position on AXI
+struct dma_scale_src_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_scale_src_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_scale_src_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_scale_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_weight1_src_r - DMA weight1 channel source position on AXI
+struct dma_weight1_src_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_weight1_src_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_weight1_src_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_weight1_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_weight2_src_r - DMA weight2 channel source position on AXI
+struct dma_weight2_src_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_weight2_src_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_weight2_src_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_weight2_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma_weight3_src_r - DMA weight3 channel source position on AXI
+struct dma_weight3_src_r
+{
+#ifndef __cplusplus // C view: named bit-fields overlaid on two raw 32-bit words
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8; // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2]; // Whole register as two 32-bit words
+    };
+#else // C++ view: two private raw words; whole-value access only, no per-field accessors
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit value
+    uint32_t word1; // High 32 bits of the 64-bit value
+public:
+    CONSTEXPR dma_weight3_src_r() : // Default: all bits zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma_weight3_src_r(uint64_t init) : // Split init into low/high halves; all 64 bits are stored, nothing is masked
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // Overwrite both words from a 64-bit value; returns void
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // Recombine the two words into one 64-bit value (declared without const)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 -> bits [63:32], word0 -> bits [31:0]
+    }
+    dma_weight3_src_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// current_op_r - Latest NPU OP command issued by the parser
+struct current_op_r
+{
+#ifndef __cplusplus // C view: one 32-bit bit-field overlaid on the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR current_op_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR current_op_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    current_op_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // The field spans the full word, so no mask or shift is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR current_op_r& set_value(uint32_t value) // Same effect on word0 as operator=, but returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// current_cmd_r - Current 32-bit command being parsed by the command stream parser
+struct current_cmd_r
+{
+#ifndef __cplusplus // C view: one 32-bit bit-field overlaid on the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR current_cmd_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR current_cmd_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole register image; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    current_cmd_r copy() // Return a by-value copy of this register image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // The field spans the full word, so no mask or shift is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR current_cmd_r& set_value(uint32_t value) // Same effect on word0 as operator=, but returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// internal_memory_r - 1KB window onto internal memory as set by DEBUG_ADDRESS
+struct internal_memory_r
+{
+#ifndef __cplusplus // C view: one 32-bit bit-field overlaid on the raw word
+    union
+    {
+        struct
+        {
+            uint32_t mem_word : 32; // Memory word
+        };
+        uint32_t word; // Whole register as a single 32-bit value
+    };
+#else // C++ view: private raw word with accessor methods
+private:
+    uint32_t word0; // Raw 32-bit image of one memory word
+public:
+    CONSTEXPR internal_memory_r() : // Default: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR internal_memory_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrite the whole word; returns void
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Read back the raw value (declared without const)
+    {
+        return word0;
+    }
+    internal_memory_r copy() // Return a by-value copy of this word image
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mem_word() const // The field spans the full word, so no mask or shift is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR internal_memory_r& set_mem_word(uint32_t value) // Same effect on word0 as operator=, but returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ifm_pad_top_r - State managed by NPU_SET_IFM_PAD_TOP
+struct ifm_pad_top_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_pad_top_r() : word0(0) {}
+    CONSTEXPR ifm_pad_top_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_pad_top_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_pad_top_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_pad_left_r - State managed by NPU_SET_IFM_PAD_LEFT
+struct ifm_pad_left_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_pad_left_r() : word0(0) {}
+    CONSTEXPR ifm_pad_left_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_pad_left_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_pad_left_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_pad_right_r - State managed by NPU_SET_IFM_PAD_RIGHT
+struct ifm_pad_right_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_pad_right_r() : word0(0) {}
+    CONSTEXPR ifm_pad_right_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_pad_right_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_pad_right_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_pad_bottom_r - State managed by NPU_SET_IFM_PAD_BOTTOM
+struct ifm_pad_bottom_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_pad_bottom_r() : word0(0) {}
+    CONSTEXPR ifm_pad_bottom_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_pad_bottom_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_pad_bottom_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_depth_m1_r - State managed by NPU_SET_IFM_DEPTH_M1
+struct ifm_depth_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_depth_m1_r() : word0(0) {}
+    CONSTEXPR ifm_depth_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_depth_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_depth_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_precision_r - State managed by NPU_SET_IFM_PRECISION
+struct ifm_precision_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_precision_r() : word0(0) {}
+    CONSTEXPR ifm_precision_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_precision_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_precision_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_upscale_r - State managed by NPU_SET_IFM_UPSCALE
+struct ifm_upscale_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_upscale_r() : word0(0) {}
+    CONSTEXPR ifm_upscale_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_upscale_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_upscale_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_broadcast_r - State managed by NPU_SET_IFM_BROADCAST
+struct ifm_broadcast_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_broadcast_r() : word0(0) {}
+    CONSTEXPR ifm_broadcast_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_broadcast_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_broadcast_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_zero_point_r - State managed by NPU_SET_IFM_ZERO_POINT
+struct ifm_zero_point_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_zero_point_r() : word0(0) {}
+    CONSTEXPR ifm_zero_point_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_zero_point_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_zero_point_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_width0_m1_r - State managed by NPU_SET_IFM_WIDTH0_M1
+struct ifm_width0_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_width0_m1_r() : word0(0) {}
+    CONSTEXPR ifm_width0_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_width0_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_width0_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_height0_m1_r - State managed by NPU_SET_IFM_HEIGHT0_M1
+struct ifm_height0_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_height0_m1_r() : word0(0) {}
+    CONSTEXPR ifm_height0_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_height0_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_height0_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_height1_m1_r - State managed by NPU_SET_IFM_HEIGHT1_M1
+struct ifm_height1_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_height1_m1_r() : word0(0) {}
+    CONSTEXPR ifm_height1_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_height1_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_height1_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ifm_region_r - State managed by NPU_SET_IFM_REGION
+struct ifm_region_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ifm_region_r() : word0(0) {}
+    CONSTEXPR ifm_region_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ifm_region_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ifm_region_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_width_m1_r - State managed by NPU_SET_OFM_WIDTH_M1
+struct ofm_width_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_width_m1_r() : word0(0) {}
+    CONSTEXPR ofm_width_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_width_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_width_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_height_m1_r - State managed by NPU_SET_OFM_HEIGHT_M1
+struct ofm_height_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_height_m1_r() : word0(0) {}
+    CONSTEXPR ofm_height_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_height_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_height_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_depth_m1_r - State managed by NPU_SET_OFM_DEPTH_M1
+struct ofm_depth_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_depth_m1_r() : word0(0) {}
+    CONSTEXPR ofm_depth_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_depth_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_depth_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_precision_r - State managed by NPU_SET_OFM_PRECISION
+struct ofm_precision_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_precision_r() : word0(0) {}
+    CONSTEXPR ofm_precision_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_precision_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_precision_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_blk_width_m1_r - State managed by NPU_SET_OFM_BLK_WIDTH_M1
+struct ofm_blk_width_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_blk_width_m1_r() : word0(0) {}
+    CONSTEXPR ofm_blk_width_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_blk_width_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_blk_width_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_blk_height_m1_r - State managed by NPU_SET_OFM_BLK_HEIGHT_M1
+struct ofm_blk_height_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_blk_height_m1_r() : word0(0) {}
+    CONSTEXPR ofm_blk_height_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_blk_height_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_blk_height_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_blk_depth_m1_r - State managed by NPU_SET_OFM_BLK_DEPTH_M1
+struct ofm_blk_depth_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_blk_depth_m1_r() : word0(0) {}
+    CONSTEXPR ofm_blk_depth_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_blk_depth_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_blk_depth_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_zero_point_r - State managed by NPU_SET_OFM_ZERO_POINT
+struct ofm_zero_point_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_zero_point_r() : word0(0) {}
+    CONSTEXPR ofm_zero_point_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_zero_point_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_zero_point_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_width0_m1_r - State managed by NPU_SET_OFM_WIDTH0_M1
+struct ofm_width0_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_width0_m1_r() : word0(0) {}
+    CONSTEXPR ofm_width0_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_width0_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_width0_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_height0_m1_r - State managed by NPU_SET_OFM_HEIGHT0_M1
+struct ofm_height0_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_height0_m1_r() : word0(0) {}
+    CONSTEXPR ofm_height0_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_height0_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_height0_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_height1_m1_r - State managed by NPU_SET_OFM_HEIGHT1_M1
+struct ofm_height1_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_height1_m1_r() : word0(0) {}
+    CONSTEXPR ofm_height1_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_height1_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_height1_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// ofm_region_r - State managed by NPU_SET_OFM_REGION
+struct ofm_region_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR ofm_region_r() : word0(0) {}
+    CONSTEXPR ofm_region_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    ofm_region_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR ofm_region_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// kernel_width_m1_r - State managed by NPU_SET_KERNEL_WIDTH_M1
+struct kernel_width_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR kernel_width_m1_r() : word0(0) {}
+    CONSTEXPR kernel_width_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    kernel_width_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR kernel_width_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// kernel_height_m1_r - State managed by NPU_SET_KERNEL_HEIGHT_M1
+struct kernel_height_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR kernel_height_m1_r() : word0(0) {}
+    CONSTEXPR kernel_height_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    kernel_height_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR kernel_height_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// kernel_stride_r - State managed by NPU_SET_KERNEL_STRIDE
+struct kernel_stride_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR kernel_stride_r() : word0(0) {}
+    CONSTEXPR kernel_stride_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    kernel_stride_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR kernel_stride_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// acc_format_r - State managed by NPU_SET_ACC_FORMAT
+struct acc_format_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR acc_format_r() : word0(0) {}
+    CONSTEXPR acc_format_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    acc_format_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR acc_format_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// activation_r - State managed by NPU_SET_ACTIVATION
+struct activation_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR activation_r() : word0(0) {}
+    CONSTEXPR activation_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    activation_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR activation_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// activation_min_r - State managed by NPU_SET_ACTIVATION_MIN
+struct activation_min_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR activation_min_r() : word0(0) {}
+    CONSTEXPR activation_min_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    activation_min_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR activation_min_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// activation_max_r - State managed by NPU_SET_ACTIVATION_MAX
+struct activation_max_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR activation_max_r() : word0(0) {}
+    CONSTEXPR activation_max_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    activation_max_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR activation_max_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// weight_region_r - State managed by NPU_SET_WEIGHT_REGION
+struct weight_region_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR weight_region_r() : word0(0) {}
+    CONSTEXPR weight_region_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    weight_region_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR weight_region_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// scale_region_r - State managed by NPU_SET_SCALE_REGION
+struct scale_region_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR scale_region_r() : word0(0) {}
+    CONSTEXPR scale_region_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    scale_region_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR scale_region_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// resize_x_scale_n_m1_r - State managed by NPU_SET_RESIZE_X_SCALE_N_M1
+struct resize_x_scale_n_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR resize_x_scale_n_m1_r() : word0(0) {}
+    CONSTEXPR resize_x_scale_n_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    resize_x_scale_n_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR resize_x_scale_n_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// resize_y_scale_n_m1_r - State managed by NPU_SET_RESIZE_Y_SCALE_N_M1
+struct resize_y_scale_n_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // Same 32 bits viewed as one word
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register contents
+public:
+    // Construct zeroed, or from a raw 32-bit register value.
+    CONSTEXPR resize_y_scale_n_m1_r() : word0(0) {}
+    CONSTEXPR resize_y_scale_n_m1_r(uint32_t init) : word0(init) {}
+    // Replace the whole register with a raw 32-bit value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Read the raw word; const-qualified so const objects convert too.
+    CONSTEXPR operator uint32_t() const
+    {
+        return word0;
+    }
+    // Return a by-value copy; const-qualified since nothing is modified.
+    resize_y_scale_n_m1_r copy() const
+    {
+        return *this;
+    }
+    // Accessors for `value`, which spans all 32 bits of the word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    CONSTEXPR resize_y_scale_n_m1_r& set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this; // allows chained setter calls
+    }
+#endif
+};
+
+// resize_x_offset_r - State managed by NPU_SET_RESIZE_X_OFFSET; one 32-bit register word
+struct resize_x_offset_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR resize_x_offset_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR resize_x_offset_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    resize_x_offset_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR resize_x_offset_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// resize_y_offset_r - State managed by NPU_SET_RESIZE_Y_OFFSET; one 32-bit register word
+struct resize_y_offset_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR resize_y_offset_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR resize_y_offset_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    resize_y_offset_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR resize_y_offset_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// weight_format_r - State managed by NPU_SET_WEIGHT_FORMAT; one 32-bit register word
+struct weight_format_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR weight_format_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR weight_format_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    weight_format_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR weight_format_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// blockdep_r - State managed by NPU_SET_BLOCKDEP; one 32-bit register word
+struct blockdep_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR blockdep_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR blockdep_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    blockdep_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR blockdep_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_src_region_r - State managed by NPU_SET_DMA0_SRC_REGION; one 32-bit register word
+struct dma0_src_region_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR dma0_src_region_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR dma0_src_region_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    dma0_src_region_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR dma0_src_region_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_dst_region_r - State managed by NPU_SET_DMA0_DST_REGION; one 32-bit register word
+struct dma0_dst_region_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR dma0_dst_region_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR dma0_dst_region_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    dma0_dst_region_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR dma0_dst_region_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_size0_r - State managed by NPU_SET_DMA0_SIZE0; one 32-bit register word
+struct dma0_size0_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR dma0_size0_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR dma0_size0_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    dma0_size0_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR dma0_size0_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_size1_r - State managed by NPU_SET_DMA0_SIZE1; one 32-bit register word
+struct dma0_size1_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR dma0_size1_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR dma0_size1_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    dma0_size1_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR dma0_size1_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_idx_region_r - State managed by NPU_SET_DMA0_IDX_REGION; one 32-bit register word
+struct dma0_idx_region_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR dma0_idx_region_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR dma0_idx_region_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    dma0_idx_region_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR dma0_idx_region_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_broadcast_r - State managed by NPU_SET_IFM2_BROADCAST; one 32-bit register word
+struct ifm2_broadcast_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR ifm2_broadcast_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR ifm2_broadcast_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    ifm2_broadcast_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR ifm2_broadcast_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_precision_r - State managed by NPU_SET_IFM2_PRECISION; one 32-bit register word
+struct ifm2_precision_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR ifm2_precision_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR ifm2_precision_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    ifm2_precision_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR ifm2_precision_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_zero_point_r - State managed by NPU_SET_IFM2_ZERO_POINT; one 32-bit register word
+struct ifm2_zero_point_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR ifm2_zero_point_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR ifm2_zero_point_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    ifm2_zero_point_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR ifm2_zero_point_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_width0_m1_r - State managed by NPU_SET_IFM2_WIDTH0_M1; one 32-bit register word
+struct ifm2_width0_m1_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR ifm2_width0_m1_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR ifm2_width0_m1_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    ifm2_width0_m1_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR ifm2_width0_m1_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_height0_m1_r - State managed by NPU_SET_IFM2_HEIGHT0_M1; one 32-bit register word
+struct ifm2_height0_m1_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR ifm2_height0_m1_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR ifm2_height0_m1_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    ifm2_height0_m1_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR ifm2_height0_m1_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_height1_m1_r - State managed by NPU_SET_IFM2_HEIGHT1_M1; one 32-bit register word
+struct ifm2_height1_m1_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR ifm2_height1_m1_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR ifm2_height1_m1_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    ifm2_height1_m1_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR ifm2_height1_m1_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_region_r - State managed by NPU_SET_IFM2_REGION; one 32-bit register word
+struct ifm2_region_r
+{
+#ifndef __cplusplus
+    union // C view: bitfield and raw word overlay the same storage
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the whole word
+    };
+#else
+private:
+    uint32_t word0; // raw register word (C++ view)
+public:
+    CONSTEXPR ifm2_region_r() : // all bits cleared
+        word0(0)
+    {}
+    CONSTEXPR ifm2_region_r(uint32_t init) : // from a raw register word
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the raw word
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() const // const, so const objects convert too
+    {
+        return word0;
+    }
+    ifm2_region_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // 'value' spans the whole word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR ifm2_region_r& set_value(uint32_t value) // returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_base0_r - State managed by NPU_SET_IFM_BASE0; 64-bit value held as two 32-bit words
+struct ifm_base0_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ifm_base0_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ifm_base0_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base0_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base1_r - State managed by NPU_SET_IFM_BASE1; 64-bit value held as two 32-bit words
+struct ifm_base1_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ifm_base1_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ifm_base1_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base1_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base2_r - State managed by NPU_SET_IFM_BASE2; 64-bit value held as two 32-bit words
+struct ifm_base2_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ifm_base2_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ifm_base2_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base2_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base3_r - State managed by NPU_SET_IFM_BASE3; 64-bit value held as two 32-bit words
+struct ifm_base3_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ifm_base3_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ifm_base3_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base3_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_x_r - State managed by NPU_SET_IFM_STRIDE_X; 64-bit value held as two 32-bit words
+struct ifm_stride_x_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ifm_stride_x_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ifm_stride_x_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_x_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_y_r - State managed by NPU_SET_IFM_STRIDE_Y; 64-bit value held as two 32-bit words
+struct ifm_stride_y_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ifm_stride_y_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ifm_stride_y_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_y_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_c_r - State managed by NPU_SET_IFM_STRIDE_C; 64-bit value held as two 32-bit words
+struct ifm_stride_c_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ifm_stride_c_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ifm_stride_c_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_c_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base0_r - State managed by NPU_SET_OFM_BASE0; 64-bit value held as two 32-bit words
+struct ofm_base0_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ofm_base0_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ofm_base0_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base0_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base1_r - State managed by NPU_SET_OFM_BASE1; 64-bit value held as two 32-bit words
+struct ofm_base1_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ofm_base1_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ofm_base1_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base1_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base2_r - State managed by NPU_SET_OFM_BASE2; 64-bit value held as two 32-bit words
+struct ofm_base2_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ofm_base2_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ofm_base2_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base2_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base3_r - State managed by NPU_SET_OFM_BASE3; 64-bit value held as two 32-bit words
+struct ofm_base3_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ofm_base3_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ofm_base3_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base3_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_x_r - State managed by NPU_SET_OFM_STRIDE_X; 64-bit value held as two 32-bit words
+struct ofm_stride_x_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ofm_stride_x_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ofm_stride_x_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_x_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_y_r - State managed by NPU_SET_OFM_STRIDE_Y; 64-bit value held as two 32-bit words
+struct ofm_stride_y_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ofm_stride_y_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ofm_stride_y_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_y_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_c_r - State managed by NPU_SET_OFM_STRIDE_C; 64-bit value held as two 32-bit words
+struct ofm_stride_c_r
+{
+#ifndef __cplusplus
+    union // C view: LO/HI bitfields and raw words overlay the same storage
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to both words
+    };
+#else
+private:
+    uint32_t word0; // low 32 bits
+    uint32_t word1; // high 32 bits
+public:
+    CONSTEXPR ofm_stride_c_r() : // both words cleared
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR ofm_stride_c_r(uint64_t init) : // split into low/high words
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrite both words
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() const // const, so const objects can be read
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_c_r copy() const // by-value snapshot; *this is untouched
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight_base_r - State managed by NPU_SET_WEIGHT_BASE; a 64-bit value kept as two 32-bit words
+struct weight_base_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR weight_base_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR weight_base_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    weight_base_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// weight_length_r - State managed by NPU_SET_WEIGHT_LENGTH; a 64-bit value kept as two 32-bit words
+struct weight_length_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR weight_length_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR weight_length_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    weight_length_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// scale_base_r - State managed by NPU_SET_SCALE_BASE; a 64-bit value kept as two 32-bit words
+struct scale_base_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR scale_base_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR scale_base_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    scale_base_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// scale_length_r - State managed by NPU_SET_SCALE_LENGTH; a 64-bit value kept as two 32-bit words
+struct scale_length_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR scale_length_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR scale_length_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    scale_length_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ofm_scale_r - State managed by NPU_SET_OFM_SCALE; a 64-bit value kept as two 32-bit words
+struct ofm_scale_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ofm_scale_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ofm_scale_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ofm_scale_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm_scale_r - State managed by NPU_SET_IFM_SCALE; a 64-bit value kept as two 32-bit words
+struct ifm_scale_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm_scale_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm_scale_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm_scale_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm2_scale_r - State managed by NPU_SET_IFM2_SCALE; a 64-bit value kept as two 32-bit words
+struct ifm2_scale_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm2_scale_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm2_scale_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm2_scale_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// op_scalar_r - State managed by NPU_SET_OP_SCALAR; a 64-bit value kept as two 32-bit words
+struct op_scalar_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR op_scalar_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR op_scalar_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    op_scalar_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// dma0_src_r - State managed by NPU_SET_DMA0_SRC; a 64-bit value kept as two 32-bit words
+struct dma0_src_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR dma0_src_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR dma0_src_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    dma0_src_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// dma0_dst_r - State managed by NPU_SET_DMA0_DST; a 64-bit value kept as two 32-bit words
+struct dma0_dst_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR dma0_dst_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR dma0_dst_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    dma0_dst_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// dma0_len_r - State managed by NPU_SET_DMA0_LEN; a 64-bit value kept as two 32-bit words
+struct dma0_len_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR dma0_len_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR dma0_len_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    dma0_len_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// dma0_src_stride0_r - State managed by NPU_SET_DMA0_SRC_STRIDE0; a 64-bit value kept as two 32-bit words
+struct dma0_src_stride0_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR dma0_src_stride0_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR dma0_src_stride0_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    dma0_src_stride0_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// dma0_src_stride1_r - State managed by NPU_SET_DMA0_SRC_STRIDE1; a 64-bit value kept as two 32-bit words
+struct dma0_src_stride1_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR dma0_src_stride1_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR dma0_src_stride1_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    dma0_src_stride1_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// dma0_dst_stride0_r - State managed by NPU_SET_DMA0_DST_STRIDE0; a 64-bit value kept as two 32-bit words
+struct dma0_dst_stride0_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR dma0_dst_stride0_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR dma0_dst_stride0_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    dma0_dst_stride0_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// dma0_dst_stride1_r - State managed by NPU_SET_DMA0_DST_STRIDE1; a 64-bit value kept as two 32-bit words
+struct dma0_dst_stride1_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR dma0_dst_stride1_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR dma0_dst_stride1_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    dma0_dst_stride1_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// dma0_idx_r - State managed by NPU_SET_DMA0_IDX; a 64-bit value kept as two 32-bit words
+struct dma0_idx_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR dma0_idx_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR dma0_idx_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    dma0_idx_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm2_base0_r - State managed by NPU_SET_IFM2_BASE0; a 64-bit value kept as two 32-bit words
+struct ifm2_base0_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm2_base0_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm2_base0_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm2_base0_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm2_base1_r - State managed by NPU_SET_IFM2_BASE1; a 64-bit value kept as two 32-bit words
+struct ifm2_base1_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm2_base1_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm2_base1_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm2_base1_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm2_base2_r - State managed by NPU_SET_IFM2_BASE2; a 64-bit value kept as two 32-bit words
+struct ifm2_base2_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm2_base2_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm2_base2_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm2_base2_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm2_base3_r - State managed by NPU_SET_IFM2_BASE3; a 64-bit value kept as two 32-bit words
+struct ifm2_base3_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm2_base3_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm2_base3_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm2_base3_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm2_stride_x_r - State managed by NPU_SET_IFM2_STRIDE_X; a 64-bit value kept as two 32-bit words
+struct ifm2_stride_x_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm2_stride_x_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm2_stride_x_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm2_stride_x_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm2_stride_y_r - State managed by NPU_SET_IFM2_STRIDE_Y; a 64-bit value kept as two 32-bit words
+struct ifm2_stride_y_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm2_stride_y_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm2_stride_y_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm2_stride_y_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// ifm2_stride_c_r - State managed by NPU_SET_IFM2_STRIDE_C; a 64-bit value kept as two 32-bit words
+struct ifm2_stride_c_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR ifm2_stride_c_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR ifm2_stride_c_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    ifm2_stride_c_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// weight1_base_r - State managed by NPU_SET_WEIGHT1_BASE; a 64-bit value kept as two 32-bit words
+struct weight1_base_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR weight1_base_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR weight1_base_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    weight1_base_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// weight1_length_r - State managed by NPU_SET_WEIGHT1_LENGTH; a 64-bit value kept as two 32-bit words
+struct weight1_length_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR weight1_length_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR weight1_length_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    weight1_length_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// weight2_base_r - State managed by NPU_SET_WEIGHT2_BASE; a 64-bit value kept as two 32-bit words
+struct weight2_base_r
+{
+#ifdef __cplusplus
+private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+public:
+    // Start with both words cleared to zero.
+    CONSTEXPR weight2_base_r() : word0(0), word1(0) {}
+    // Build from a 64-bit value by splitting it into low and high words.
+    CONSTEXPR weight2_base_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFu)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFu))
+    {}
+    // Replace both words with the halves of a new 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFu);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFu);
+    }
+    // Recombine the two words into the 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return word0 | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Hand back a by-value copy of this object.
+    weight2_base_r copy() { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - low 32 bits
+            uint32_t value_HI : 32; // 64-bit register value - high 32 bits
+        };
+        uint32_t word[2]; // word-wise view of the same storage
+    };
+#endif
+};
+
+// weight2_length_r - State managed by NPU_SET_WEIGHT2_LENGTH (64-bit value held as two 32-bit words)
+struct weight2_length_r
+{
+#ifndef __cplusplus // non-C++ builds: LO/HI fields overlaid with a raw two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to the same storage as two 32-bit words
+    };
+#else // C++ builds: private words plus uint64_t construction and conversion
+private:
+    uint32_t word0; // bits [31:0] of the 64-bit value
+    uint32_t word1; // bits [63:32] of the 64-bit value
+public:
+    CONSTEXPR weight2_length_r() : // default-constructs with both words zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR weight2_length_r(uint64_t init) : // splits init into its low and high 32-bit halves
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // recombines word1 (high) and word0 (low) into one 64-bit value
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight2_length_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight3_base_r - State managed by NPU_SET_WEIGHT3_BASE (64-bit value held as two 32-bit words)
+struct weight3_base_r
+{
+#ifndef __cplusplus // non-C++ builds: LO/HI fields overlaid with a raw two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to the same storage as two 32-bit words
+    };
+#else // C++ builds: private words plus uint64_t construction and conversion
+private:
+    uint32_t word0; // bits [31:0] of the 64-bit value
+    uint32_t word1; // bits [63:32] of the 64-bit value
+public:
+    CONSTEXPR weight3_base_r() : // default-constructs with both words zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR weight3_base_r(uint64_t init) : // splits init into its low and high 32-bit halves
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // recombines word1 (high) and word0 (low) into one 64-bit value
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight3_base_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight3_length_r - State managed by NPU_SET_WEIGHT3_LENGTH (64-bit value held as two 32-bit words)
+struct weight3_length_r
+{
+#ifndef __cplusplus // non-C++ builds: LO/HI fields overlaid with a raw two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to the same storage as two 32-bit words
+    };
+#else // C++ builds: private words plus uint64_t construction and conversion
+private:
+    uint32_t word0; // bits [31:0] of the 64-bit value
+    uint32_t word1; // bits [63:32] of the 64-bit value
+public:
+    CONSTEXPR weight3_length_r() : // default-constructs with both words zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR weight3_length_r(uint64_t init) : // splits init into its low and high 32-bit halves
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // recombines word1 (high) and word0 (low) into one 64-bit value
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight3_length_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+#endif
+};
+
+// resize_x_step_r - State managed by NPU_SET_RESIZE_X_STEP (64-bit value held as two 32-bit words)
+struct resize_x_step_r
+{
+#ifndef __cplusplus // non-C++ builds: LO/HI fields overlaid with a raw two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to the same storage as two 32-bit words
+    };
+#else // C++ builds: private words plus uint64_t construction and conversion
+private:
+    uint32_t word0; // bits [31:0] of the 64-bit value
+    uint32_t word1; // bits [63:32] of the 64-bit value
+public:
+    CONSTEXPR resize_x_step_r() : // default-constructs with both words zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR resize_x_step_r(uint64_t init) : // splits init into its low and high 32-bit halves
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // recombines word1 (high) and word0 (low) into one 64-bit value
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    resize_x_step_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+#endif
+};
+
+// resize_y_step_r - State managed by NPU_SET_RESIZE_Y_STEP (64-bit value held as two 32-bit words)
+struct resize_y_step_r
+{
+#ifndef __cplusplus // non-C++ builds: LO/HI fields overlaid with a raw two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to the same storage as two 32-bit words
+    };
+#else // C++ builds: private words plus uint64_t construction and conversion
+private:
+    uint32_t word0; // bits [31:0] of the 64-bit value
+    uint32_t word1; // bits [63:32] of the 64-bit value
+public:
+    CONSTEXPR resize_y_step_r() : // default-constructs with both words zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR resize_y_step_r(uint64_t init) : // splits init into its low and high 32-bit halves
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // recombines word1 (high) and word0 (low) into one 64-bit value
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    resize_y_step_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma0_idx_max_r - State managed by NPU_SET_DMA0_IDX_MAX (64-bit value held as two 32-bit words)
+struct dma0_idx_max_r
+{
+#ifndef __cplusplus // non-C++ builds: LO/HI fields overlaid with a raw two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to the same storage as two 32-bit words
+    };
+#else // C++ builds: private words plus uint64_t construction and conversion
+private:
+    uint32_t word0; // bits [31:0] of the 64-bit value
+    uint32_t word1; // bits [63:32] of the 64-bit value
+public:
+    CONSTEXPR dma0_idx_max_r() : // default-constructs with both words zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma0_idx_max_r(uint64_t init) : // splits init into its low and high 32-bit halves
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // recombines word1 (high) and word0 (low) into one 64-bit value
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_idx_max_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma0_idx_skip1_r - State managed by NPU_SET_DMA0_IDX_SKIP1 (64-bit value held as two 32-bit words)
+struct dma0_idx_skip1_r
+{
+#ifndef __cplusplus // non-C++ builds: LO/HI fields overlaid with a raw two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw access to the same storage as two 32-bit words
+    };
+#else // C++ builds: private words plus uint64_t construction and conversion
+private:
+    uint32_t word0; // bits [31:0] of the 64-bit value
+    uint32_t word1; // bits [63:32] of the 64-bit value
+public:
+    CONSTEXPR dma0_idx_skip1_r() : // default-constructs with both words zero
+        word0(0),
+        word1(0)
+    {}
+    CONSTEXPR dma0_idx_skip1_r(uint64_t init) : // splits init into its low and high 32-bit halves
+        word0(static_cast<uint32_t>((init) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()))),
+        word1(static_cast<uint32_t>((init >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); word1 = static_cast<uint32_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()));
+    }
+    CONSTEXPR operator uint64_t() // recombines word1 (high) and word0 (low) into one 64-bit value
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_idx_skip1_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+#endif
+};
+
+// revision_r - Internal FPGA build revision: first 32 bits of the Ultan git hash used for the build
+struct revision_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR revision_r() : // default-constructs to 0
+        word0(0)
+    {}
+    CONSTEXPR revision_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    revision_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the full 32-bit word
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR revision_r& set_value(uint32_t value) // stores value; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid4_r - Peripheral ID byte 4 (Arm=code 4)
+struct pid4_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID4 : 32; // Byte 4 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR pid4_r() : // default-constructs to 4 (0x04)
+        word0(4)
+    {}
+    CONSTEXPR pid4_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    pid4_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID4() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR pid4_r& set_PID4(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid5_r - Peripheral ID byte 5 (reserved)
+struct pid5_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID5 : 32; // Byte 5 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR pid5_r() : // default-constructs to 0
+        word0(0)
+    {}
+    CONSTEXPR pid5_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    pid5_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID5() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR pid5_r& set_PID5(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid6_r - Peripheral ID byte 6 (reserved)
+struct pid6_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID6 : 32; // Byte 6 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR pid6_r() : // default-constructs to 0
+        word0(0)
+    {}
+    CONSTEXPR pid6_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    pid6_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID6() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR pid6_r& set_PID6(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid7_r - Peripheral ID byte 7 (reserved)
+struct pid7_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID7 : 32; // Byte 7 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR pid7_r() : // default-constructs to 0
+        word0(0)
+    {}
+    CONSTEXPR pid7_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    pid7_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID7() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR pid7_r& set_PID7(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid0_r - Peripheral ID byte 0. This is bits[7:0] of the part number
+struct pid0_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID0 : 32; // Byte 0 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR pid0_r() : // default-constructs to 130 (0x82)
+        word0(130)
+    {}
+    CONSTEXPR pid0_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    pid0_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID0() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR pid0_r& set_PID0(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid1_r - Peripheral ID byte 1. This is bits[11:8] of the part number in bits[3:0], and bits[3:0] of the Arm ID in bits[7:4]
+struct pid1_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID1 : 32; // Byte 1 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR pid1_r() : // default-constructs to 181 (0xB5)
+        word0(181)
+    {}
+    CONSTEXPR pid1_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    pid1_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID1() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR pid1_r& set_PID1(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid2_r - Peripheral ID byte 2. This is bits[6:4] of the Arm ID in bits[2:0], and bit 3 indicates format B
+struct pid2_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID2 : 32; // Byte 2 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR pid2_r() : // default-constructs to 11 (0x0B)
+        word0(11)
+    {}
+    CONSTEXPR pid2_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    pid2_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID2() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR pid2_r& set_PID2(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid3_r - Peripheral ID byte 3
+struct pid3_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID3 : 32; // Byte 3 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR pid3_r() : // default-constructs to 0
+        word0(0)
+    {}
+    CONSTEXPR pid3_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    pid3_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID3() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR pid3_r& set_PID3(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid0_r - Component ID byte 0
+struct cid0_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t CID0 : 32; // Byte 0 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR cid0_r() : // default-constructs to 13 (0x0D)
+        word0(13)
+    {}
+    CONSTEXPR cid0_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    cid0_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID0() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR cid0_r& set_CID0(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid1_r - Component ID byte 1
+struct cid1_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t CID1 : 32; // Byte 1 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR cid1_r() : // default-constructs to 240 (0xF0)
+        word0(240)
+    {}
+    CONSTEXPR cid1_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    cid1_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID1() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR cid1_r& set_CID1(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid2_r - Component ID byte 2
+struct cid2_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t CID2 : 32; // Byte 2 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR cid2_r() : // default-constructs to 5 (0x05)
+        word0(5)
+    {}
+    CONSTEXPR cid2_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    cid2_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID2() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR cid2_r& set_CID2(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid3_r - Component ID byte 3
+struct cid3_r
+{
+#ifndef __cplusplus // non-C++ builds: one 32-bit field overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t CID3 : 32; // Byte 3 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word plus conversions and a chainable setter
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR cid3_r() : // default-constructs to 177 (0xB1)
+        word0(177)
+    {}
+    CONSTEXPR cid3_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    cid3_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID3() const // full 32-bit word; no mask to the low 8 bits is applied
+    {
+        auto v = word0;
+        return v;
+    }
+    CONSTEXPR cid3_r& set_CID3(uint32_t value) // stores value unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// wd_status_r - WD_STATUS: 32-bit status word of single-bit flags plus a 2-bit active_core field
+struct wd_status_r
+{
+#ifndef __cplusplus // non-C++ builds: named bitfields overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t ctrl_idle : 1; // All stripe jobs idle (all weights consumed)
+            uint32_t reserved0 : 1;
+            uint32_t active_core : 2; // WD active core
+            uint32_t sc0_idle : 1; // Standard core 0 idle
+            uint32_t sc1_idle : 1; // Standard core 1 idle
+            uint32_t sc2_idle : 1; // Standard core 2 idle
+            uint32_t sc3_idle : 1; // Standard core 3 idle
+            uint32_t fc_idle : 1; // Fast core idle
+            uint32_t tc_idle : 1; // Tensor core (IFM input) idle
+            uint32_t reserved1 : 6;
+            uint32_t wbuf0_valid : 1; // Weight buffer 0 full
+            uint32_t wbuf0_idle : 1; // Weight buffer 0 empty
+            uint32_t wbuf1_valid : 1; // Weight buffer 1 full
+            uint32_t wbuf1_idle : 1; // Weight buffer 1 empty
+            uint32_t wbuf2_valid : 1; // Weight buffer 2 full
+            uint32_t wbuf2_idle : 1; // Weight buffer 2 empty
+            uint32_t wbuf3_valid : 1; // Weight buffer 3 full
+            uint32_t wbuf3_idle : 1; // Weight buffer 3 empty
+            uint32_t stalled_by_ws_sc0 : 1; // WD stalled by lack of standard core 0 weight stream data
+            uint32_t stalled_by_ws_sc1 : 1; // WD stalled by lack of standard core 1 weight stream data
+            uint32_t stalled_by_ws_sc2 : 1; // WD stalled by lack of standard core 2 weight stream data
+            uint32_t stalled_by_ws_sc3 : 1; // WD stalled by lack of standard core 3 weight stream data
+            uint32_t stalled_by_ws_fc : 1; // WD stalled by lack of fast core weight stream data
+            uint32_t stalled_by_ws_tc : 1; // WD stalled by lack of tensor core IFM stream data
+            uint32_t stalled_by_wd_buf : 1; // WD stalled by lack of free WD buffer (blocked by MAC)
+            uint32_t reserved2 : 1;
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word with shift/mask accessors; bits 1, 10-15 and 31 have no accessors
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR wd_status_r() : // default-constructs to 0
+        word0(0)
+    {}
+    CONSTEXPR wd_status_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    wd_status_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ctrl_idle() const // reads bit 0
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_ctrl_idle(uint32_t value) // writes bit 0 from the low bit of value; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::wd_active_core get_active_core() const // reads bits [3:2] as a wd_active_core enumerator
+    {
+        auto v = ((1U << 2) - 1) & (word0 >> 2);
+        assert(v <= 3); // v is already masked to 2 bits, so this always holds
+        return static_cast<NPU_NAMESPACE::wd_active_core>(v);
+    }
+    CONSTEXPR wd_status_r& set_active_core(NPU_NAMESPACE::wd_active_core value) // writes bits [3:2] from the low 2 bits of value; other bits kept
+    {
+        word0 = (~(((1U << 2) - 1)<<2) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_sc0_idle() const // reads bit 4
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_sc0_idle(uint32_t value) // writes bit 4; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_sc1_idle() const // reads bit 5
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 5);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_sc1_idle(uint32_t value) // writes bit 5; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_sc2_idle() const // reads bit 6
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 6);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_sc2_idle(uint32_t value) // writes bit 6; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_sc3_idle() const // reads bit 7
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 7);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_sc3_idle(uint32_t value) // writes bit 7; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_fc_idle() const // reads bit 8
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_fc_idle(uint32_t value) // writes bit 8; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<8) & word0) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_tc_idle() const // reads bit 9
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 9);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_tc_idle(uint32_t value) // writes bit 9; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<9) & word0) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wbuf0_valid() const // reads bit 16
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 16);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_wbuf0_valid(uint32_t value) // writes bit 16; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<16) & word0) | ((((1U << 1) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wbuf0_idle() const // reads bit 17
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 17);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_wbuf0_idle(uint32_t value) // writes bit 17; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<17) & word0) | ((((1U << 1) - 1) & value) << 17);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wbuf1_valid() const // reads bit 18
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 18);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_wbuf1_valid(uint32_t value) // writes bit 18; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<18) & word0) | ((((1U << 1) - 1) & value) << 18);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wbuf1_idle() const // reads bit 19
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 19);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_wbuf1_idle(uint32_t value) // writes bit 19; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<19) & word0) | ((((1U << 1) - 1) & value) << 19);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wbuf2_valid() const // reads bit 20
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 20);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_wbuf2_valid(uint32_t value) // writes bit 20; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<20) & word0) | ((((1U << 1) - 1) & value) << 20);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wbuf2_idle() const // reads bit 21
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 21);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_wbuf2_idle(uint32_t value) // writes bit 21; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<21) & word0) | ((((1U << 1) - 1) & value) << 21);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wbuf3_valid() const // reads bit 22
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 22);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_wbuf3_valid(uint32_t value) // writes bit 22; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<22) & word0) | ((((1U << 1) - 1) & value) << 22);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wbuf3_idle() const // reads bit 23
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 23);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_wbuf3_idle(uint32_t value) // writes bit 23; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<23) & word0) | ((((1U << 1) - 1) & value) << 23);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stalled_by_ws_sc0() const // reads bit 24
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 24);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_stalled_by_ws_sc0(uint32_t value) // writes bit 24; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<24) & word0) | ((((1U << 1) - 1) & value) << 24);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stalled_by_ws_sc1() const // reads bit 25
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 25);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_stalled_by_ws_sc1(uint32_t value) // writes bit 25; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<25) & word0) | ((((1U << 1) - 1) & value) << 25);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stalled_by_ws_sc2() const // reads bit 26
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 26);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_stalled_by_ws_sc2(uint32_t value) // writes bit 26; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<26) & word0) | ((((1U << 1) - 1) & value) << 26);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stalled_by_ws_sc3() const // reads bit 27
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 27);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_stalled_by_ws_sc3(uint32_t value) // writes bit 27; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<27) & word0) | ((((1U << 1) - 1) & value) << 27);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stalled_by_ws_fc() const // reads bit 28
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 28);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_stalled_by_ws_fc(uint32_t value) // writes bit 28; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<28) & word0) | ((((1U << 1) - 1) & value) << 28);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stalled_by_ws_tc() const // reads bit 29
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 29);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_stalled_by_ws_tc(uint32_t value) // writes bit 29; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<29) & word0) | ((((1U << 1) - 1) & value) << 29);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stalled_by_wd_buf() const // reads bit 30
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 30);
+        return v;
+    }
+    CONSTEXPR wd_status_r& set_stalled_by_wd_buf(uint32_t value) // writes bit 30; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<30) & word0) | ((((1U << 1) - 1) & value) << 30);
+        return *this;
+    }
+#endif
+};
+
+// mac_status_r - MAC_STATUS: 32-bit status word of single-bit activity and stall flags
+struct mac_status_r
+{
+#ifndef __cplusplus // non-C++ builds: named bitfields overlaid with the raw word
+    union
+    {
+        struct
+        {
+            uint32_t mac_active : 1; // Valid block command and not stalled
+            uint32_t reserved0 : 2;
+            uint32_t mac_dpu_active : 1; // At least one DPU is active
+            uint32_t mac_stalled_by_w_or_acc : 1; // MAC stalled by WB or AB (not IB)
+            uint32_t mac_stalled_by_w : 1; // MAC stalled by WB (not IB)
+            uint32_t mac_stalled_by_acc : 1; // MAC stalled by AB (not IB)
+            uint32_t mac_stalled_by_ib : 1; // MAC stalled by IB
+            uint32_t reserved1 : 24;
+        };
+        uint32_t word; // raw 32-bit access to the same storage
+    };
+#else // C++ builds: private word with shift/mask accessors; bits 1-2 and 8-31 have no accessors
+private:
+    uint32_t word0; // entire register value
+public:
+    CONSTEXPR mac_status_r() : // default-constructs to 0
+        word0(0)
+    {}
+    CONSTEXPR mac_status_r(uint32_t init) : // wraps a raw 32-bit value unchanged
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    mac_status_r copy() // by-value copy of this object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mac_active() const // reads bit 0
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR mac_status_r& set_mac_active(uint32_t value) // writes bit 0 from the low bit of value; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mac_dpu_active() const // reads bit 3
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 3);
+        return v;
+    }
+    CONSTEXPR mac_status_r& set_mac_dpu_active(uint32_t value) // writes bit 3; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mac_stalled_by_w_or_acc() const // reads bit 4
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR mac_status_r& set_mac_stalled_by_w_or_acc(uint32_t value) // writes bit 4; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mac_stalled_by_w() const // reads bit 5
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 5);
+        return v;
+    }
+    CONSTEXPR mac_status_r& set_mac_stalled_by_w(uint32_t value) // writes bit 5; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mac_stalled_by_acc() const // reads bit 6
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 6);
+        return v;
+    }
+    CONSTEXPR mac_status_r& set_mac_stalled_by_acc(uint32_t value) // writes bit 6; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mac_stalled_by_ib() const // reads bit 7
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 7);
+        return v;
+    }
+    CONSTEXPR mac_status_r& set_mac_stalled_by_ib(uint32_t value) // writes bit 7; other bits kept
+    {
+        word0 = (~(((1U << 1) - 1)<<7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+#endif
+};
+
+// ao_status_r - AO_STATUS: one 32-bit status word; C builds get a bitfield union, C++ builds get word0 plus get_/set_ accessors
+struct ao_status_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t ao_active : 1; // Valid block command and not stalled
+            uint32_t reserved0 : 2; // No named field; the C++ accessors below skip bit positions 1-2
+            uint32_t ao_stalled_by_bs_or_ob : 1; // Stalled by bias scale data or output block
+            uint32_t ao_stalled_by_bs : 1; // Stalled by bias scale data
+            uint32_t ao_stalled_by_ob : 1; // Stalled by output block
+            uint32_t ao_stalled_by_ab_or_cb : 1; // Stalled by accumulator or chaining buffer
+            uint32_t ao_stalled_by_ab : 1; // Stalled by accumulator buffer
+            uint32_t ao_stalled_by_cb : 1; // Stalled by chaining buffer
+            uint32_t reserved1 : 23; // Pads the field widths to 32 bits in total
+        };
+        uint32_t word; // The same storage viewed as a single 32-bit value
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit word that every accessor below reads or modifies
+public:
+    CONSTEXPR ao_status_r() : // Default construction clears every bit
+        word0(0)
+    {}
+    CONSTEXPR ao_status_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrites the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Implicit conversion to the raw word (non-const member)
+    {
+        return word0;
+    }
+    ao_status_r copy() // Returns this object by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ao_active() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0); // Getter pattern: shift the field down to bit 0, then mask to its 1-bit width
+        return v;
+    }
+    CONSTEXPR ao_status_r& set_ao_active(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0); // Setter pattern: clear the field, then OR in the masked value
+        return *this; // Returning *this lets setters be chained
+    }
+    CONSTEXPR uint32_t get_ao_stalled_by_bs_or_ob() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 3);
+        return v;
+    }
+    CONSTEXPR ao_status_r& set_ao_stalled_by_bs_or_ob(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ao_stalled_by_bs() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR ao_status_r& set_ao_stalled_by_bs(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ao_stalled_by_ob() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 5);
+        return v;
+    }
+    CONSTEXPR ao_status_r& set_ao_stalled_by_ob(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ao_stalled_by_ab_or_cb() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 6);
+        return v;
+    }
+    CONSTEXPR ao_status_r& set_ao_stalled_by_ab_or_cb(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ao_stalled_by_ab() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 7);
+        return v;
+    }
+    CONSTEXPR ao_status_r& set_ao_stalled_by_ab(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ao_stalled_by_cb() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR ao_status_r& set_ao_stalled_by_cb(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<8) & word0) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+#endif
+};
+
+// dma_status0_r - DMA_STATUS0: one 32-bit status word; C builds get a bitfield union, C++ builds get word0 plus get_/set_ accessors
+struct dma_status0_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t cmd_ch_idle : 1; // When this bit is high means that the CMD channel is not busy in generating addresses for a CMD job
+            uint32_t ifm0_ch_idle : 1; // When this bit is high means that there are no ongoing IFM MAC jobs
+            uint32_t ifm1_ch_idle : 1; // When this bit is high means that there are no ongoing IFM STREAM jobs
+            uint32_t wgt_ch_idle : 1; // When this bit is high means that the WGT channel is not busy in generating addresses for a WGT job
+            uint32_t bas_ch_idle : 1; // When this bit is high means that the BAS channel is not busy in generating addresses for a BAS job
+            uint32_t m2m_ch_idle : 1; // When this bit is high means that there are no ongoing M2M jobs
+            uint32_t ofm_ch_idle : 1; // When this bit is high means that there are no ongoing OFM jobs
+            uint32_t axi_halt_req : 1; // CPM has requested to HALT AXI bus before soft reset
+            uint32_t axi_halt_ack : 1; // DMA is in condition to halt the AXI bus since there are no pending transactions
+            uint32_t axi_pause_req : 1; // CC has requested to pause the AXI
+            uint32_t axi_pause_ack : 1; // DMA is in condition to pause the AXI bus since there are no pending transactions
+            uint32_t cmd_abort_ack : 1; // CC has requested to abort a CMD channel job -- NOTE(review): wording describes a request but the field is named *_ack; text may be swapped with cmd_abort_req, confirm against the register spec
+            uint32_t cmd_abort_req : 1; // DMA has acknowledged the request of aborting a CMD channel job -- NOTE(review): wording describes an acknowledge but the field is named *_req; confirm against the register spec
+            uint32_t ifm_mac_if_stall : 1; // Valid high and Ready low for IFM MAC interface
+            uint32_t ifm_tc_if_stall : 1; // Valid high and Ready low for IFM WD Tensor Core interface
+            uint32_t ifm_ao_if_stall : 1; // Valid high and Ready low for IFM AO interface
+            uint32_t ofm_if_stall : 1; // Valid high and Ready low for OFM interface between AO and DMA
+            uint32_t cmd_if_stall : 1; // Valid high and Ready low for CMD interface between DMA and CC
+            uint32_t wd_sc0_if_stall : 1; // Valid high and Ready low for SC0 bitstream interface toward WD
+            uint32_t wd_sc1_if_stall : 1; // Valid high and Ready low for SC1 bitstream interface toward WD
+            uint32_t wd_sc2_if_stall : 1; // Valid high and Ready low for SC2 bitstream interface toward WD
+            uint32_t wd_sc3_if_stall : 1; // Valid high and Ready low for SC3 bitstream interface toward WD (text previously said SC2; presumably a copy-paste slip, confirm)
+            uint32_t wd_fc_if_stall : 1; // Valid high and Ready low for FC bitstream interface toward WD
+            uint32_t bs_if_stall : 1; // Valid high and Ready low for BAS interface between DMA and AO
+            uint32_t lutcfg_if_stall : 1; // Valid high and Ready low for LUTCFG interface between DMA and AO
+            uint32_t reserved0 : 7; // Pads the 25 one-bit fields above to 32 bits in total
+        };
+        uint32_t word; // The same storage viewed as a single 32-bit value
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit word that every accessor below reads or modifies
+public:
+    CONSTEXPR dma_status0_r() : // Default construction clears every bit
+        word0(0)
+    {}
+    CONSTEXPR dma_status0_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrites the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Implicit conversion to the raw word (non-const member)
+    {
+        return word0;
+    }
+    dma_status0_r copy() // Returns this object by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_ch_idle() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0); // Getter pattern: shift the field down to bit 0, then mask to its 1-bit width
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_cmd_ch_idle(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0); // Setter pattern: clear the field, then OR in the masked value
+        return *this; // Returning *this lets setters be chained
+    }
+    CONSTEXPR uint32_t get_ifm0_ch_idle() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 1);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_ifm0_ch_idle(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ifm1_ch_idle() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 2);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_ifm1_ch_idle(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wgt_ch_idle() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 3);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_wgt_ch_idle(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bas_ch_idle() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_bas_ch_idle(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_m2m_ch_idle() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 5);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_m2m_ch_idle(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ofm_ch_idle() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 6);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_ofm_ch_idle(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_halt_req() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 7);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_axi_halt_req(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_halt_ack() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_axi_halt_ack(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<8) & word0) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_pause_req() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 9);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_axi_pause_req(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<9) & word0) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_pause_ack() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 10);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_axi_pause_ack(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<10) & word0) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_abort_ack() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 11);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_cmd_abort_ack(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<11) & word0) | ((((1U << 1) - 1) & value) << 11);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_abort_req() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 12);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_cmd_abort_req(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<12) & word0) | ((((1U << 1) - 1) & value) << 12);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ifm_mac_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 13);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_ifm_mac_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<13) & word0) | ((((1U << 1) - 1) & value) << 13);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ifm_tc_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 14);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_ifm_tc_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<14) & word0) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ifm_ao_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 15);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_ifm_ao_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<15) & word0) | ((((1U << 1) - 1) & value) << 15);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ofm_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 16);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_ofm_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<16) & word0) | ((((1U << 1) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 17);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_cmd_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<17) & word0) | ((((1U << 1) - 1) & value) << 17);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_sc0_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 18);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_wd_sc0_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<18) & word0) | ((((1U << 1) - 1) & value) << 18);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_sc1_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 19);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_wd_sc1_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<19) & word0) | ((((1U << 1) - 1) & value) << 19);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_sc2_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 20);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_wd_sc2_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<20) & word0) | ((((1U << 1) - 1) & value) << 20);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_sc3_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 21);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_wd_sc3_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<21) & word0) | ((((1U << 1) - 1) & value) << 21);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_fc_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 22);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_wd_fc_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<22) & word0) | ((((1U << 1) - 1) & value) << 22);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bs_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 23);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_bs_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<23) & word0) | ((((1U << 1) - 1) & value) << 23);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_lutcfg_if_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 24);
+        return v;
+    }
+    CONSTEXPR dma_status0_r& set_lutcfg_if_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<24) & word0) | ((((1U << 1) - 1) & value) << 24);
+        return *this;
+    }
+#endif
+};
+
+// dma_status1_r - DMA_STATUS1: one 32-bit status word with five stall bits for each of SRAM0-3, EXT0 and EXT1; C builds get a bitfield union, C++ builds get word0 plus get_/set_ accessors
+struct dma_status1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t axi_sram0_ar_stalled : 1; // Read transfer request stalled on arready low AXI SRAM0 (due to memory system)
+            uint32_t axi_sram0_rd_limit_stall : 1; // Read stalled due to one AXI SRAM0 limit counter being reached
+            uint32_t axi_sram0_aw_stalled : 1; // Write transfer request stalled on awready low AXI SRAM0 (due to memory system)
+            uint32_t axi_sram0_w_stalled : 1; // Write transfer stalled on awready low AXI SRAM0 (due to memory system) -- NOTE(review): text repeats "awready" from the aw_stalled bit; presumably wready is meant, confirm
+            uint32_t axi_sram0_wr_limit_stall : 1; // Write stalled due to one AXI SRAM0 limit counter being reached
+            uint32_t axi_sram1_ar_stalled : 1; // Read transfer request stalled on arready low AXI SRAM1 (due to memory system)
+            uint32_t axi_sram1_rd_limit_stall : 1; // Read stalled due to one AXI SRAM1 limit counter being reached
+            uint32_t axi_sram1_aw_stalled : 1; // Write transfer request stalled on awready low AXI SRAM1 (due to memory system)
+            uint32_t axi_sram1_w_stalled : 1; // Write transfer stalled on awready low AXI SRAM1 (due to memory system) -- NOTE(review): presumably wready, confirm
+            uint32_t axi_sram1_wr_limit_stall : 1; // Write stalled due to one AXI SRAM1 limit counter being reached
+            uint32_t axi_sram2_ar_stalled : 1; // Read transfer request stalled on arready low AXI SRAM2 (due to memory system)
+            uint32_t axi_sram2_rd_limit_stall : 1; // Read stalled due to one AXI SRAM2 limit counter being reached
+            uint32_t axi_sram2_aw_stalled : 1; // Write transfer request stalled on awready low AXI SRAM2 (due to memory system)
+            uint32_t axi_sram2_w_stalled : 1; // Write transfer stalled on awready low AXI SRAM2 (due to memory system) -- NOTE(review): presumably wready, confirm
+            uint32_t axi_sram2_wr_limit_stall : 1; // Write stalled due to one AXI SRAM2 limit counter being reached
+            uint32_t axi_sram3_ar_stalled : 1; // Read transfer request stalled on arready low AXI SRAM3 (due to memory system)
+            uint32_t axi_sram3_rd_limit_stall : 1; // Read stalled due to one AXI SRAM3 limit counter being reached
+            uint32_t axi_sram3_aw_stalled : 1; // Write transfer request stalled on awready low AXI SRAM3 (due to memory system)
+            uint32_t axi_sram3_w_stalled : 1; // Write transfer stalled on awready low AXI SRAM3 (due to memory system) -- NOTE(review): presumably wready, confirm
+            uint32_t axi_sram3_wr_limit_stall : 1; // Write stalled due to one AXI SRAM3 limit counter being reached
+            uint32_t axi_ext0_ar_stalled : 1; // Read transfer request stalled on arready low AXI EXT0 (due to memory system)
+            uint32_t axi_ext0_rd_limit_stall : 1; // Read stalled due to one AXI EXT0 limit counter being reached
+            uint32_t axi_ext0_aw_stalled : 1; // Write transfer request stalled on awready low AXI EXT0 (due to memory system)
+            uint32_t axi_ext0_w_stalled : 1; // Write transfer stalled on awready low AXI EXT0 (due to memory system) -- NOTE(review): presumably wready, confirm
+            uint32_t axi_ext0_wr_limit_stall : 1; // Write stalled due to one AXI EXT0 limit counter being reached
+            uint32_t axi_ext1_ar_stalled : 1; // Read transfer request stalled on arready low AXI EXT1 (due to memory system)
+            uint32_t axi_ext1_rd_limit_stall : 1; // Read stalled due to one AXI EXT1 limit counter being reached
+            uint32_t axi_ext1_aw_stalled : 1; // Write transfer request stalled on awready low AXI EXT1 (due to memory system)
+            uint32_t axi_ext1_w_stalled : 1; // Write transfer stalled on awready low AXI EXT1 (due to memory system) -- NOTE(review): presumably wready, confirm
+            uint32_t axi_ext1_wr_limit_stall : 1; // Write stalled due to one AXI EXT1 limit counter being reached
+            uint32_t reserved0 : 2; // Pads the 30 one-bit fields above to 32 bits in total
+        };
+        uint32_t word; // The same storage viewed as a single 32-bit value
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit word that every accessor below reads or modifies
+public:
+    CONSTEXPR dma_status1_r() : // Default construction clears every bit
+        word0(0)
+    {}
+    CONSTEXPR dma_status1_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrites the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Implicit conversion to the raw word (non-const member)
+    {
+        return word0;
+    }
+    dma_status1_r copy() // Returns this object by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram0_ar_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0); // Getter pattern: shift the field down to bit 0, then mask to its 1-bit width
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram0_ar_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0); // Setter pattern: clear the field, then OR in the masked value
+        return *this; // Returning *this lets setters be chained
+    }
+    CONSTEXPR uint32_t get_axi_sram0_rd_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 1);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram0_rd_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram0_aw_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 2);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram0_aw_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram0_w_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 3);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram0_w_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram0_wr_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 4);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram0_wr_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram1_ar_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 5);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram1_ar_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram1_rd_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 6);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram1_rd_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram1_aw_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 7);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram1_aw_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram1_w_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 8);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram1_w_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<8) & word0) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram1_wr_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 9);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram1_wr_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<9) & word0) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram2_ar_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 10);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram2_ar_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<10) & word0) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram2_rd_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 11);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram2_rd_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<11) & word0) | ((((1U << 1) - 1) & value) << 11);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram2_aw_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 12);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram2_aw_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<12) & word0) | ((((1U << 1) - 1) & value) << 12);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram2_w_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 13);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram2_w_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<13) & word0) | ((((1U << 1) - 1) & value) << 13);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram2_wr_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 14);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram2_wr_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<14) & word0) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram3_ar_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 15);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram3_ar_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<15) & word0) | ((((1U << 1) - 1) & value) << 15);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram3_rd_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 16);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram3_rd_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<16) & word0) | ((((1U << 1) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram3_aw_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 17);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram3_aw_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<17) & word0) | ((((1U << 1) - 1) & value) << 17);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram3_w_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 18);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram3_w_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<18) & word0) | ((((1U << 1) - 1) & value) << 18);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_sram3_wr_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 19);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_sram3_wr_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<19) & word0) | ((((1U << 1) - 1) & value) << 19);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext0_ar_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 20);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext0_ar_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<20) & word0) | ((((1U << 1) - 1) & value) << 20);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext0_rd_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 21);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext0_rd_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<21) & word0) | ((((1U << 1) - 1) & value) << 21);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext0_aw_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 22);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext0_aw_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<22) & word0) | ((((1U << 1) - 1) & value) << 22);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext0_w_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 23);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext0_w_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<23) & word0) | ((((1U << 1) - 1) & value) << 23);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext0_wr_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 24);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext0_wr_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<24) & word0) | ((((1U << 1) - 1) & value) << 24);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext1_ar_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 25);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext1_ar_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<25) & word0) | ((((1U << 1) - 1) & value) << 25);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext1_rd_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 26);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext1_rd_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<26) & word0) | ((((1U << 1) - 1) & value) << 26);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext1_aw_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 27);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext1_aw_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<27) & word0) | ((((1U << 1) - 1) & value) << 27);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext1_w_stalled() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 28);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext1_w_stalled(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<28) & word0) | ((((1U << 1) - 1) & value) << 28);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi_ext1_wr_limit_stall() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 29);
+        return v;
+    }
+    CONSTEXPR dma_status1_r& set_axi_ext1_wr_limit_stall(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<29) & word0) | ((((1U << 1) - 1) & value) << 29);
+        return *this;
+    }
+#endif
+};
+
+// pmcr_r - PMU register control: one 32-bit word; C builds get a bitfield union, C++ builds get word0 plus get_/set_ accessors
+struct pmcr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t cnt_en : 1; // Enable counter
+            uint32_t event_cnt_rst : 1; // Reset event counter
+            uint32_t cycle_cnt_rst : 1; // Reset cycle counter
+            uint32_t mask_en : 1; // PMU can be enabled/disabled by command stream operation NPU_OP_PMU_MASK
+            uint32_t reserved0 : 7; // No named field; the C++ accessors below skip bit positions 4-10
+            uint32_t num_event_cnt : 5; // Number of event counters
+            uint32_t reserved1 : 16; // Pads the field widths to 32 bits in total
+        };
+        uint32_t word; // The same storage viewed as a single 32-bit value
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit word that every accessor below reads or modifies
+public:
+    CONSTEXPR pmcr_r() : // Default value is non-zero, unlike the status registers
+        word0(16384) // 16384 == 0x4000: get_num_event_cnt() returns 8 for this value, all other fields read 0
+    {}
+    CONSTEXPR pmcr_r(uint32_t init) : // Construct from a raw 32-bit value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // Overwrites the whole word; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // Implicit conversion to the raw word (non-const member)
+    {
+        return word0;
+    }
+    pmcr_r copy() // Returns this object by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cnt_en() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0); // Getter pattern: shift the field down to bit 0, then mask to its width
+        return v;
+    }
+    CONSTEXPR pmcr_r& set_cnt_en(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0); // Setter pattern: clear the field, then OR in the masked value
+        return *this; // Returning *this lets setters be chained
+    }
+    CONSTEXPR uint32_t get_event_cnt_rst() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 1);
+        return v;
+    }
+    CONSTEXPR pmcr_r& set_event_cnt_rst(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cycle_cnt_rst() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 2);
+        return v;
+    }
+    CONSTEXPR pmcr_r& set_cycle_cnt_rst(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mask_en() const
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 3);
+        return v;
+    }
+    CONSTEXPR pmcr_r& set_mask_en(uint32_t value)
+    {
+        word0 = (~(((1U << 1) - 1)<<3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_num_event_cnt() const
+    {
+        auto v = ((1U << 5) - 1) & (word0 >> 11); // 5-bit field occupying bits 11-15
+        return v;
+    }
+    CONSTEXPR pmcr_r& set_num_event_cnt(uint32_t value)
+    {
+        word0 = (~(((1U << 5) - 1)<<11) & word0) | ((((1U << 5) - 1) & value) << 11); // Only the low 5 bits of value are stored
+        return *this;
+    }
+#endif
+};
+
+// pmcntenset_r - Count enable set register (one bit per event counter, plus the cycle counter)
+struct pmcntenset_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0 : 1; // [0] Event counter enable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1 : 1; // [1] Event counter enable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2 : 1; // [2] Event counter enable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3 : 1; // [3] Event counter enable bit for PMEVCNTR3
+            uint32_t EVENT_CNT_4 : 1; // [4] Event counter enable bit for PMEVCNTR4
+            uint32_t EVENT_CNT_5 : 1; // [5] Event counter enable bit for PMEVCNTR5
+            uint32_t EVENT_CNT_6 : 1; // [6] Event counter enable bit for PMEVCNTR6
+            uint32_t EVENT_CNT_7 : 1; // [7] Event counter enable bit for PMEVCNTR7
+            uint32_t reserved0 : 23;
+            uint32_t CYCLE_CNT : 1; // [31] PMCCNTR enable bit
+        };
+        uint32_t word;
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR pmcntenset_r() :
+        word0(0U)
+    {}
+    CONSTEXPR pmcntenset_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // Overwrite the whole register image
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // Expose the whole register image
+    }
+    pmcntenset_r copy()
+    {
+        return *this; // By-value duplicate of this register image
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_0() const
+    {
+        const uint32_t bit = (word0 >> 0) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_EVENT_CNT_0(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 0)) | ((value & 0x1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_1() const
+    {
+        const uint32_t bit = (word0 >> 1) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_EVENT_CNT_1(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 1)) | ((value & 0x1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_2() const
+    {
+        const uint32_t bit = (word0 >> 2) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_EVENT_CNT_2(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 2)) | ((value & 0x1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_3() const
+    {
+        const uint32_t bit = (word0 >> 3) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_EVENT_CNT_3(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 3)) | ((value & 0x1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_4() const
+    {
+        const uint32_t bit = (word0 >> 4) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_EVENT_CNT_4(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 4)) | ((value & 0x1U) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_5() const
+    {
+        const uint32_t bit = (word0 >> 5) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_EVENT_CNT_5(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 5)) | ((value & 0x1U) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_6() const
+    {
+        const uint32_t bit = (word0 >> 6) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_EVENT_CNT_6(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 6)) | ((value & 0x1U) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_7() const
+    {
+        const uint32_t bit = (word0 >> 7) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_EVENT_CNT_7(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 7)) | ((value & 0x1U) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT() const
+    {
+        const uint32_t bit = (word0 >> 31) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenset_r& set_CYCLE_CNT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 31)) | ((value & 0x1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmcntenclr_r - Count enable clear register (one bit per event counter, plus the cycle counter)
+struct pmcntenclr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0 : 1; // [0] Event counter disable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1 : 1; // [1] Event counter disable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2 : 1; // [2] Event counter disable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3 : 1; // [3] Event counter disable bit for PMEVCNTR3
+            uint32_t EVENT_CNT_4 : 1; // [4] Event counter disable bit for PMEVCNTR4
+            uint32_t EVENT_CNT_5 : 1; // [5] Event counter disable bit for PMEVCNTR5
+            uint32_t EVENT_CNT_6 : 1; // [6] Event counter disable bit for PMEVCNTR6
+            uint32_t EVENT_CNT_7 : 1; // [7] Event counter disable bit for PMEVCNTR7
+            uint32_t reserved0 : 23;
+            uint32_t CYCLE_CNT : 1; // [31] PMCCNTR disable bit
+        };
+        uint32_t word;
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR pmcntenclr_r() :
+        word0(0U)
+    {}
+    CONSTEXPR pmcntenclr_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // Overwrite the whole register image
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // Expose the whole register image
+    }
+    pmcntenclr_r copy()
+    {
+        return *this; // By-value duplicate of this register image
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_0() const
+    {
+        const uint32_t bit = (word0 >> 0) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_EVENT_CNT_0(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 0)) | ((value & 0x1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_1() const
+    {
+        const uint32_t bit = (word0 >> 1) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_EVENT_CNT_1(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 1)) | ((value & 0x1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_2() const
+    {
+        const uint32_t bit = (word0 >> 2) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_EVENT_CNT_2(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 2)) | ((value & 0x1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_3() const
+    {
+        const uint32_t bit = (word0 >> 3) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_EVENT_CNT_3(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 3)) | ((value & 0x1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_4() const
+    {
+        const uint32_t bit = (word0 >> 4) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_EVENT_CNT_4(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 4)) | ((value & 0x1U) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_5() const
+    {
+        const uint32_t bit = (word0 >> 5) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_EVENT_CNT_5(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 5)) | ((value & 0x1U) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_6() const
+    {
+        const uint32_t bit = (word0 >> 6) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_EVENT_CNT_6(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 6)) | ((value & 0x1U) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_7() const
+    {
+        const uint32_t bit = (word0 >> 7) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_EVENT_CNT_7(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 7)) | ((value & 0x1U) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT() const
+    {
+        const uint32_t bit = (word0 >> 31) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmcntenclr_r& set_CYCLE_CNT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 31)) | ((value & 0x1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmovsset_r - Overflow flag status set register (one bit per event counter, plus the cycle counter)
+struct pmovsset_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0_OVF : 1; // [0] Event counter overflow set bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_OVF : 1; // [1] Event counter overflow set bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_OVF : 1; // [2] Event counter overflow set bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_OVF : 1; // [3] Event counter overflow set bit for PMEVCNTR3
+            uint32_t EVENT_CNT_4_OVF : 1; // [4] Event counter overflow set bit for PMEVCNTR4
+            uint32_t EVENT_CNT_5_OVF : 1; // [5] Event counter overflow set bit for PMEVCNTR5
+            uint32_t EVENT_CNT_6_OVF : 1; // [6] Event counter overflow set bit for PMEVCNTR6
+            uint32_t EVENT_CNT_7_OVF : 1; // [7] Event counter overflow set bit for PMEVCNTR7
+            uint32_t reserved0 : 23;
+            uint32_t CYCLE_CNT_OVF : 1; // [31] PMCCNTR overflow set bit
+        };
+        uint32_t word;
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR pmovsset_r() :
+        word0(0U)
+    {}
+    CONSTEXPR pmovsset_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // Overwrite the whole register image
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // Expose the whole register image
+    }
+    pmovsset_r copy()
+    {
+        return *this; // By-value duplicate of this register image
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const
+    {
+        const uint32_t bit = (word0 >> 0) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_EVENT_CNT_0_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 0)) | ((value & 0x1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const
+    {
+        const uint32_t bit = (word0 >> 1) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_EVENT_CNT_1_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 1)) | ((value & 0x1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const
+    {
+        const uint32_t bit = (word0 >> 2) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_EVENT_CNT_2_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 2)) | ((value & 0x1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const
+    {
+        const uint32_t bit = (word0 >> 3) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_EVENT_CNT_3_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 3)) | ((value & 0x1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_4_OVF() const
+    {
+        const uint32_t bit = (word0 >> 4) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_EVENT_CNT_4_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 4)) | ((value & 0x1U) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_5_OVF() const
+    {
+        const uint32_t bit = (word0 >> 5) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_EVENT_CNT_5_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 5)) | ((value & 0x1U) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_6_OVF() const
+    {
+        const uint32_t bit = (word0 >> 6) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_EVENT_CNT_6_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 6)) | ((value & 0x1U) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_7_OVF() const
+    {
+        const uint32_t bit = (word0 >> 7) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_EVENT_CNT_7_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 7)) | ((value & 0x1U) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const
+    {
+        const uint32_t bit = (word0 >> 31) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsset_r& set_CYCLE_CNT_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 31)) | ((value & 0x1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmovsclr_r - Overflow flag status clear register (one bit per event counter, plus the cycle counter)
+struct pmovsclr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0_OVF : 1; // [0] Event counter overflow clear bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_OVF : 1; // [1] Event counter overflow clear bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_OVF : 1; // [2] Event counter overflow clear bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_OVF : 1; // [3] Event counter overflow clear bit for PMEVCNTR3
+            uint32_t EVENT_CNT_4_OVF : 1; // [4] Event counter overflow clear bit for PMEVCNTR4
+            uint32_t EVENT_CNT_5_OVF : 1; // [5] Event counter overflow clear bit for PMEVCNTR5
+            uint32_t EVENT_CNT_6_OVF : 1; // [6] Event counter overflow clear bit for PMEVCNTR6
+            uint32_t EVENT_CNT_7_OVF : 1; // [7] Event counter overflow clear bit for PMEVCNTR7
+            uint32_t reserved0 : 23;
+            uint32_t CYCLE_CNT_OVF : 1; // [31] PMCCNTR overflow clear bit
+        };
+        uint32_t word;
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR pmovsclr_r() :
+        word0(0U)
+    {}
+    CONSTEXPR pmovsclr_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // Overwrite the whole register image
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // Expose the whole register image
+    }
+    pmovsclr_r copy()
+    {
+        return *this; // By-value duplicate of this register image
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const
+    {
+        const uint32_t bit = (word0 >> 0) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_EVENT_CNT_0_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 0)) | ((value & 0x1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const
+    {
+        const uint32_t bit = (word0 >> 1) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_EVENT_CNT_1_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 1)) | ((value & 0x1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const
+    {
+        const uint32_t bit = (word0 >> 2) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_EVENT_CNT_2_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 2)) | ((value & 0x1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const
+    {
+        const uint32_t bit = (word0 >> 3) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_EVENT_CNT_3_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 3)) | ((value & 0x1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_4_OVF() const
+    {
+        const uint32_t bit = (word0 >> 4) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_EVENT_CNT_4_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 4)) | ((value & 0x1U) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_5_OVF() const
+    {
+        const uint32_t bit = (word0 >> 5) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_EVENT_CNT_5_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 5)) | ((value & 0x1U) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_6_OVF() const
+    {
+        const uint32_t bit = (word0 >> 6) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_EVENT_CNT_6_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 6)) | ((value & 0x1U) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_7_OVF() const
+    {
+        const uint32_t bit = (word0 >> 7) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_EVENT_CNT_7_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 7)) | ((value & 0x1U) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const
+    {
+        const uint32_t bit = (word0 >> 31) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmovsclr_r& set_CYCLE_CNT_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 31)) | ((value & 0x1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmintset_r - Interrupt enable set register (one bit per event counter, plus the cycle counter)
+struct pmintset_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0_INT : 1; // [0] Event counter overflow interrupt request enable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_INT : 1; // [1] Event counter overflow interrupt request enable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_INT : 1; // [2] Event counter overflow interrupt request enable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_INT : 1; // [3] Event counter overflow interrupt request enable bit for PMEVCNTR3
+            uint32_t EVENT_CNT_4_INT : 1; // [4] Event counter overflow interrupt request enable bit for PMEVCNTR4
+            uint32_t EVENT_CNT_5_INT : 1; // [5] Event counter overflow interrupt request enable bit for PMEVCNTR5
+            uint32_t EVENT_CNT_6_INT : 1; // [6] Event counter overflow interrupt request enable bit for PMEVCNTR6
+            uint32_t EVENT_CNT_7_INT : 1; // [7] Event counter overflow interrupt request enable bit for PMEVCNTR7
+            uint32_t reserved0 : 23;
+            uint32_t CYCLE_CNT_INT : 1; // [31] PMCCNTR overflow interrupt request enable bit
+        };
+        uint32_t word;
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR pmintset_r() :
+        word0(0U)
+    {}
+    CONSTEXPR pmintset_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // Overwrite the whole register image
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // Expose the whole register image
+    }
+    pmintset_r copy()
+    {
+        return *this; // By-value duplicate of this register image
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const
+    {
+        const uint32_t bit = (word0 >> 0) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_EVENT_CNT_0_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 0)) | ((value & 0x1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const
+    {
+        const uint32_t bit = (word0 >> 1) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_EVENT_CNT_1_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 1)) | ((value & 0x1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const
+    {
+        const uint32_t bit = (word0 >> 2) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_EVENT_CNT_2_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 2)) | ((value & 0x1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const
+    {
+        const uint32_t bit = (word0 >> 3) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_EVENT_CNT_3_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 3)) | ((value & 0x1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_4_INT() const
+    {
+        const uint32_t bit = (word0 >> 4) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_EVENT_CNT_4_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 4)) | ((value & 0x1U) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_5_INT() const
+    {
+        const uint32_t bit = (word0 >> 5) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_EVENT_CNT_5_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 5)) | ((value & 0x1U) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_6_INT() const
+    {
+        const uint32_t bit = (word0 >> 6) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_EVENT_CNT_6_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 6)) | ((value & 0x1U) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_7_INT() const
+    {
+        const uint32_t bit = (word0 >> 7) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_EVENT_CNT_7_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 7)) | ((value & 0x1U) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT_INT() const
+    {
+        const uint32_t bit = (word0 >> 31) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintset_r& set_CYCLE_CNT_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 31)) | ((value & 0x1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmintclr_r - Interrupt enable clear register (one bit per event counter, plus the cycle counter)
+struct pmintclr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0_INT : 1; // [0] Event counter overflow interrupt request disable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_INT : 1; // [1] Event counter overflow interrupt request disable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_INT : 1; // [2] Event counter overflow interrupt request disable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_INT : 1; // [3] Event counter overflow interrupt request disable bit for PMEVCNTR3
+            uint32_t EVENT_CNT_4_INT : 1; // [4] Event counter overflow interrupt request disable bit for PMEVCNTR4
+            uint32_t EVENT_CNT_5_INT : 1; // [5] Event counter overflow interrupt request disable bit for PMEVCNTR5
+            uint32_t EVENT_CNT_6_INT : 1; // [6] Event counter overflow interrupt request disable bit for PMEVCNTR6
+            uint32_t EVENT_CNT_7_INT : 1; // [7] Event counter overflow interrupt request disable bit for PMEVCNTR7
+            uint32_t reserved0 : 23;
+            uint32_t CYCLE_CNT_INT : 1; // [31] PMCCNTR overflow interrupt request disable bit
+        };
+        uint32_t word;
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR pmintclr_r() :
+        word0(0U)
+    {}
+    CONSTEXPR pmintclr_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // Overwrite the whole register image
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // Expose the whole register image
+    }
+    pmintclr_r copy()
+    {
+        return *this; // By-value duplicate of this register image
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const
+    {
+        const uint32_t bit = (word0 >> 0) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_EVENT_CNT_0_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 0)) | ((value & 0x1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const
+    {
+        const uint32_t bit = (word0 >> 1) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_EVENT_CNT_1_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 1)) | ((value & 0x1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const
+    {
+        const uint32_t bit = (word0 >> 2) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_EVENT_CNT_2_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 2)) | ((value & 0x1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const
+    {
+        const uint32_t bit = (word0 >> 3) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_EVENT_CNT_3_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 3)) | ((value & 0x1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_4_INT() const
+    {
+        const uint32_t bit = (word0 >> 4) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_EVENT_CNT_4_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 4)) | ((value & 0x1U) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_5_INT() const
+    {
+        const uint32_t bit = (word0 >> 5) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_EVENT_CNT_5_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 5)) | ((value & 0x1U) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_6_INT() const
+    {
+        const uint32_t bit = (word0 >> 6) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_EVENT_CNT_6_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 6)) | ((value & 0x1U) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_7_INT() const
+    {
+        const uint32_t bit = (word0 >> 7) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_EVENT_CNT_7_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 7)) | ((value & 0x1U) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT_INT() const
+    {
+        const uint32_t bit = (word0 >> 31) & 0x1U;
+        return bit;
+    }
+    CONSTEXPR pmintclr_r& set_CYCLE_CNT_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(0x1U << 31)) | ((value & 0x1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmccntr_r - Performance monitor cycle count register (two 32-bit words)
+struct pmccntr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t CYCLE_CNT_LO : 32; // Cycle count - LSB (32 bits)
+            uint32_t CYCLE_CNT_HI : 16; // Cycle count - MSB (16 bits)
+            uint32_t reserved0 : 16;
+        };
+        uint32_t word[2];
+    };
+#else
+private:
+    uint32_t word0; // Low 32 bits of the 64-bit register image
+    uint32_t word1; // High 32 bits of the 64-bit register image
+public:
+    CONSTEXPR pmccntr_r() :
+        word0(0U), word1(0U)
+    {}
+    CONSTEXPR pmccntr_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFULL)),
+        word1(static_cast<uint32_t>((init >> 32) & 0xFFFFFFFFULL))
+    {}
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFULL);
+        word1 = static_cast<uint32_t>((value >> 32) & 0xFFFFFFFFULL);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    pmccntr_r copy()
+    {
+        return *this; // By-value duplicate of this register image
+    }
+#endif
+};
+
+// pmccntr_cfg_r - Cycle counter start/stop event selection with per-AXI-port disable bits
+struct pmccntr_cfg_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t CYCLE_CNT_CFG_START : 10; // [9:0] Cycle counter start event
+            uint32_t reserved0 : 2;
+            uint32_t S0 : 1; // [12] Start disabled for AXI port 0
+            uint32_t S1 : 1; // [13] Start disabled for AXI port 1
+            uint32_t S2 : 1; // [14] Start disabled for AXI port 2
+            uint32_t S3 : 1; // [15] Start disabled for AXI port 3
+            uint32_t CYCLE_CNT_CFG_STOP : 10; // [25:16] Cycle counter stop event
+            uint32_t reserved1 : 2;
+            uint32_t E0 : 1; // [28] End disabled for AXI port 0
+            uint32_t E1 : 1; // [29] End disabled for AXI port 1
+            uint32_t E2 : 1; // [30] End disabled for AXI port 2
+            uint32_t E3 : 1; // [31] End disabled for AXI port 3
+        };
+        uint32_t word;
+    };
+#else
+private:
+    uint32_t word0; // Raw 32-bit register image
+public:
+    CONSTEXPR pmccntr_cfg_r() :
+        word0(0U)
+    {}
+    CONSTEXPR pmccntr_cfg_r(uint32_t init) :
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // Overwrite the whole register image
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // Expose the whole register image
+    }
+    pmccntr_cfg_r copy()
+    {
+        return *this; // By-value duplicate of this register image
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_START() const
+    {
+        const uint32_t v = (word0 >> 0) & 0x3FFU; // 10-bit field
+        assert(v <= 671);
+        return static_cast<NPU_NAMESPACE::pmu_event>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_CYCLE_CNT_CFG_START(NPU_NAMESPACE::pmu_event value)
+    {
+        word0 = (word0 & ~(0x3FFU << 0)) | ((static_cast<uint32_t>(value) & 0x3FFU) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_S0() const
+    {
+        const uint32_t v = (word0 >> 12) & 0x1U;
+        assert(v <= 1);
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_S0(NPU_NAMESPACE::pmu_port_disable value)
+    {
+        word0 = (word0 & ~(0x1U << 12)) | ((static_cast<uint32_t>(value) & 0x1U) << 12);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_S1() const
+    {
+        const uint32_t v = (word0 >> 13) & 0x1U;
+        assert(v <= 1);
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_S1(NPU_NAMESPACE::pmu_port_disable value)
+    {
+        word0 = (word0 & ~(0x1U << 13)) | ((static_cast<uint32_t>(value) & 0x1U) << 13);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_S2() const
+    {
+        const uint32_t v = (word0 >> 14) & 0x1U;
+        assert(v <= 1);
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_S2(NPU_NAMESPACE::pmu_port_disable value)
+    {
+        word0 = (word0 & ~(0x1U << 14)) | ((static_cast<uint32_t>(value) & 0x1U) << 14);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_S3() const
+    {
+        const uint32_t v = (word0 >> 15) & 0x1U;
+        assert(v <= 1);
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_S3(NPU_NAMESPACE::pmu_port_disable value)
+    {
+        word0 = (word0 & ~(0x1U << 15)) | ((static_cast<uint32_t>(value) & 0x1U) << 15);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_STOP() const
+    {
+        const uint32_t v = (word0 >> 16) & 0x3FFU; // 10-bit field
+        assert(v <= 671);
+        return static_cast<NPU_NAMESPACE::pmu_event>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_CYCLE_CNT_CFG_STOP(NPU_NAMESPACE::pmu_event value)
+    {
+        word0 = (word0 & ~(0x3FFU << 16)) | ((static_cast<uint32_t>(value) & 0x3FFU) << 16);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_E0() const
+    {
+        const uint32_t v = (word0 >> 28) & 0x1U;
+        assert(v <= 1);
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_E0(NPU_NAMESPACE::pmu_port_disable value)
+    {
+        word0 = (word0 & ~(0x1U << 28)) | ((static_cast<uint32_t>(value) & 0x1U) << 28);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_E1() const
+    {
+        const uint32_t v = (word0 >> 29) & 0x1U;
+        assert(v <= 1);
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_E1(NPU_NAMESPACE::pmu_port_disable value)
+    {
+        word0 = (word0 & ~(0x1U << 29)) | ((static_cast<uint32_t>(value) & 0x1U) << 29);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_E2() const
+    {
+        const uint32_t v = (word0 >> 30) & 0x1U;
+        assert(v <= 1);
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_E2(NPU_NAMESPACE::pmu_port_disable value)
+    {
+        word0 = (word0 & ~(0x1U << 30)) | ((static_cast<uint32_t>(value) & 0x1U) << 30);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_E3() const
+    {
+        const uint32_t v = (word0 >> 31) & 0x1U;
+        assert(v <= 1);
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmccntr_cfg_r& set_E3(NPU_NAMESPACE::pmu_port_disable value)
+    {
+        word0 = (word0 & ~(0x1U << 31)) | ((static_cast<uint32_t>(value) & 0x1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmcaxi_chan_r - Set which AXI channel (CH_SEL) and AXI port (AXI_SEL) to monitor for latency measurements in PMU; also holds BW_CH_SEL_EN
+struct pmcaxi_chan_r
+{
+#ifndef __cplusplus // C view: anonymous bit-field struct overlaid on the raw 32-bit word
+    union
+    {
+        struct
+        {
+            uint32_t CH_SEL : 4; // Channel select for latency measurements (C++ accessors use bits [3:0])
+            uint32_t reserved0 : 4; // reserved; no accessors are generated for it
+            uint32_t AXI_SEL : 1; // AXI port select for latency measurements (C++ accessors use bit 8)
+            uint32_t reserved1 : 1; // reserved; no accessors are generated for it
+            uint32_t BW_CH_SEL_EN : 1; // Bandwidth channel selector (C++ accessors use bit 10)
+            uint32_t reserved2 : 21; // reserved; no accessors are generated for it
+        };
+        uint32_t word; // whole register as one 32-bit value
+    };
+#else // C++ view: private raw word with one getter/setter pair per field
+private:
+    uint32_t word0; // raw register value; every accessor below masks/shifts this word
+public:
+    CONSTEXPR pmcaxi_chan_r() : // default construction: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR pmcaxi_chan_r(uint32_t init) : // wrap an existing raw 32-bit register value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole register; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read back the raw word (non-const, so not callable on const objects)
+    {
+        return word0;
+    }
+    pmcaxi_chan_r copy() // returns a by-value copy of this register
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_axi_channel get_CH_SEL() const // extracts bits [3:0] as a pmu_axi_channel
+    {
+        auto v = ((1U << 4) - 1) & (word0 >> 0);
+        assert(v <= 9); // the 4-bit field can hold 0..15; presumably 9 is the largest pmu_axi_channel enumerator - confirm against the enum
+        return static_cast<NPU_NAMESPACE::pmu_axi_channel>(v);
+    }
+    CONSTEXPR pmcaxi_chan_r& set_CH_SEL(NPU_NAMESPACE::pmu_axi_channel value) // writes bits [3:0]; returns *this so setters can be chained
+    {
+        word0 = (~(((1U << 4) - 1)<<0) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 0); // clear the field, then OR in the masked value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_port get_AXI_SEL() const // extracts bit 8 as an axi_port
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 8);
+        assert(v <= 1); // always holds: v was masked to a single bit
+        return static_cast<NPU_NAMESPACE::axi_port>(v);
+    }
+    CONSTEXPR pmcaxi_chan_r& set_AXI_SEL(NPU_NAMESPACE::axi_port value) // writes bit 8; returns *this so setters can be chained
+    {
+        word0 = (~(((1U << 1) - 1)<<8) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 8); // clear the bit, then OR in the masked value
+        return *this;
+    }
+    CONSTEXPR uint32_t get_BW_CH_SEL_EN() const // returns bit 10 as 0 or 1
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 10);
+        return v;
+    }
+    CONSTEXPR pmcaxi_chan_r& set_BW_CH_SEL_EN(uint32_t value) // writes bit 10 from the low bit of value; higher bits of value are discarded by the mask
+    {
+        word0 = (~(((1U << 1) - 1)<<10) & word0) | ((((1U << 1) - 1) & value) << 10); // clear the bit, then OR in the masked value
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// pmclut_r - Performance monitor control for lookup table: a 1-bit enable and a 16-bit table value, both for event counter 0
+struct pmclut_r
+{
+#ifndef __cplusplus // C view: anonymous bit-field struct overlaid on the raw 32-bit word
+    union
+    {
+        struct
+        {
+            uint32_t PCM_LUT_EN_0 : 1; // PMU lookup table enable for event counter 0 (C++ accessors use bit 0). NOTE(review): spelled "PCM" while the sibling field is PMC_LUT_0 - confirm against the register spec
+            uint32_t reserved0 : 15; // reserved; no accessors are generated for it
+            uint32_t PMC_LUT_0 : 16; // PMU lookup table for event counter 0 (C++ accessors use bits [31:16])
+        };
+        uint32_t word; // whole register as one 32-bit value
+    };
+#else // C++ view: private raw word with one getter/setter pair per field
+private:
+    uint32_t word0; // raw register value; every accessor below masks/shifts this word
+public:
+    CONSTEXPR pmclut_r() : // default construction: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR pmclut_r(uint32_t init) : // wrap an existing raw 32-bit register value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole register; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read back the raw word (non-const, so not callable on const objects)
+    {
+        return word0;
+    }
+    pmclut_r copy() // returns a by-value copy of this register
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PCM_LUT_EN_0() const // returns bit 0 as 0 or 1
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 0);
+        return v;
+    }
+    CONSTEXPR pmclut_r& set_PCM_LUT_EN_0(uint32_t value) // writes bit 0 from the low bit of value; returns *this so setters can be chained
+    {
+        word0 = (~(((1U << 1) - 1)<<0) & word0) | ((((1U << 1) - 1) & value) << 0); // clear the bit, then OR in the masked value
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PMC_LUT_0() const // returns bits [31:16] shifted down into the low 16 bits
+    {
+        auto v = ((1U << 16) - 1) & (word0 >> 16);
+        return v;
+    }
+    CONSTEXPR pmclut_r& set_PMC_LUT_0(uint32_t value) // writes bits [31:16] from the low 16 bits of value; higher bits of value are discarded by the mask
+    {
+        word0 = (~(((1U << 16) - 1)<<16) & word0) | ((((1U << 16) - 1) & value) << 16); // clear the field, then OR in the masked value
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// pmevcntr_r - Performance monitor event 0 count register; the same layout is used for every element of NPU_REG::PMEVCNTR[8]
+struct pmevcntr_r
+{
+#ifndef __cplusplus // C view: anonymous bit-field struct overlaid on the raw 32-bit word
+    union
+    {
+        struct
+        {
+            uint32_t count : 32; // Count word (occupies the entire register)
+        };
+        uint32_t word; // whole register as one 32-bit value
+    };
+#else // C++ view: private raw word with a single getter/setter pair
+private:
+    uint32_t word0; // raw register value; identical to the count field
+public:
+    CONSTEXPR pmevcntr_r() : // default construction: count is zero
+        word0(0)
+    {}
+    CONSTEXPR pmevcntr_r(uint32_t init) : // wrap an existing raw 32-bit register value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole register; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read back the raw word (non-const, so not callable on const objects)
+    {
+        return word0;
+    }
+    pmevcntr_r copy() // returns a by-value copy of this register
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_count() const // returns the full 32-bit count
+    {
+        auto v = word0; // the field spans the whole word, so no mask or shift is applied
+        return v;
+    }
+    CONSTEXPR pmevcntr_r& set_count(uint32_t value) // replaces the full 32-bit count; returns *this so setters can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// pmevtyper_r - Performance monitor event type register 0 (event selector plus per-AXI-port disable bits); the same layout is used for every element of NPU_REG::PMEVTYPER[8]
+struct pmevtyper_r
+{
+#ifndef __cplusplus // C view: anonymous bit-field struct overlaid on the raw 32-bit word
+    union
+    {
+        struct
+        {
+            uint32_t EV_TYPE : 10; // Event Type (C++ accessors use bits [9:0])
+            uint32_t reserved0 : 2; // reserved; no accessors are generated for it
+            uint32_t D0 : 1; // Counting disabled for AXI port 0 (C++ accessors use bit 12)
+            uint32_t D1 : 1; // Counting disabled for AXI port 1 (C++ accessors use bit 13)
+            uint32_t D2 : 1; // Counting disabled for AXI port 2 (C++ accessors use bit 14)
+            uint32_t D3 : 1; // Counting disabled for AXI port 3 (C++ accessors use bit 15)
+            uint32_t reserved1 : 16; // reserved; no accessors are generated for it
+        };
+        uint32_t word; // whole register as one 32-bit value
+    };
+#else // C++ view: private raw word with one getter/setter pair per field
+private:
+    uint32_t word0; // raw register value; every accessor below masks/shifts this word
+public:
+    CONSTEXPR pmevtyper_r() : // default construction: all bits zero
+        word0(0)
+    {}
+    CONSTEXPR pmevtyper_r(uint32_t init) : // wrap an existing raw 32-bit register value
+        word0(init)
+    {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole register; returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read back the raw word (non-const, so not callable on const objects)
+    {
+        return word0;
+    }
+    pmevtyper_r copy() // returns a by-value copy of this register
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_EV_TYPE() const // extracts bits [9:0] as a pmu_event
+    {
+        auto v = ((1U << 10) - 1) & (word0 >> 0);
+        assert(v <= 671); // the 10-bit field can hold 0..1023; presumably 671 is the largest pmu_event enumerator - confirm against the enum
+        return static_cast<NPU_NAMESPACE::pmu_event>(v);
+    }
+    CONSTEXPR pmevtyper_r& set_EV_TYPE(NPU_NAMESPACE::pmu_event value) // writes bits [9:0]; returns *this so setters can be chained
+    {
+        word0 = (~(((1U << 10) - 1)<<0) & word0) | ((((1U << 10) - 1) & static_cast<uint32_t>(value)) << 0); // clear the field, then OR in the masked value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_D0() const // extracts bit 12 as a pmu_port_disable
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 12);
+        assert(v <= 1); // always holds: v was masked to a single bit
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmevtyper_r& set_D0(NPU_NAMESPACE::pmu_port_disable value) // writes bit 12; returns *this so setters can be chained
+    {
+        word0 = (~(((1U << 1) - 1)<<12) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 12); // clear the bit, then OR in the masked value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_D1() const // extracts bit 13 as a pmu_port_disable
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 13);
+        assert(v <= 1); // always holds: v was masked to a single bit
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmevtyper_r& set_D1(NPU_NAMESPACE::pmu_port_disable value) // writes bit 13; returns *this so setters can be chained
+    {
+        word0 = (~(((1U << 1) - 1)<<13) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 13); // clear the bit, then OR in the masked value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_D2() const // extracts bit 14 as a pmu_port_disable
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 14);
+        assert(v <= 1); // always holds: v was masked to a single bit
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmevtyper_r& set_D2(NPU_NAMESPACE::pmu_port_disable value) // writes bit 14; returns *this so setters can be chained
+    {
+        word0 = (~(((1U << 1) - 1)<<14) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 14); // clear the bit, then OR in the masked value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pmu_port_disable get_D3() const // extracts bit 15 as a pmu_port_disable
+    {
+        auto v = ((1U << 1) - 1) & (word0 >> 15);
+        assert(v <= 1); // always holds: v was masked to a single bit
+        return static_cast<NPU_NAMESPACE::pmu_port_disable>(v);
+    }
+    CONSTEXPR pmevtyper_r& set_D3(NPU_NAMESPACE::pmu_port_disable value) // writes bit 15; returns *this so setters can be chained
+    {
+        word0 = (~(((1U << 1) - 1)<<15) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 15); // clear the bit, then OR in the masked value
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+struct NPU_REG
+{
+    STRUCT id_r ID; // 0x0000
+    STRUCT status_r STATUS; // 0x0004
+    STRUCT cmd_r CMD; // 0x0008
+    STRUCT reset_r RESET; // 0x000C
+    STRUCT qbase_r QBASE; // 0x0010
+    STRUCT qread_r QREAD; // 0x0018
+    STRUCT qconfig_r QCONFIG; // 0x001C
+    STRUCT qsize_r QSIZE; // 0x0020
+    STRUCT prot_r PROT; // 0x0024
+    STRUCT config_r CONFIG; // 0x0028
+    uint32_t unused0[1];
+    STRUCT cond_status_r COND_STATUS; // 0x0030
+    uint32_t unused1[1];
+    STRUCT power_ctrl_r POWER_CTRL; // 0x0038
+    STRUCT regioncfg_r REGIONCFG; // 0x003C
+    STRUCT mem_attr_r MEM_ATTR[4]; // 0x0040
+    STRUCT axi_sram_r AXI_SRAM; // 0x0050
+    STRUCT axi_ext_r AXI_EXT; // 0x0054
+    uint32_t unused2[2];
+    STRUCT cfg_sram_cap_r CFG_SRAM_CAP; // 0x0060
+    STRUCT cfg_ext_cap_r CFG_EXT_CAP; // 0x0064
+    STRUCT cfg_sram_hash0_r CFG_SRAM_HASH0; // 0x0068
+    STRUCT cfg_sram_hash1_r CFG_SRAM_HASH1; // 0x0070
+    STRUCT cfg_ext_hash0_r CFG_EXT_HASH0; // 0x0078
+    STRUCT basep_r BASEP[8]; // 0x0080
+    uint32_t unused3[32];
+    STRUCT clkforce_r CLKFORCE; // 0x0140
+    STRUCT debug_address_r DEBUG_ADDRESS; // 0x0144
+    STRUCT debug_misc_r DEBUG_MISC; // 0x0148
+    uint32_t unused4[61];
+    STRUCT dma_ifm_src_r DMA_IFM_SRC; // 0x0240
+    STRUCT dma_ifm_dst_r DMA_IFM_DST; // 0x0248
+    STRUCT dma_ofm_src_r DMA_OFM_SRC; // 0x024C
+    STRUCT dma_ofm_dst_r DMA_OFM_DST; // 0x0250
+    STRUCT dma_weight_src_r DMA_WEIGHT_SRC; // 0x0258
+    STRUCT dma_cmd_src_r DMA_CMD_SRC; // 0x0260
+    STRUCT dma_cmd_size_r DMA_CMD_SIZE; // 0x0268
+    STRUCT dma_m2m_src_r DMA_M2M_SRC; // 0x026C
+    STRUCT dma_m2m_dst_r DMA_M2M_DST; // 0x0274
+    STRUCT current_qread_r CURRENT_QREAD; // 0x027C
+    STRUCT dma_scale_src_r DMA_SCALE_SRC; // 0x0280
+    STRUCT dma_weight1_src_r DMA_WEIGHT1_SRC; // 0x0288
+    STRUCT dma_weight2_src_r DMA_WEIGHT2_SRC; // 0x0290
+    STRUCT dma_weight3_src_r DMA_WEIGHT3_SRC; // 0x0298
+    uint32_t unused5[6];
+    STRUCT current_op_r CURRENT_OP; // 0x02B8
+    STRUCT current_cmd_r CURRENT_CMD; // 0x02BC
+    uint32_t unused6[80];
+    STRUCT internal_memory_r INTERNAL_MEMORY[256]; // 0x0400
+    STRUCT ifm_pad_top_r IFM_PAD_TOP; // 0x0800
+    STRUCT ifm_pad_left_r IFM_PAD_LEFT; // 0x0804
+    STRUCT ifm_pad_right_r IFM_PAD_RIGHT; // 0x0808
+    STRUCT ifm_pad_bottom_r IFM_PAD_BOTTOM; // 0x080C
+    STRUCT ifm_depth_m1_r IFM_DEPTH_M1; // 0x0810
+    STRUCT ifm_precision_r IFM_PRECISION; // 0x0814
+    uint32_t unused7[1];
+    STRUCT ifm_upscale_r IFM_UPSCALE; // 0x081C
+    STRUCT ifm_broadcast_r IFM_BROADCAST; // 0x0820
+    STRUCT ifm_zero_point_r IFM_ZERO_POINT; // 0x0824
+    STRUCT ifm_width0_m1_r IFM_WIDTH0_M1; // 0x0828
+    STRUCT ifm_height0_m1_r IFM_HEIGHT0_M1; // 0x082C
+    STRUCT ifm_height1_m1_r IFM_HEIGHT1_M1; // 0x0830
+    uint32_t unused8[2];
+    STRUCT ifm_region_r IFM_REGION; // 0x083C
+    uint32_t unused9[1];
+    STRUCT ofm_width_m1_r OFM_WIDTH_M1; // 0x0844
+    STRUCT ofm_height_m1_r OFM_HEIGHT_M1; // 0x0848
+    STRUCT ofm_depth_m1_r OFM_DEPTH_M1; // 0x084C
+    STRUCT ofm_precision_r OFM_PRECISION; // 0x0850
+    STRUCT ofm_blk_width_m1_r OFM_BLK_WIDTH_M1; // 0x0854
+    STRUCT ofm_blk_height_m1_r OFM_BLK_HEIGHT_M1; // 0x0858
+    STRUCT ofm_blk_depth_m1_r OFM_BLK_DEPTH_M1; // 0x085C
+    STRUCT ofm_zero_point_r OFM_ZERO_POINT; // 0x0860
+    uint32_t unused10[1];
+    STRUCT ofm_width0_m1_r OFM_WIDTH0_M1; // 0x0868
+    STRUCT ofm_height0_m1_r OFM_HEIGHT0_M1; // 0x086C
+    STRUCT ofm_height1_m1_r OFM_HEIGHT1_M1; // 0x0870
+    uint32_t unused11[2];
+    STRUCT ofm_region_r OFM_REGION; // 0x087C
+    STRUCT kernel_width_m1_r KERNEL_WIDTH_M1; // 0x0880
+    STRUCT kernel_height_m1_r KERNEL_HEIGHT_M1; // 0x0884
+    STRUCT kernel_stride_r KERNEL_STRIDE; // 0x0888
+    uint32_t unused12[1];
+    STRUCT acc_format_r ACC_FORMAT; // 0x0890
+    STRUCT activation_r ACTIVATION; // 0x0894
+    STRUCT activation_min_r ACTIVATION_MIN; // 0x0898
+    STRUCT activation_max_r ACTIVATION_MAX; // 0x089C
+    STRUCT weight_region_r WEIGHT_REGION; // 0x08A0
+    STRUCT scale_region_r SCALE_REGION; // 0x08A4
+    STRUCT resize_x_scale_n_m1_r RESIZE_X_SCALE_N_M1; // 0x08A8
+    STRUCT resize_y_scale_n_m1_r RESIZE_Y_SCALE_N_M1; // 0x08AC
+    STRUCT resize_x_offset_r RESIZE_X_OFFSET; // 0x08B0
+    STRUCT resize_y_offset_r RESIZE_Y_OFFSET; // 0x08B4
+    STRUCT weight_format_r WEIGHT_FORMAT; // 0x08B8
+    STRUCT blockdep_r BLOCKDEP; // 0x08BC
+    STRUCT dma0_src_region_r DMA0_SRC_REGION; // 0x08C0
+    STRUCT dma0_dst_region_r DMA0_DST_REGION; // 0x08C4
+    STRUCT dma0_size0_r DMA0_SIZE0; // 0x08C8
+    STRUCT dma0_size1_r DMA0_SIZE1; // 0x08CC
+    STRUCT dma0_idx_region_r DMA0_IDX_REGION; // 0x08D0
+    uint32_t unused13[11];
+    STRUCT ifm2_broadcast_r IFM2_BROADCAST; // 0x0900
+    uint32_t unused14[4];
+    STRUCT ifm2_precision_r IFM2_PRECISION; // 0x0914
+    uint32_t unused15[3];
+    STRUCT ifm2_zero_point_r IFM2_ZERO_POINT; // 0x0924
+    STRUCT ifm2_width0_m1_r IFM2_WIDTH0_M1; // 0x0928
+    STRUCT ifm2_height0_m1_r IFM2_HEIGHT0_M1; // 0x092C
+    STRUCT ifm2_height1_m1_r IFM2_HEIGHT1_M1; // 0x0930
+    uint32_t unused16[2];
+    STRUCT ifm2_region_r IFM2_REGION; // 0x093C
+    uint32_t unused17[48];
+    STRUCT ifm_base0_r IFM_BASE0; // 0x0A00
+    STRUCT ifm_base1_r IFM_BASE1; // 0x0A08
+    STRUCT ifm_base2_r IFM_BASE2; // 0x0A10
+    STRUCT ifm_base3_r IFM_BASE3; // 0x0A18
+    STRUCT ifm_stride_x_r IFM_STRIDE_X; // 0x0A20
+    STRUCT ifm_stride_y_r IFM_STRIDE_Y; // 0x0A28
+    STRUCT ifm_stride_c_r IFM_STRIDE_C; // 0x0A30
+    uint32_t unused18[2];
+    STRUCT ofm_base0_r OFM_BASE0; // 0x0A40
+    STRUCT ofm_base1_r OFM_BASE1; // 0x0A48
+    STRUCT ofm_base2_r OFM_BASE2; // 0x0A50
+    STRUCT ofm_base3_r OFM_BASE3; // 0x0A58
+    STRUCT ofm_stride_x_r OFM_STRIDE_X; // 0x0A60
+    STRUCT ofm_stride_y_r OFM_STRIDE_Y; // 0x0A68
+    STRUCT ofm_stride_c_r OFM_STRIDE_C; // 0x0A70
+    uint32_t unused19[2];
+    STRUCT weight_base_r WEIGHT_BASE; // 0x0A80
+    STRUCT weight_length_r WEIGHT_LENGTH; // 0x0A88
+    STRUCT scale_base_r SCALE_BASE; // 0x0A90
+    STRUCT scale_length_r SCALE_LENGTH; // 0x0A98
+    STRUCT ofm_scale_r OFM_SCALE; // 0x0AA0
+    STRUCT ifm_scale_r IFM_SCALE; // 0x0AA8
+    STRUCT ifm2_scale_r IFM2_SCALE; // 0x0AB0
+    STRUCT op_scalar_r OP_SCALAR; // 0x0AB8
+    STRUCT dma0_src_r DMA0_SRC; // 0x0AC0
+    STRUCT dma0_dst_r DMA0_DST; // 0x0AC8
+    STRUCT dma0_len_r DMA0_LEN; // 0x0AD0
+    STRUCT dma0_src_stride0_r DMA0_SRC_STRIDE0; // 0x0AD8
+    STRUCT dma0_src_stride1_r DMA0_SRC_STRIDE1; // 0x0AE0
+    STRUCT dma0_dst_stride0_r DMA0_DST_STRIDE0; // 0x0AE8
+    STRUCT dma0_dst_stride1_r DMA0_DST_STRIDE1; // 0x0AF0
+    STRUCT dma0_idx_r DMA0_IDX; // 0x0AF8
+    STRUCT ifm2_base0_r IFM2_BASE0; // 0x0B00
+    STRUCT ifm2_base1_r IFM2_BASE1; // 0x0B08
+    STRUCT ifm2_base2_r IFM2_BASE2; // 0x0B10
+    STRUCT ifm2_base3_r IFM2_BASE3; // 0x0B18
+    STRUCT ifm2_stride_x_r IFM2_STRIDE_X; // 0x0B20
+    STRUCT ifm2_stride_y_r IFM2_STRIDE_Y; // 0x0B28
+    STRUCT ifm2_stride_c_r IFM2_STRIDE_C; // 0x0B30
+    uint32_t unused20[2];
+    STRUCT weight1_base_r WEIGHT1_BASE; // 0x0B40
+    STRUCT weight1_length_r WEIGHT1_LENGTH; // 0x0B48
+    STRUCT weight2_base_r WEIGHT2_BASE; // 0x0B50
+    STRUCT weight2_length_r WEIGHT2_LENGTH; // 0x0B58
+    STRUCT weight3_base_r WEIGHT3_BASE; // 0x0B60
+    STRUCT weight3_length_r WEIGHT3_LENGTH; // 0x0B68
+    STRUCT resize_x_step_r RESIZE_X_STEP; // 0x0B70
+    STRUCT resize_y_step_r RESIZE_Y_STEP; // 0x0B78
+    uint32_t unused21[16];
+    STRUCT dma0_idx_max_r DMA0_IDX_MAX; // 0x0BC0
+    STRUCT dma0_idx_skip1_r DMA0_IDX_SKIP1; // 0x0BC8
+    uint32_t unused22[252];
+    STRUCT revision_r REVISION; // 0x0FC0
+    uint32_t unused23[3];
+    STRUCT pid4_r PID4; // 0x0FD0
+    STRUCT pid5_r PID5; // 0x0FD4
+    STRUCT pid6_r PID6; // 0x0FD8
+    STRUCT pid7_r PID7; // 0x0FDC
+    STRUCT pid0_r PID0; // 0x0FE0
+    STRUCT pid1_r PID1; // 0x0FE4
+    STRUCT pid2_r PID2; // 0x0FE8
+    STRUCT pid3_r PID3; // 0x0FEC
+    STRUCT cid0_r CID0; // 0x0FF0
+    STRUCT cid1_r CID1; // 0x0FF4
+    STRUCT cid2_r CID2; // 0x0FF8
+    STRUCT cid3_r CID3; // 0x0FFC
+    uint32_t unused24[64];
+    STRUCT wd_status_r WD_STATUS; // 0x1100
+    STRUCT mac_status_r MAC_STATUS; // 0x1104
+    STRUCT ao_status_r AO_STATUS; // 0x1108
+    uint32_t unused25[1];
+    STRUCT dma_status0_r DMA_STATUS0; // 0x1110
+    STRUCT dma_status1_r DMA_STATUS1; // 0x1114
+    uint32_t unused26[26];
+    STRUCT pmcr_r PMCR; // 0x1180
+    STRUCT pmcntenset_r PMCNTENSET; // 0x1184
+    STRUCT pmcntenclr_r PMCNTENCLR; // 0x1188
+    STRUCT pmovsset_r PMOVSSET; // 0x118C
+    STRUCT pmovsclr_r PMOVSCLR; // 0x1190
+    STRUCT pmintset_r PMINTSET; // 0x1194
+    STRUCT pmintclr_r PMINTCLR; // 0x1198
+    uint32_t unused27[1];
+    STRUCT pmccntr_r PMCCNTR; // 0x11A0
+    STRUCT pmccntr_cfg_r PMCCNTR_CFG; // 0x11A8
+    STRUCT pmcaxi_chan_r PMCAXI_CHAN; // 0x11AC
+    STRUCT pmclut_r PMCLUT; // 0x11B0
+    uint32_t unused28[83];
+    STRUCT pmevcntr_r PMEVCNTR[8]; // 0x1300
+    uint32_t unused29[24];
+    STRUCT pmevtyper_r PMEVTYPER[8]; // 0x1380
+
+#ifdef __cplusplus
+    enum class access_type_t : uint8_t { RW, RO, WO };
+    NPU_REG()
+    {
+        reset();
+    }
+    void reset()
+    {
+        ID = 536899584;
+        STATUS = 8;
+        CMD = 12;
+        RESET = 0;
+        QBASE = 0;
+        QREAD = 0;
+        QCONFIG = 0;
+        QSIZE = 0;
+        PROT = 0;
+        CONFIG = 536870928;
+        COND_STATUS = 0;
+        POWER_CTRL = 0;
+        REGIONCFG = 0;
+        for (size_t i = 0; i < (sizeof(MEM_ATTR) / sizeof(MEM_ATTR[0])); ++i)
+            MEM_ATTR[i] = 0;
+        AXI_SRAM = 0;
+        AXI_EXT = 0;
+        CFG_SRAM_CAP = 0;
+        CFG_EXT_CAP = 0;
+        CFG_SRAM_HASH0 = 0;
+        CFG_SRAM_HASH1 = 0;
+        CFG_EXT_HASH0 = 0;
+        for (size_t i = 0; i < (sizeof(BASEP) / sizeof(BASEP[0])); ++i)
+            BASEP[i] = 0;
+        CLKFORCE = 0;
+        DEBUG_ADDRESS = 0;
+        DEBUG_MISC = 0;
+        DMA_IFM_SRC = 0;
+        DMA_IFM_DST = 0;
+        DMA_OFM_SRC = 0;
+        DMA_OFM_DST = 0;
+        DMA_WEIGHT_SRC = 0;
+        DMA_CMD_SRC = 0;
+        DMA_CMD_SIZE = 0;
+        DMA_M2M_SRC = 0;
+        DMA_M2M_DST = 0;
+        CURRENT_QREAD = 0;
+        DMA_SCALE_SRC = 0;
+        DMA_WEIGHT1_SRC = 0;
+        DMA_WEIGHT2_SRC = 0;
+        DMA_WEIGHT3_SRC = 0;
+        CURRENT_OP = 0;
+        CURRENT_CMD = 0;
+        for (size_t i = 0; i < (sizeof(INTERNAL_MEMORY) / sizeof(INTERNAL_MEMORY[0])); ++i)
+            INTERNAL_MEMORY[i] = 0;
+        IFM_PAD_TOP = 0;
+        IFM_PAD_LEFT = 0;
+        IFM_PAD_RIGHT = 0;
+        IFM_PAD_BOTTOM = 0;
+        IFM_DEPTH_M1 = 0;
+        IFM_PRECISION = 0;
+        IFM_UPSCALE = 0;
+        IFM_BROADCAST = 0;
+        IFM_ZERO_POINT = 0;
+        IFM_WIDTH0_M1 = 0;
+        IFM_HEIGHT0_M1 = 0;
+        IFM_HEIGHT1_M1 = 0;
+        IFM_REGION = 0;
+        OFM_WIDTH_M1 = 0;
+        OFM_HEIGHT_M1 = 0;
+        OFM_DEPTH_M1 = 0;
+        OFM_PRECISION = 0;
+        OFM_BLK_WIDTH_M1 = 0;
+        OFM_BLK_HEIGHT_M1 = 0;
+        OFM_BLK_DEPTH_M1 = 0;
+        OFM_ZERO_POINT = 0;
+        OFM_WIDTH0_M1 = 0;
+        OFM_HEIGHT0_M1 = 0;
+        OFM_HEIGHT1_M1 = 0;
+        OFM_REGION = 0;
+        KERNEL_WIDTH_M1 = 0;
+        KERNEL_HEIGHT_M1 = 0;
+        KERNEL_STRIDE = 0;
+        ACC_FORMAT = 0;
+        ACTIVATION = 0;
+        ACTIVATION_MIN = 0;
+        ACTIVATION_MAX = 0;
+        WEIGHT_REGION = 0;
+        SCALE_REGION = 0;
+        RESIZE_X_SCALE_N_M1 = 0;
+        RESIZE_Y_SCALE_N_M1 = 0;
+        RESIZE_X_OFFSET = 0;
+        RESIZE_Y_OFFSET = 0;
+        WEIGHT_FORMAT = 0;
+        BLOCKDEP = 0;
+        DMA0_SRC_REGION = 0;
+        DMA0_DST_REGION = 0;
+        DMA0_SIZE0 = 0;
+        DMA0_SIZE1 = 0;
+        DMA0_IDX_REGION = 0;
+        IFM2_BROADCAST = 0;
+        IFM2_PRECISION = 0;
+        IFM2_ZERO_POINT = 0;
+        IFM2_WIDTH0_M1 = 0;
+        IFM2_HEIGHT0_M1 = 0;
+        IFM2_HEIGHT1_M1 = 0;
+        IFM2_REGION = 0;
+        IFM_BASE0 = 0;
+        IFM_BASE1 = 0;
+        IFM_BASE2 = 0;
+        IFM_BASE3 = 0;
+        IFM_STRIDE_X = 0;
+        IFM_STRIDE_Y = 0;
+        IFM_STRIDE_C = 0;
+        OFM_BASE0 = 0;
+        OFM_BASE1 = 0;
+        OFM_BASE2 = 0;
+        OFM_BASE3 = 0;
+        OFM_STRIDE_X = 0;
+        OFM_STRIDE_Y = 0;
+        OFM_STRIDE_C = 0;
+        WEIGHT_BASE = 0;
+        WEIGHT_LENGTH = 0;
+        SCALE_BASE = 0;
+        SCALE_LENGTH = 0;
+        OFM_SCALE = 0;
+        IFM_SCALE = 0;
+        IFM2_SCALE = 0;
+        OP_SCALAR = 0;
+        DMA0_SRC = 0;
+        DMA0_DST = 0;
+        DMA0_LEN = 0;
+        DMA0_SRC_STRIDE0 = 0;
+        DMA0_SRC_STRIDE1 = 0;
+        DMA0_DST_STRIDE0 = 0;
+        DMA0_DST_STRIDE1 = 0;
+        DMA0_IDX = 0;
+        IFM2_BASE0 = 0;
+        IFM2_BASE1 = 0;
+        IFM2_BASE2 = 0;
+        IFM2_BASE3 = 0;
+        IFM2_STRIDE_X = 0;
+        IFM2_STRIDE_Y = 0;
+        IFM2_STRIDE_C = 0;
+        WEIGHT1_BASE = 0;
+        WEIGHT1_LENGTH = 0;
+        WEIGHT2_BASE = 0;
+        WEIGHT2_LENGTH = 0;
+        WEIGHT3_BASE = 0;
+        WEIGHT3_LENGTH = 0;
+        RESIZE_X_STEP = 0;
+        RESIZE_Y_STEP = 0;
+        DMA0_IDX_MAX = 0;
+        DMA0_IDX_SKIP1 = 0;
+        REVISION = 0;
+        PID4 = 4;
+        PID5 = 0;
+        PID6 = 0;
+        PID7 = 0;
+        PID0 = 130;
+        PID1 = 181;
+        PID2 = 11;
+        PID3 = 0;
+        CID0 = 13;
+        CID1 = 240;
+        CID2 = 5;
+        CID3 = 177;
+        WD_STATUS = 0;
+        MAC_STATUS = 0;
+        AO_STATUS = 0;
+        DMA_STATUS0 = 0;
+        DMA_STATUS1 = 0;
+        PMCR = 16384;
+        PMCNTENSET = 0;
+        PMCNTENCLR = 0;
+        PMOVSSET = 0;
+        PMOVSCLR = 0;
+        PMINTSET = 0;
+        PMINTCLR = 0;
+        PMCCNTR = 0;
+        PMCCNTR_CFG = 0;
+        PMCAXI_CHAN = 0;
+        PMCLUT = 0;
+        for (size_t i = 0; i < (sizeof(PMEVCNTR) / sizeof(PMEVCNTR[0])); ++i)
+            PMEVCNTR[i] = 0;
+        for (size_t i = 0; i < (sizeof(PMEVTYPER) / sizeof(PMEVTYPER[0])); ++i)
+            PMEVTYPER[i] = 0;
+    }
+    uint32_t& operator[](const int addr_offset)
+    {
+        return reinterpret_cast<uint32_t *>(this)[addr_offset / 4];
+    }
+    access_type_t get_access_type(uint32_t offset)
+    {
+        switch (offset)
+        {
+            case 0: return access_type_t::RO;
+            case 4: return access_type_t::RO;
+            case 8: return access_type_t::RW;
+            case 12: return access_type_t::RW;
+            case 16: return access_type_t::RW;
+            case 24: return access_type_t::RO;
+            case 28: return access_type_t::RW;
+            case 32: return access_type_t::RW;
+            case 36: return access_type_t::RO;
+            case 40: return access_type_t::RO;
+            case 48: return access_type_t::RW;
+            case 56: return access_type_t::RW;
+            case 60: return access_type_t::RW;
+            case 64: return access_type_t::RW;
+            case 68: return access_type_t::RW;
+            case 72: return access_type_t::RW;
+            case 76: return access_type_t::RW;
+            case 80: return access_type_t::RW;
+            case 84: return access_type_t::RW;
+            case 96: return access_type_t::RO;
+            case 100: return access_type_t::RO;
+            case 104: return access_type_t::RO;
+            case 112: return access_type_t::RO;
+            case 120: return access_type_t::RO;
+            case 128: return access_type_t::RW;
+            case 136: return access_type_t::RW;
+            case 144: return access_type_t::RW;
+            case 152: return access_type_t::RW;
+            case 160: return access_type_t::RW;
+            case 168: return access_type_t::RW;
+            case 176: return access_type_t::RW;
+            case 184: return access_type_t::RW;
+            case 320: return access_type_t::RW;
+            case 324: return access_type_t::RW;
+            case 328: return access_type_t::RW;
+            case 576: return access_type_t::RO;
+            case 584: return access_type_t::RO;
+            case 588: return access_type_t::RO;
+            case 592: return access_type_t::RO;
+            case 600: return access_type_t::RO;
+            case 608: return access_type_t::RO;
+            case 616: return access_type_t::RO;
+            case 620: return access_type_t::RO;
+            case 628: return access_type_t::RO;
+            case 636: return access_type_t::RO;
+            case 640: return access_type_t::RO;
+            case 648: return access_type_t::RO;
+            case 656: return access_type_t::RO;
+            case 664: return access_type_t::RO;
+            case 696: return access_type_t::RO;
+            case 700: return access_type_t::RO;
+            case 1024: return access_type_t::RW;
+            case 1028: return access_type_t::RW;
+            case 1032: return access_type_t::RW;
+            case 1036: return access_type_t::RW;
+            case 1040: return access_type_t::RW;
+            case 1044: return access_type_t::RW;
+            case 1048: return access_type_t::RW;
+            case 1052: return access_type_t::RW;
+            case 1056: return access_type_t::RW;
+            case 1060: return access_type_t::RW;
+            case 1064: return access_type_t::RW;
+            case 1068: return access_type_t::RW;
+            case 1072: return access_type_t::RW;
+            case 1076: return access_type_t::RW;
+            case 1080: return access_type_t::RW;
+            case 1084: return access_type_t::RW;
+            case 1088: return access_type_t::RW;
+            case 1092: return access_type_t::RW;
+            case 1096: return access_type_t::RW;
+            case 1100: return access_type_t::RW;
+            case 1104: return access_type_t::RW;
+            case 1108: return access_type_t::RW;
+            case 1112: return access_type_t::RW;
+            case 1116: return access_type_t::RW;
+            case 1120: return access_type_t::RW;
+            case 1124: return access_type_t::RW;
+            case 1128: return access_type_t::RW;
+            case 1132: return access_type_t::RW;
+            case 1136: return access_type_t::RW;
+            case 1140: return access_type_t::RW;
+            case 1144: return access_type_t::RW;
+            case 1148: return access_type_t::RW;
+            case 1152: return access_type_t::RW;
+            case 1156: return access_type_t::RW;
+            case 1160: return access_type_t::RW;
+            case 1164: return access_type_t::RW;
+            case 1168: return access_type_t::RW;
+            case 1172: return access_type_t::RW;
+            case 1176: return access_type_t::RW;
+            case 1180: return access_type_t::RW;
+            case 1184: return access_type_t::RW;
+            case 1188: return access_type_t::RW;
+            case 1192: return access_type_t::RW;
+            case 1196: return access_type_t::RW;
+            case 1200: return access_type_t::RW;
+            case 1204: return access_type_t::RW;
+            case 1208: return access_type_t::RW;
+            case 1212: return access_type_t::RW;
+            case 1216: return access_type_t::RW;
+            case 1220: return access_type_t::RW;
+            case 1224: return access_type_t::RW;
+            case 1228: return access_type_t::RW;
+            case 1232: return access_type_t::RW;
+            case 1236: return access_type_t::RW;
+            case 1240: return access_type_t::RW;
+            case 1244: return access_type_t::RW;
+            case 1248: return access_type_t::RW;
+            case 1252: return access_type_t::RW;
+            case 1256: return access_type_t::RW;
+            case 1260: return access_type_t::RW;
+            case 1264: return access_type_t::RW;
+            case 1268: return access_type_t::RW;
+            case 1272: return access_type_t::RW;
+            case 1276: return access_type_t::RW;
+            case 1280: return access_type_t::RW;
+            case 1284: return access_type_t::RW;
+            case 1288: return access_type_t::RW;
+            case 1292: return access_type_t::RW;
+            case 1296: return access_type_t::RW;
+            case 1300: return access_type_t::RW;
+            case 1304: return access_type_t::RW;
+            case 1308: return access_type_t::RW;
+            case 1312: return access_type_t::RW;
+            case 1316: return access_type_t::RW;
+            case 1320: return access_type_t::RW;
+            case 1324: return access_type_t::RW;
+            case 1328: return access_type_t::RW;
+            case 1332: return access_type_t::RW;
+            case 1336: return access_type_t::RW;
+            case 1340: return access_type_t::RW;
+            case 1344: return access_type_t::RW;
+            case 1348: return access_type_t::RW;
+            case 1352: return access_type_t::RW;
+            case 1356: return access_type_t::RW;
+            case 1360: return access_type_t::RW;
+            case 1364: return access_type_t::RW;
+            case 1368: return access_type_t::RW;
+            case 1372: return access_type_t::RW;
+            case 1376: return access_type_t::RW;
+            case 1380: return access_type_t::RW;
+            case 1384: return access_type_t::RW;
+            case 1388: return access_type_t::RW;
+            case 1392: return access_type_t::RW;
+            case 1396: return access_type_t::RW;
+            case 1400: return access_type_t::RW;
+            case 1404: return access_type_t::RW;
+            case 1408: return access_type_t::RW;
+            case 1412: return access_type_t::RW;
+            case 1416: return access_type_t::RW;
+            case 1420: return access_type_t::RW;
+            case 1424: return access_type_t::RW;
+            case 1428: return access_type_t::RW;
+            case 1432: return access_type_t::RW;
+            case 1436: return access_type_t::RW;
+            case 1440: return access_type_t::RW;
+            case 1444: return access_type_t::RW;
+            case 1448: return access_type_t::RW;
+            case 1452: return access_type_t::RW;
+            case 1456: return access_type_t::RW;
+            case 1460: return access_type_t::RW;
+            case 1464: return access_type_t::RW;
+            case 1468: return access_type_t::RW;
+            case 1472: return access_type_t::RW;
+            case 1476: return access_type_t::RW;
+            case 1480: return access_type_t::RW;
+            case 1484: return access_type_t::RW;
+            case 1488: return access_type_t::RW;
+            case 1492: return access_type_t::RW;
+            case 1496: return access_type_t::RW;
+            case 1500: return access_type_t::RW;
+            case 1504: return access_type_t::RW;
+            case 1508: return access_type_t::RW;
+            case 1512: return access_type_t::RW;
+            case 1516: return access_type_t::RW;
+            case 1520: return access_type_t::RW;
+            case 1524: return access_type_t::RW;
+            case 1528: return access_type_t::RW;
+            case 1532: return access_type_t::RW;
+            case 1536: return access_type_t::RW;
+            case 1540: return access_type_t::RW;
+            case 1544: return access_type_t::RW;
+            case 1548: return access_type_t::RW;
+            case 1552: return access_type_t::RW;
+            case 1556: return access_type_t::RW;
+            case 1560: return access_type_t::RW;
+            case 1564: return access_type_t::RW;
+            case 1568: return access_type_t::RW;
+            case 1572: return access_type_t::RW;
+            case 1576: return access_type_t::RW;
+            case 1580: return access_type_t::RW;
+            case 1584: return access_type_t::RW;
+            case 1588: return access_type_t::RW;
+            case 1592: return access_type_t::RW;
+            case 1596: return access_type_t::RW;
+            case 1600: return access_type_t::RW;
+            case 1604: return access_type_t::RW;
+            case 1608: return access_type_t::RW;
+            case 1612: return access_type_t::RW;
+            case 1616: return access_type_t::RW;
+            case 1620: return access_type_t::RW;
+            case 1624: return access_type_t::RW;
+            case 1628: return access_type_t::RW;
+            case 1632: return access_type_t::RW;
+            case 1636: return access_type_t::RW;
+            case 1640: return access_type_t::RW;
+            case 1644: return access_type_t::RW;
+            case 1648: return access_type_t::RW;
+            case 1652: return access_type_t::RW;
+            case 1656: return access_type_t::RW;
+            case 1660: return access_type_t::RW;
+            case 1664: return access_type_t::RW;
+            case 1668: return access_type_t::RW;
+            case 1672: return access_type_t::RW;
+            case 1676: return access_type_t::RW;
+            case 1680: return access_type_t::RW;
+            case 1684: return access_type_t::RW;
+            case 1688: return access_type_t::RW;
+            case 1692: return access_type_t::RW;
+            case 1696: return access_type_t::RW;
+            case 1700: return access_type_t::RW;
+            case 1704: return access_type_t::RW;
+            case 1708: return access_type_t::RW;
+            case 1712: return access_type_t::RW;
+            case 1716: return access_type_t::RW;
+            case 1720: return access_type_t::RW;
+            case 1724: return access_type_t::RW;
+            case 1728: return access_type_t::RW;
+            case 1732: return access_type_t::RW;
+            case 1736: return access_type_t::RW;
+            case 1740: return access_type_t::RW;
+            case 1744: return access_type_t::RW;
+            case 1748: return access_type_t::RW;
+            case 1752: return access_type_t::RW;
+            case 1756: return access_type_t::RW;
+            case 1760: return access_type_t::RW;
+            case 1764: return access_type_t::RW;
+            case 1768: return access_type_t::RW;
+            case 1772: return access_type_t::RW;
+            case 1776: return access_type_t::RW;
+            case 1780: return access_type_t::RW;
+            case 1784: return access_type_t::RW;
+            case 1788: return access_type_t::RW;
+            case 1792: return access_type_t::RW;
+            case 1796: return access_type_t::RW;
+            case 1800: return access_type_t::RW;
+            case 1804: return access_type_t::RW;
+            case 1808: return access_type_t::RW;
+            case 1812: return access_type_t::RW;
+            case 1816: return access_type_t::RW;
+            case 1820: return access_type_t::RW;
+            case 1824: return access_type_t::RW;
+            case 1828: return access_type_t::RW;
+            case 1832: return access_type_t::RW;
+            case 1836: return access_type_t::RW;
+            case 1840: return access_type_t::RW;
+            case 1844: return access_type_t::RW;
+            case 1848: return access_type_t::RW;
+            case 1852: return access_type_t::RW;
+            case 1856: return access_type_t::RW;
+            case 1860: return access_type_t::RW;
+            case 1864: return access_type_t::RW;
+            case 1868: return access_type_t::RW;
+            case 1872: return access_type_t::RW;
+            case 1876: return access_type_t::RW;
+            case 1880: return access_type_t::RW;
+            case 1884: return access_type_t::RW;
+            case 1888: return access_type_t::RW;
+            case 1892: return access_type_t::RW;
+            case 1896: return access_type_t::RW;
+            case 1900: return access_type_t::RW;
+            case 1904: return access_type_t::RW;
+            case 1908: return access_type_t::RW;
+            case 1912: return access_type_t::RW;
+            case 1916: return access_type_t::RW;
+            case 1920: return access_type_t::RW;
+            case 1924: return access_type_t::RW;
+            case 1928: return access_type_t::RW;
+            case 1932: return access_type_t::RW;
+            case 1936: return access_type_t::RW;
+            case 1940: return access_type_t::RW;
+            case 1944: return access_type_t::RW;
+            case 1948: return access_type_t::RW;
+            case 1952: return access_type_t::RW;
+            case 1956: return access_type_t::RW;
+            case 1960: return access_type_t::RW;
+            case 1964: return access_type_t::RW;
+            case 1968: return access_type_t::RW;
+            case 1972: return access_type_t::RW;
+            case 1976: return access_type_t::RW;
+            case 1980: return access_type_t::RW;
+            case 1984: return access_type_t::RW;
+            case 1988: return access_type_t::RW;
+            case 1992: return access_type_t::RW;
+            case 1996: return access_type_t::RW;
+            case 2000: return access_type_t::RW;
+            case 2004: return access_type_t::RW;
+            case 2008: return access_type_t::RW;
+            case 2012: return access_type_t::RW;
+            case 2016: return access_type_t::RW;
+            case 2020: return access_type_t::RW;
+            case 2024: return access_type_t::RW;
+            case 2028: return access_type_t::RW;
+            case 2032: return access_type_t::RW;
+            case 2036: return access_type_t::RW;
+            case 2040: return access_type_t::RW;
+            case 2044: return access_type_t::RW;
+            case 2048: return access_type_t::RW;
+            case 2052: return access_type_t::RW;
+            case 2056: return access_type_t::RW;
+            case 2060: return access_type_t::RW;
+            case 2064: return access_type_t::RW;
+            case 2068: return access_type_t::RW;
+            case 2076: return access_type_t::RW;
+            case 2080: return access_type_t::RW;
+            case 2084: return access_type_t::RW;
+            case 2088: return access_type_t::RW;
+            case 2092: return access_type_t::RW;
+            case 2096: return access_type_t::RW;
+            case 2108: return access_type_t::RW;
+            case 2116: return access_type_t::RW;
+            case 2120: return access_type_t::RW;
+            case 2124: return access_type_t::RW;
+            case 2128: return access_type_t::RW;
+            case 2132: return access_type_t::RW;
+            case 2136: return access_type_t::RW;
+            case 2140: return access_type_t::RW;
+            case 2144: return access_type_t::RW;
+            case 2152: return access_type_t::RW;
+            case 2156: return access_type_t::RW;
+            case 2160: return access_type_t::RW;
+            case 2172: return access_type_t::RW;
+            case 2176: return access_type_t::RW;
+            case 2180: return access_type_t::RW;
+            case 2184: return access_type_t::RW;
+            case 2192: return access_type_t::RW;
+            case 2196: return access_type_t::RW;
+            case 2200: return access_type_t::RW;
+            case 2204: return access_type_t::RW;
+            case 2208: return access_type_t::RW;
+            case 2212: return access_type_t::RW;
+            case 2216: return access_type_t::RW;
+            case 2220: return access_type_t::RW;
+            case 2224: return access_type_t::RW;
+            case 2228: return access_type_t::RW;
+            case 2232: return access_type_t::RW;
+            case 2236: return access_type_t::RW;
+            case 2240: return access_type_t::RW;
+            case 2244: return access_type_t::RW;
+            case 2248: return access_type_t::RW;
+            case 2252: return access_type_t::RW;
+            case 2256: return access_type_t::RW;
+            case 2304: return access_type_t::RW;
+            case 2324: return access_type_t::RW;
+            case 2340: return access_type_t::RW;
+            case 2344: return access_type_t::RW;
+            case 2348: return access_type_t::RW;
+            case 2352: return access_type_t::RW;
+            case 2364: return access_type_t::RW;
+            case 2560: return access_type_t::RW;
+            case 2568: return access_type_t::RW;
+            case 2576: return access_type_t::RW;
+            case 2584: return access_type_t::RW;
+            case 2592: return access_type_t::RW;
+            case 2600: return access_type_t::RW;
+            case 2608: return access_type_t::RW;
+            case 2624: return access_type_t::RW;
+            case 2632: return access_type_t::RW;
+            case 2640: return access_type_t::RW;
+            case 2648: return access_type_t::RW;
+            case 2656: return access_type_t::RW;
+            case 2664: return access_type_t::RW;
+            case 2672: return access_type_t::RW;
+            case 2688: return access_type_t::RW;
+            case 2696: return access_type_t::RW;
+            case 2704: return access_type_t::RW;
+            case 2712: return access_type_t::RW;
+            case 2720: return access_type_t::RW;
+            case 2728: return access_type_t::RW;
+            case 2736: return access_type_t::RW;
+            case 2744: return access_type_t::RW;
+            case 2752: return access_type_t::RW;
+            case 2760: return access_type_t::RW;
+            case 2768: return access_type_t::RW;
+            case 2776: return access_type_t::RW;
+            case 2784: return access_type_t::RW;
+            case 2792: return access_type_t::RW;
+            case 2800: return access_type_t::RW;
+            case 2808: return access_type_t::RW;
+            case 2816: return access_type_t::RW;
+            case 2824: return access_type_t::RW;
+            case 2832: return access_type_t::RW;
+            case 2840: return access_type_t::RW;
+            case 2848: return access_type_t::RW;
+            case 2856: return access_type_t::RW;
+            case 2864: return access_type_t::RW;
+            case 2880: return access_type_t::RW;
+            case 2888: return access_type_t::RW;
+            case 2896: return access_type_t::RW;
+            case 2904: return access_type_t::RW;
+            case 2912: return access_type_t::RW;
+            case 2920: return access_type_t::RW;
+            case 2928: return access_type_t::RW;
+            case 2936: return access_type_t::RW;
+            case 3008: return access_type_t::RW;
+            case 3016: return access_type_t::RW;
+            case 4032: return access_type_t::RO;
+            case 4048: return access_type_t::RO;
+            case 4052: return access_type_t::RO;
+            case 4056: return access_type_t::RO;
+            case 4060: return access_type_t::RO;
+            case 4064: return access_type_t::RO;
+            case 4068: return access_type_t::RO;
+            case 4072: return access_type_t::RO;
+            case 4076: return access_type_t::RO;
+            case 4080: return access_type_t::RO;
+            case 4084: return access_type_t::RO;
+            case 4088: return access_type_t::RO;
+            case 4092: return access_type_t::RO;
+            case 4352: return access_type_t::RO;
+            case 4356: return access_type_t::RO;
+            case 4360: return access_type_t::RO;
+            case 4368: return access_type_t::RO;
+            case 4372: return access_type_t::RO;
+            case 4480: return access_type_t::RW;
+            case 4484: return access_type_t::RW;
+            case 4488: return access_type_t::RW;
+            case 4492: return access_type_t::RW;
+            case 4496: return access_type_t::RW;
+            case 4500: return access_type_t::RW;
+            case 4504: return access_type_t::RW;
+            case 4512: return access_type_t::RW;
+            case 4520: return access_type_t::RW;
+            case 4524: return access_type_t::RW;
+            case 4528: return access_type_t::RW;
+            case 4864: return access_type_t::RW;
+            case 4868: return access_type_t::RW;
+            case 4872: return access_type_t::RW;
+            case 4876: return access_type_t::RW;
+            case 4880: return access_type_t::RW;
+            case 4884: return access_type_t::RW;
+            case 4888: return access_type_t::RW;
+            case 4892: return access_type_t::RW;
+            case 4992: return access_type_t::RW;
+            case 4996: return access_type_t::RW;
+            case 5000: return access_type_t::RW;
+            case 5004: return access_type_t::RW;
+            case 5008: return access_type_t::RW;
+            case 5012: return access_type_t::RW;
+            case 5016: return access_type_t::RW;
+            case 5020: return access_type_t::RW;
+            default: return access_type_t::RO;
+        }
+    }
+#endif
+};
+
+#ifdef __cplusplus
+struct isa
+{
+#ifdef NPU_DISASSEMBLE
+static int disassemble(const uint32_t* in, std::string& op, std::vector<std::pair<std::string, std::string>>& fields)
+{
+    switch (*in & 0xffff)
+    {
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP):
+        {
+            const npu_op_stop_t& v = *reinterpret_cast<const npu_op_stop_t*>(in);
+            op = "NPU_OP_STOP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ):
+        {
+            const npu_op_irq_t& v = *reinterpret_cast<const npu_op_irq_t*>(in);
+            op = "NPU_OP_IRQ";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV):
+        {
+            const npu_op_conv_t& v = *reinterpret_cast<const npu_op_conv_t*>(in);
+            op = "NPU_OP_CONV";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE):
+        {
+            const npu_op_depthwise_t& v = *reinterpret_cast<const npu_op_depthwise_t*>(in);
+            op = "NPU_OP_DEPTHWISE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL):
+        {
+            const npu_op_pool_t& v = *reinterpret_cast<const npu_op_pool_t*>(in);
+            op = "NPU_OP_POOL";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE):
+        {
+            const npu_op_elementwise_t& v = *reinterpret_cast<const npu_op_elementwise_t*>(in);
+            op = "NPU_OP_ELEMENTWISE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_RESIZE):
+        {
+            const npu_op_resize_t& v = *reinterpret_cast<const npu_op_resize_t*>(in);
+            op = "NPU_OP_RESIZE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START):
+        {
+            const npu_op_dma_start_t& v = *reinterpret_cast<const npu_op_dma_start_t*>(in);
+            op = "NPU_OP_DMA_START";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT):
+        {
+            const npu_op_dma_wait_t& v = *reinterpret_cast<const npu_op_dma_wait_t*>(in);
+            op = "NPU_OP_DMA_WAIT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT):
+        {
+            const npu_op_kernel_wait_t& v = *reinterpret_cast<const npu_op_kernel_wait_t*>(in);
+            op = "NPU_OP_KERNEL_WAIT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK):
+        {
+            const npu_op_pmu_mask_t& v = *reinterpret_cast<const npu_op_pmu_mask_t*>(in);
+            op = "NPU_OP_PMU_MASK";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP):
+        {
+            const npu_set_ifm_pad_top_t& v = *reinterpret_cast<const npu_set_ifm_pad_top_t*>(in);
+            op = "NPU_SET_IFM_PAD_TOP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT):
+        {
+            const npu_set_ifm_pad_left_t& v = *reinterpret_cast<const npu_set_ifm_pad_left_t*>(in);
+            op = "NPU_SET_IFM_PAD_LEFT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT):
+        {
+            const npu_set_ifm_pad_right_t& v = *reinterpret_cast<const npu_set_ifm_pad_right_t*>(in);
+            op = "NPU_SET_IFM_PAD_RIGHT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM):
+        {
+            const npu_set_ifm_pad_bottom_t& v = *reinterpret_cast<const npu_set_ifm_pad_bottom_t*>(in);
+            op = "NPU_SET_IFM_PAD_BOTTOM";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1):
+        {
+            const npu_set_ifm_depth_m1_t& v = *reinterpret_cast<const npu_set_ifm_depth_m1_t*>(in);
+            op = "NPU_SET_IFM_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION):
+        {
+            const npu_set_ifm_precision_t& v = *reinterpret_cast<const npu_set_ifm_precision_t*>(in);
+            op = "NPU_SET_IFM_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE):
+        {
+            const npu_set_ifm_upscale_t& v = *reinterpret_cast<const npu_set_ifm_upscale_t*>(in);
+            op = "NPU_SET_IFM_UPSCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT):
+        {
+            const npu_set_ifm_zero_point_t& v = *reinterpret_cast<const npu_set_ifm_zero_point_t*>(in);
+            op = "NPU_SET_IFM_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1):
+        {
+            const npu_set_ifm_width0_m1_t& v = *reinterpret_cast<const npu_set_ifm_width0_m1_t*>(in);
+            op = "NPU_SET_IFM_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1):
+        {
+            const npu_set_ifm_height0_m1_t& v = *reinterpret_cast<const npu_set_ifm_height0_m1_t*>(in);
+            op = "NPU_SET_IFM_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1):
+        {
+            const npu_set_ifm_height1_m1_t& v = *reinterpret_cast<const npu_set_ifm_height1_m1_t*>(in);
+            op = "NPU_SET_IFM_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION):
+        {
+            const npu_set_ifm_region_t& v = *reinterpret_cast<const npu_set_ifm_region_t*>(in);
+            op = "NPU_SET_IFM_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_BROADCAST):
+        {
+            const npu_set_ifm_broadcast_t& v = *reinterpret_cast<const npu_set_ifm_broadcast_t*>(in);
+            op = "NPU_SET_IFM_BROADCAST";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1):
+        {
+            const npu_set_ofm_width_m1_t& v = *reinterpret_cast<const npu_set_ofm_width_m1_t*>(in);
+            op = "NPU_SET_OFM_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1):
+        {
+            const npu_set_ofm_height_m1_t& v = *reinterpret_cast<const npu_set_ofm_height_m1_t*>(in);
+            op = "NPU_SET_OFM_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1):
+        {
+            const npu_set_ofm_depth_m1_t& v = *reinterpret_cast<const npu_set_ofm_depth_m1_t*>(in);
+            op = "NPU_SET_OFM_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION):
+        {
+            const npu_set_ofm_precision_t& v = *reinterpret_cast<const npu_set_ofm_precision_t*>(in);
+            op = "NPU_SET_OFM_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1):
+        {
+            const npu_set_ofm_blk_width_m1_t& v = *reinterpret_cast<const npu_set_ofm_blk_width_m1_t*>(in);
+            op = "NPU_SET_OFM_BLK_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1):
+        {
+            const npu_set_ofm_blk_height_m1_t& v = *reinterpret_cast<const npu_set_ofm_blk_height_m1_t*>(in);
+            op = "NPU_SET_OFM_BLK_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1):
+        {
+            const npu_set_ofm_blk_depth_m1_t& v = *reinterpret_cast<const npu_set_ofm_blk_depth_m1_t*>(in);
+            op = "NPU_SET_OFM_BLK_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT):
+        {
+            const npu_set_ofm_zero_point_t& v = *reinterpret_cast<const npu_set_ofm_zero_point_t*>(in);
+            op = "NPU_SET_OFM_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1):
+        {
+            const npu_set_ofm_width0_m1_t& v = *reinterpret_cast<const npu_set_ofm_width0_m1_t*>(in);
+            op = "NPU_SET_OFM_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1):
+        {
+            const npu_set_ofm_height0_m1_t& v = *reinterpret_cast<const npu_set_ofm_height0_m1_t*>(in);
+            op = "NPU_SET_OFM_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1):
+        {
+            const npu_set_ofm_height1_m1_t& v = *reinterpret_cast<const npu_set_ofm_height1_m1_t*>(in);
+            op = "NPU_SET_OFM_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION):
+        {
+            const npu_set_ofm_region_t& v = *reinterpret_cast<const npu_set_ofm_region_t*>(in);
+            op = "NPU_SET_OFM_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1):
+        {
+            const npu_set_kernel_width_m1_t& v = *reinterpret_cast<const npu_set_kernel_width_m1_t*>(in);
+            op = "NPU_SET_KERNEL_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1):
+        {
+            const npu_set_kernel_height_m1_t& v = *reinterpret_cast<const npu_set_kernel_height_m1_t*>(in);
+            op = "NPU_SET_KERNEL_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE):
+        {
+            const npu_set_kernel_stride_t& v = *reinterpret_cast<const npu_set_kernel_stride_t*>(in);
+            op = "NPU_SET_KERNEL_STRIDE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT):
+        {
+            const npu_set_acc_format_t& v = *reinterpret_cast<const npu_set_acc_format_t*>(in);
+            op = "NPU_SET_ACC_FORMAT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION):
+        {
+            const npu_set_activation_t& v = *reinterpret_cast<const npu_set_activation_t*>(in);
+            op = "NPU_SET_ACTIVATION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN):
+        {
+            const npu_set_activation_min_t& v = *reinterpret_cast<const npu_set_activation_min_t*>(in);
+            op = "NPU_SET_ACTIVATION_MIN";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX):
+        {
+            const npu_set_activation_max_t& v = *reinterpret_cast<const npu_set_activation_max_t*>(in);
+            op = "NPU_SET_ACTIVATION_MAX";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION):
+        {
+            const npu_set_weight_region_t& v = *reinterpret_cast<const npu_set_weight_region_t*>(in);
+            op = "NPU_SET_WEIGHT_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION):
+        {
+            const npu_set_scale_region_t& v = *reinterpret_cast<const npu_set_scale_region_t*>(in);
+            op = "NPU_SET_SCALE_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_FORMAT):
+        {
+            const npu_set_weight_format_t& v = *reinterpret_cast<const npu_set_weight_format_t*>(in);
+            op = "NPU_SET_WEIGHT_FORMAT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP):
+        {
+            const npu_set_blockdep_t& v = *reinterpret_cast<const npu_set_blockdep_t*>(in);
+            op = "NPU_SET_BLOCKDEP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_SCALE_N_M1):
+        {
+            const npu_set_resize_x_scale_n_m1_t& v = *reinterpret_cast<const npu_set_resize_x_scale_n_m1_t*>(in);
+            op = "NPU_SET_RESIZE_X_SCALE_N_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_SCALE_N_M1):
+        {
+            const npu_set_resize_y_scale_n_m1_t& v = *reinterpret_cast<const npu_set_resize_y_scale_n_m1_t*>(in);
+            op = "NPU_SET_RESIZE_Y_SCALE_N_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_OFFSET):
+        {
+            const npu_set_resize_x_offset_t& v = *reinterpret_cast<const npu_set_resize_x_offset_t*>(in);
+            op = "NPU_SET_RESIZE_X_OFFSET";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_OFFSET):
+        {
+            const npu_set_resize_y_offset_t& v = *reinterpret_cast<const npu_set_resize_y_offset_t*>(in);
+            op = "NPU_SET_RESIZE_Y_OFFSET";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION):
+        {
+            const npu_set_dma0_src_region_t& v = *reinterpret_cast<const npu_set_dma0_src_region_t*>(in);
+            op = "NPU_SET_DMA0_SRC_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION):
+        {
+            const npu_set_dma0_dst_region_t& v = *reinterpret_cast<const npu_set_dma0_dst_region_t*>(in);
+            op = "NPU_SET_DMA0_DST_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0):
+        {
+            const npu_set_dma0_size0_t& v = *reinterpret_cast<const npu_set_dma0_size0_t*>(in);
+            op = "NPU_SET_DMA0_SIZE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1):
+        {
+            const npu_set_dma0_size1_t& v = *reinterpret_cast<const npu_set_dma0_size1_t*>(in);
+            op = "NPU_SET_DMA0_SIZE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_IDX_REGION):
+        {
+            const npu_set_dma0_idx_region_t& v = *reinterpret_cast<const npu_set_dma0_idx_region_t*>(in);
+            op = "NPU_SET_DMA0_IDX_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST):
+        {
+            const npu_set_ifm2_broadcast_t& v = *reinterpret_cast<const npu_set_ifm2_broadcast_t*>(in);
+            op = "NPU_SET_IFM2_BROADCAST";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION):
+        {
+            const npu_set_ifm2_precision_t& v = *reinterpret_cast<const npu_set_ifm2_precision_t*>(in);
+            op = "NPU_SET_IFM2_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT):
+        {
+            const npu_set_ifm2_zero_point_t& v = *reinterpret_cast<const npu_set_ifm2_zero_point_t*>(in);
+            op = "NPU_SET_IFM2_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1):
+        {
+            const npu_set_ifm2_width0_m1_t& v = *reinterpret_cast<const npu_set_ifm2_width0_m1_t*>(in);
+            op = "NPU_SET_IFM2_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1):
+        {
+            const npu_set_ifm2_height0_m1_t& v = *reinterpret_cast<const npu_set_ifm2_height0_m1_t*>(in);
+            op = "NPU_SET_IFM2_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1):
+        {
+            const npu_set_ifm2_height1_m1_t& v = *reinterpret_cast<const npu_set_ifm2_height1_m1_t*>(in);
+            op = "NPU_SET_IFM2_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION):
+        {
+            const npu_set_ifm2_region_t& v = *reinterpret_cast<const npu_set_ifm2_region_t*>(in);
+            op = "NPU_SET_IFM2_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0):
+        {
+            const npu_set_ifm_base0_t& v = *reinterpret_cast<const npu_set_ifm_base0_t*>(in);
+            op = "NPU_SET_IFM_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1):
+        {
+            const npu_set_ifm_base1_t& v = *reinterpret_cast<const npu_set_ifm_base1_t*>(in);
+            op = "NPU_SET_IFM_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2):
+        {
+            const npu_set_ifm_base2_t& v = *reinterpret_cast<const npu_set_ifm_base2_t*>(in);
+            op = "NPU_SET_IFM_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3):
+        {
+            const npu_set_ifm_base3_t& v = *reinterpret_cast<const npu_set_ifm_base3_t*>(in);
+            op = "NPU_SET_IFM_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X):
+        {
+            const npu_set_ifm_stride_x_t& v = *reinterpret_cast<const npu_set_ifm_stride_x_t*>(in);
+            op = "NPU_SET_IFM_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y):
+        {
+            const npu_set_ifm_stride_y_t& v = *reinterpret_cast<const npu_set_ifm_stride_y_t*>(in);
+            op = "NPU_SET_IFM_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C):
+        {
+            const npu_set_ifm_stride_c_t& v = *reinterpret_cast<const npu_set_ifm_stride_c_t*>(in);
+            op = "NPU_SET_IFM_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0):
+        {
+            const npu_set_ofm_base0_t& v = *reinterpret_cast<const npu_set_ofm_base0_t*>(in);
+            op = "NPU_SET_OFM_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1):
+        {
+            const npu_set_ofm_base1_t& v = *reinterpret_cast<const npu_set_ofm_base1_t*>(in);
+            op = "NPU_SET_OFM_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2):
+        {
+            const npu_set_ofm_base2_t& v = *reinterpret_cast<const npu_set_ofm_base2_t*>(in);
+            op = "NPU_SET_OFM_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3):
+        {
+            const npu_set_ofm_base3_t& v = *reinterpret_cast<const npu_set_ofm_base3_t*>(in);
+            op = "NPU_SET_OFM_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X):
+        {
+            const npu_set_ofm_stride_x_t& v = *reinterpret_cast<const npu_set_ofm_stride_x_t*>(in);
+            op = "NPU_SET_OFM_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y):
+        {
+            const npu_set_ofm_stride_y_t& v = *reinterpret_cast<const npu_set_ofm_stride_y_t*>(in);
+            op = "NPU_SET_OFM_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C):
+        {
+            const npu_set_ofm_stride_c_t& v = *reinterpret_cast<const npu_set_ofm_stride_c_t*>(in);
+            op = "NPU_SET_OFM_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE):
+        {
+            const npu_set_weight_base_t& v = *reinterpret_cast<const npu_set_weight_base_t*>(in);
+            op = "NPU_SET_WEIGHT_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH):
+        {
+            const npu_set_weight_length_t& v = *reinterpret_cast<const npu_set_weight_length_t*>(in);
+            op = "NPU_SET_WEIGHT_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE):
+        {
+            const npu_set_scale_base_t& v = *reinterpret_cast<const npu_set_scale_base_t*>(in);
+            op = "NPU_SET_SCALE_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH):
+        {
+            const npu_set_scale_length_t& v = *reinterpret_cast<const npu_set_scale_length_t*>(in);
+            op = "NPU_SET_SCALE_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE):
+        {
+            const npu_set_ofm_scale_t& v = *reinterpret_cast<const npu_set_ofm_scale_t*>(in);
+            op = "NPU_SET_OFM_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_SCALE):
+        {
+            const npu_set_ifm_scale_t& v = *reinterpret_cast<const npu_set_ifm_scale_t*>(in);
+            op = "NPU_SET_IFM_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_SCALE):
+        {
+            const npu_set_ifm2_scale_t& v = *reinterpret_cast<const npu_set_ifm2_scale_t*>(in);
+            op = "NPU_SET_IFM2_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OP_SCALAR):
+        {
+            const npu_set_op_scalar_t& v = *reinterpret_cast<const npu_set_op_scalar_t*>(in);
+            op = "NPU_SET_OP_SCALAR";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC):
+        {
+            const npu_set_dma0_src_t& v = *reinterpret_cast<const npu_set_dma0_src_t*>(in);
+            op = "NPU_SET_DMA0_SRC";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST):
+        {
+            const npu_set_dma0_dst_t& v = *reinterpret_cast<const npu_set_dma0_dst_t*>(in);
+            op = "NPU_SET_DMA0_DST";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN):
+        {
+            const npu_set_dma0_len_t& v = *reinterpret_cast<const npu_set_dma0_len_t*>(in);
+            op = "NPU_SET_DMA0_LEN";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE0):
+        {
+            const npu_set_dma0_src_stride0_t& v = *reinterpret_cast<const npu_set_dma0_src_stride0_t*>(in);
+            op = "NPU_SET_DMA0_SRC_STRIDE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE1):
+        {
+            const npu_set_dma0_src_stride1_t& v = *reinterpret_cast<const npu_set_dma0_src_stride1_t*>(in);
+            op = "NPU_SET_DMA0_SRC_STRIDE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE0):
+        {
+            const npu_set_dma0_dst_stride0_t& v = *reinterpret_cast<const npu_set_dma0_dst_stride0_t*>(in);
+            op = "NPU_SET_DMA0_DST_STRIDE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE1):
+        {
+            const npu_set_dma0_dst_stride1_t& v = *reinterpret_cast<const npu_set_dma0_dst_stride1_t*>(in);
+            op = "NPU_SET_DMA0_DST_STRIDE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX):
+        {
+            const npu_set_dma0_idx_t& v = *reinterpret_cast<const npu_set_dma0_idx_t*>(in);
+            op = "NPU_SET_DMA0_IDX";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_MAX):
+        {
+            const npu_set_dma0_idx_max_t& v = *reinterpret_cast<const npu_set_dma0_idx_max_t*>(in);
+            op = "NPU_SET_DMA0_IDX_MAX";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_SKIP1):
+        {
+            const npu_set_dma0_idx_skip1_t& v = *reinterpret_cast<const npu_set_dma0_idx_skip1_t*>(in);
+            op = "NPU_SET_DMA0_IDX_SKIP1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0):
+        {
+            const npu_set_ifm2_base0_t& v = *reinterpret_cast<const npu_set_ifm2_base0_t*>(in);
+            op = "NPU_SET_IFM2_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1):
+        {
+            const npu_set_ifm2_base1_t& v = *reinterpret_cast<const npu_set_ifm2_base1_t*>(in);
+            op = "NPU_SET_IFM2_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2):
+        {
+            const npu_set_ifm2_base2_t& v = *reinterpret_cast<const npu_set_ifm2_base2_t*>(in);
+            op = "NPU_SET_IFM2_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3):
+        {
+            const npu_set_ifm2_base3_t& v = *reinterpret_cast<const npu_set_ifm2_base3_t*>(in);
+            op = "NPU_SET_IFM2_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X):
+        {
+            const npu_set_ifm2_stride_x_t& v = *reinterpret_cast<const npu_set_ifm2_stride_x_t*>(in);
+            op = "NPU_SET_IFM2_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y):
+        {
+            const npu_set_ifm2_stride_y_t& v = *reinterpret_cast<const npu_set_ifm2_stride_y_t*>(in);
+            op = "NPU_SET_IFM2_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C):
+        {
+            const npu_set_ifm2_stride_c_t& v = *reinterpret_cast<const npu_set_ifm2_stride_c_t*>(in);
+            op = "NPU_SET_IFM2_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE):
+        {
+            const npu_set_weight1_base_t& v = *reinterpret_cast<const npu_set_weight1_base_t*>(in);
+            op = "NPU_SET_WEIGHT1_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH):
+        {
+            const npu_set_weight1_length_t& v = *reinterpret_cast<const npu_set_weight1_length_t*>(in);
+            op = "NPU_SET_WEIGHT1_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_BASE):
+        {
+            const npu_set_weight2_base_t& v = *reinterpret_cast<const npu_set_weight2_base_t*>(in);
+            op = "NPU_SET_WEIGHT2_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_LENGTH):
+        {
+            const npu_set_weight2_length_t& v = *reinterpret_cast<const npu_set_weight2_length_t*>(in);
+            op = "NPU_SET_WEIGHT2_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_BASE):
+        {
+            const npu_set_weight3_base_t& v = *reinterpret_cast<const npu_set_weight3_base_t*>(in);
+            op = "NPU_SET_WEIGHT3_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_LENGTH):
+        {
+            const npu_set_weight3_length_t& v = *reinterpret_cast<const npu_set_weight3_length_t*>(in);
+            op = "NPU_SET_WEIGHT3_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_X):
+        {
+            const npu_set_resize_x_step_t& v = *reinterpret_cast<const npu_set_resize_x_step_t*>(in);
+            op = "NPU_SET_RESIZE_X_STEP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_Y):
+        {
+            const npu_set_resize_y_step_t& v = *reinterpret_cast<const npu_set_resize_y_step_t*>(in);
+            op = "NPU_SET_RESIZE_Y_STEP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) | static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_OP_BRANCH):
+        {
+            const npu_op_branch_t& v = *reinterpret_cast<const npu_op_branch_t*>(in);
+            op = "NPU_OP_BRANCH";
+            v.disassemble(fields);
+            break;
+        }
+        default: break;
+    }
+    return (*in & (3<<14)) != 0 ? 2 : 1;
+}
+#endif
+#endif
+// Signal the end of command stream (single-word CMD0 command NPU_OP_STOP carrying a 16-bit mask)
+struct npu_op_stop_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  opcode: cmd0_opcode value, emitted at bits 0-9 by operator uint32_t()
+    uint32_t reserved0:4; //  reserved: zeroed by both constructors, not emitted by operator uint32_t()
+    uint32_t control:2; //  control: cmd_ctrl value, emitted at bits 14-15 by operator uint32_t()
+    uint32_t mask:16; //  Encoding for 16-bit mask value, emitted at bits 16-31 by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_op_stop_t(uint32_t _mask) : // builds an NPU_OP_STOP / CMD0_CTRL command carrying _mask
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        mask(_mask & ((1U << 16)-1)) // keeps only the low 16 bits; no range assert here, unlike set_mask()
+    {}
+    CONSTEXPR npu_op_stop_t() : // default: NPU_OP_STOP / CMD0_CTRL with mask 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        mask(0)
+    {}
+    CONSTEXPR bool valid() const // true while opcode and control still hold NPU_OP_STOP and CMD0_CTRL
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // re-stamps opcode and control only; mask and reserved0 are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and mask into one 32-bit word by explicit shifts
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(mask) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field cast to the cmd0_opcode enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_stop_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keeps only the low 10 bits of value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field cast to the cmd_ctrl enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_stop_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this so setters can be chained
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keeps only the low 2 bits of value
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mask() const // mask field widened to uint32_t
+    {
+        return static_cast<uint32_t>(mask);
+    }
+    CONSTEXPR npu_op_stop_t& set_mask(uint32_t value) // returns *this so setters can be chained
+    {
+        assert((value >> 16) == 0); // range check: value must fit in 16 bits
+        mask = static_cast<uint16_t>(value & ((1U << 16)-1)); // masked to 16 bits as well, independent of the assert
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, decimal value) pair for the payload
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("mask", std::to_string(mask)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Raises an IRQ to the host (single-word CMD0 command NPU_OP_IRQ carrying a 16-bit mask)
+struct npu_op_irq_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  opcode: cmd0_opcode value, emitted at bits 0-9 by operator uint32_t()
+    uint32_t reserved0:4; //  reserved: zeroed by both constructors, not emitted by operator uint32_t()
+    uint32_t control:2; //  control: cmd_ctrl value, emitted at bits 14-15 by operator uint32_t()
+    uint32_t mask:16; //  Encoding for 16-bit mask value, emitted at bits 16-31 by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_op_irq_t(uint32_t _mask) : // builds an NPU_OP_IRQ / CMD0_CTRL command carrying _mask
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        mask(_mask & ((1U << 16)-1)) // keeps only the low 16 bits; no range assert here, unlike set_mask()
+    {}
+    CONSTEXPR npu_op_irq_t() : // default: NPU_OP_IRQ / CMD0_CTRL with mask 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        mask(0)
+    {}
+    CONSTEXPR bool valid() const // true while opcode and control still hold NPU_OP_IRQ and CMD0_CTRL
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // re-stamps opcode and control only; mask and reserved0 are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and mask into one 32-bit word by explicit shifts
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(mask) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field cast to the cmd0_opcode enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_irq_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keeps only the low 10 bits of value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field cast to the cmd_ctrl enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_irq_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this so setters can be chained
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keeps only the low 2 bits of value
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mask() const // mask field widened to uint32_t
+    {
+        return static_cast<uint32_t>(mask);
+    }
+    CONSTEXPR npu_op_irq_t& set_mask(uint32_t value) // returns *this so setters can be chained
+    {
+        assert((value >> 16) == 0); // range check: value must fit in 16 bits
+        mask = static_cast<uint16_t>(value & ((1U << 16)-1)); // masked to 16 bits as well, independent of the assert
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, decimal value) pair for the payload
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("mask", std::to_string(mask)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// 2D convolution (single-word CMD0 command NPU_OP_CONV with a 1-bit weights_ifm2 flag)
+struct npu_op_conv_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  opcode: cmd0_opcode value, emitted at bits 0-9 by operator uint32_t()
+    uint32_t reserved0:4; //  reserved: zeroed by both constructors, not emitted by operator uint32_t()
+    uint32_t control:2; //  control: cmd_ctrl value, emitted at bits 14-15 by operator uint32_t()
+    uint32_t weights_ifm2:1; //  Read weights from IFM2 rather than a static compressed stream; emitted at bit 16
+    uint32_t reserved1:15; //  reserved: zeroed by both constructors, not emitted by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_op_conv_t(uint32_t _weights_ifm2) : // builds an NPU_OP_CONV / CMD0_CTRL command with the given flag
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        weights_ifm2(_weights_ifm2 & ((1U << 1)-1)), // keeps only bit 0; no range assert here, unlike set_weights_ifm2()
+        reserved1(0)
+    {}
+    CONSTEXPR npu_op_conv_t() : // default: NPU_OP_CONV / CMD0_CTRL with weights_ifm2 cleared
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        weights_ifm2(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // true while opcode and control still hold NPU_OP_CONV and CMD0_CTRL
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // re-stamps opcode and control only; weights_ifm2 and reserved fields are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and weights_ifm2 into one 32-bit word by explicit shifts
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(weights_ifm2) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field cast to the cmd0_opcode enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_conv_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keeps only the low 10 bits of value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field cast to the cmd_ctrl enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_conv_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this so setters can be chained
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keeps only the low 2 bits of value
+        return *this;
+    }
+    CONSTEXPR uint32_t get_weights_ifm2() const // weights_ifm2 flag widened to uint32_t (0 or 1)
+    {
+        return static_cast<uint32_t>(weights_ifm2);
+    }
+    CONSTEXPR npu_op_conv_t& set_weights_ifm2(uint32_t value) // returns *this so setters can be chained
+    {
+        assert((value >> 1) == 0); // range check: value must be 0 or 1
+        weights_ifm2 = static_cast<uint8_t>(value & ((1U << 1)-1)); // masked to 1 bit as well, independent of the assert
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, decimal value) pair for the payload
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("weights_ifm2", std::to_string(weights_ifm2)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Depth-wise 2D convolution (single-word CMD0 command NPU_OP_DEPTHWISE; no payload fields)
+struct npu_op_depthwise_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  opcode: cmd0_opcode value, emitted at bits 0-9 by operator uint32_t()
+    uint32_t reserved0:4; //  reserved: zeroed by the constructor, not emitted by operator uint32_t()
+    uint32_t control:2; //  control: cmd_ctrl value, emitted at bits 14-15 by operator uint32_t()
+    uint32_t reserved1:16; //  reserved: zeroed by the constructor, not emitted by operator uint32_t()
+#ifdef __cplusplus
+public:
+    CONSTEXPR npu_op_depthwise_t() : // only constructor: NPU_OP_DEPTHWISE / CMD0_CTRL with reserved fields zeroed
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // true while opcode and control still hold NPU_OP_DEPTHWISE and CMD0_CTRL
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // re-stamps opcode and control only; reserved fields are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode and control into one 32-bit word; the upper 16 bits stay 0
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field cast to the cmd0_opcode enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_depthwise_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keeps only the low 10 bits of value
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field cast to the cmd_ctrl enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_depthwise_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this so setters can be chained
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keeps only the low 2 bits of value
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>&) const // parameter unnamed: there are no payload fields to append
+    {
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Pooling (NPU_OP_POOL command word; payload is the 3-bit pooling mode)
+struct npu_op_pool_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t pooling_mode:3; //  Pooling mode (packed at bit 16 by operator uint32_t)
+    uint32_t reserved1:13; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_op_pool_t(NPU_NAMESPACE::pooling_mode _pooling_mode) : // Payload constructor: _pooling_mode is masked to 3 bits
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pooling_mode(static_cast<uint8_t>(_pooling_mode) & ((1U << 3)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_op_pool_t() : // Default: opcode/control preset for this command, payload and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pooling_mode(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; pooling_mode is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(pooling_mode) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_pool_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_pool_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::pooling_mode get_pooling_mode() const
+    {
+        return static_cast<NPU_NAMESPACE::pooling_mode>(pooling_mode);
+    }
+    CONSTEXPR npu_op_pool_t& set_pooling_mode(NPU_NAMESPACE::pooling_mode value) // Keeps the low 3 bits; returns *this for chaining
+    {
+        pooling_mode = static_cast<uint8_t>(value) & ((1U << 3)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("pooling_mode", name); "****" when the value is outside pooling_mode_str
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("pooling_mode", (pooling_mode < (sizeof(pooling_mode_str)/sizeof(pooling_mode_str[0])) ? pooling_mode_str[pooling_mode] : "****")));
+    }
+#endif
+#endif
+};
+// Elementwise operation (NPU_OP_ELEMENTWISE command word; payload is the 6-bit elementwise mode)
+struct npu_op_elementwise_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t elementwise_mode:6; //  Elementwise mode (packed at bit 16 by operator uint32_t)
+    uint32_t reserved1:10; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_op_elementwise_t(NPU_NAMESPACE::elementwise_mode _elementwise_mode) : // Payload constructor: _elementwise_mode is masked to 6 bits
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        elementwise_mode(static_cast<uint8_t>(_elementwise_mode) & ((1U << 6)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_op_elementwise_t() : // Default: opcode/control preset for this command, payload and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        elementwise_mode(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; elementwise_mode is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(elementwise_mode) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_elementwise_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_elementwise_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::elementwise_mode get_elementwise_mode() const
+    {
+        return static_cast<NPU_NAMESPACE::elementwise_mode>(elementwise_mode);
+    }
+    CONSTEXPR npu_op_elementwise_t& set_elementwise_mode(NPU_NAMESPACE::elementwise_mode value) // Keeps the low 6 bits; returns *this for chaining
+    {
+        elementwise_mode = static_cast<uint8_t>(value) & ((1U << 6)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("elementwise_mode", name); "****" when the value is outside elementwise_mode_str
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("elementwise_mode", (elementwise_mode < (sizeof(elementwise_mode_str)/sizeof(elementwise_mode_str[0])) ? elementwise_mode_str[elementwise_mode] : "****")));
+    }
+#endif
+#endif
+};
+// Resize operation (NPU_OP_RESIZE command word; payload is the 2-bit resize mode)
+struct npu_op_resize_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t resize_mode:2; //  Resize mode (packed at bit 16 by operator uint32_t)
+    uint32_t reserved1:14; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_op_resize_t(NPU_NAMESPACE::resize_mode _resize_mode) : // Payload constructor: _resize_mode is masked to 2 bits
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_RESIZE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_mode(static_cast<uint8_t>(_resize_mode) & ((1U << 2)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_op_resize_t() : // Default: opcode/control preset for this command, payload and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_RESIZE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_mode(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_RESIZE) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; resize_mode is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_RESIZE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(resize_mode) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_resize_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_resize_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::resize_mode get_resize_mode() const
+    {
+        return static_cast<NPU_NAMESPACE::resize_mode>(resize_mode);
+    }
+    CONSTEXPR npu_op_resize_t& set_resize_mode(NPU_NAMESPACE::resize_mode value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        resize_mode = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("resize_mode", name); "****" when the value is outside resize_mode_str
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("resize_mode", (resize_mode < (sizeof(resize_mode_str)/sizeof(resize_mode_str[0])) ? resize_mode_str[resize_mode] : "****")));
+    }
+#endif
+#endif
+};
+// Queue new DMA for the given channel (NPU_OP_DMA_START command word; it carries no payload field)
+struct npu_op_dma_start_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructor
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t reserved1:16; //  reserved, zeroed by the constructor
+#ifdef __cplusplus
+public:
+    CONSTEXPR npu_op_dma_start_t() : // Presets opcode/control for this command and zeroes the reserved bits
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; no other field is written
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_dma_start_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_dma_start_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>&) const // Nothing to report: no payload field
+    {
+    }
+#endif
+#endif
+};
+// Wait for the mem2mem DMA channel to have k or fewer active descriptors outstanding (NPU_OP_DMA_WAIT command word)
+struct npu_op_dma_wait_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t k:2; //  Number of outstanding descriptors (packed at bit 16 by operator uint32_t)
+    uint32_t reserved1:14; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_op_dma_wait_t(uint32_t _k) : // Payload constructor: _k is silently masked to 2 bits (set_k() asserts instead)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        k(_k & ((1U << 2)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_op_dma_wait_t() : // Default: opcode/control preset for this command, k and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        k(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; k is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(k) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_dma_wait_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_dma_wait_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_k() const
+    {
+        return static_cast<uint32_t>(k);
+    }
+    CONSTEXPR npu_op_dma_wait_t& set_k(uint32_t value) // Asserts value fits in 2 bits, then stores it; returns *this
+    {
+        assert((value >> 2) == 0);
+        k = static_cast<uint8_t>(value & ((1U << 2)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("k", decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("k", std::to_string(k)));
+    }
+#endif
+#endif
+};
+// Wait for kernel operations to complete (NPU_OP_KERNEL_WAIT command word; payload is the 1-bit n)
+struct npu_op_kernel_wait_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t n:1; //  Number of kernel operations (packed at bit 16 by operator uint32_t)
+    uint32_t reserved1:15; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_op_kernel_wait_t(uint32_t _n) : // Payload constructor: _n is silently masked to 1 bit (set_n() asserts instead)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        n(_n & ((1U << 1)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_op_kernel_wait_t() : // Default: opcode/control preset for this command, n and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        n(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; n is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(n) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_kernel_wait_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_kernel_wait_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_n() const
+    {
+        return static_cast<uint32_t>(n);
+    }
+    CONSTEXPR npu_op_kernel_wait_t& set_n(uint32_t value) // Asserts value fits in 1 bit, then stores it; returns *this
+    {
+        assert((value >> 1) == 0);
+        n = static_cast<uint8_t>(value & ((1U << 1)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("n", decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("n", std::to_string(n)));
+    }
+#endif
+#endif
+};
+// Enable or disable PMU counting (debug feature only) (NPU_OP_PMU_MASK command word; payload is the 1-bit enable)
+struct npu_op_pmu_mask_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t enable:1; //  Enable or disable PMU mask (packed at bit 16 by operator uint32_t)
+    uint32_t reserved1:15; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_op_pmu_mask_t(uint32_t _enable) : // Payload constructor: _enable is silently masked to 1 bit (set_enable() asserts instead)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        enable(_enable & ((1U << 1)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_op_pmu_mask_t() : // Default: opcode/control preset for this command, enable and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        enable(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; enable is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(enable) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_pmu_mask_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_pmu_mask_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_enable() const
+    {
+        return static_cast<uint32_t>(enable);
+    }
+    CONSTEXPR npu_op_pmu_mask_t& set_enable(uint32_t value) // Asserts value fits in 1 bit, then stores it; returns *this
+    {
+        assert((value >> 1) == 0);
+        enable = static_cast<uint8_t>(value & ((1U << 1)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("enable", decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("enable", std::to_string(enable)));
+    }
+#endif
+#endif
+};
+// IFM top pad (NPU_SET_IFM_PAD_TOP command word; payload is the 7-bit pad)
+struct npu_set_ifm_pad_top_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t pad:7; //  IFM top pad (packed at bit 16 by operator uint32_t)
+    uint32_t reserved1:9; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_set_ifm_pad_top_t(uint32_t _pad) : // Payload constructor: _pad is silently masked to 7 bits (set_pad() asserts instead)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pad(_pad & ((1U << 7)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm_pad_top_t() : // Default: opcode/control preset for this command, pad and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pad(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; pad is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(pad) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_pad_top_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_pad_top_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_pad() const
+    {
+        return static_cast<uint32_t>(pad);
+    }
+    CONSTEXPR npu_set_ifm_pad_top_t& set_pad(uint32_t value) // Asserts value fits in 7 bits, then stores it; returns *this
+    {
+        assert((value >> 7) == 0);
+        pad = static_cast<uint8_t>(value & ((1U << 7)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("pad", decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+    }
+#endif
+#endif
+};
+// IFM left pad (NPU_SET_IFM_PAD_LEFT command word; payload is the 7-bit pad)
+struct npu_set_ifm_pad_left_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t pad:7; //  IFM left pad (packed at bit 16 by operator uint32_t)
+    uint32_t reserved1:9; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_set_ifm_pad_left_t(uint32_t _pad) : // Payload constructor: _pad is silently masked to 7 bits (set_pad() asserts instead)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pad(_pad & ((1U << 7)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm_pad_left_t() : // Default: opcode/control preset for this command, pad and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pad(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; pad is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(pad) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_pad_left_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_pad_left_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_pad() const
+    {
+        return static_cast<uint32_t>(pad);
+    }
+    CONSTEXPR npu_set_ifm_pad_left_t& set_pad(uint32_t value) // Asserts value fits in 7 bits, then stores it; returns *this
+    {
+        assert((value >> 7) == 0);
+        pad = static_cast<uint8_t>(value & ((1U << 7)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("pad", decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+    }
+#endif
+#endif
+};
+// IFM right pad (NPU_SET_IFM_PAD_RIGHT command word; payload is the 8-bit pad)
+struct npu_set_ifm_pad_right_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t pad:8; //  IFM right pad. Max value is 128 (not enforced in this struct; only the 8-bit width is checked)
+    uint32_t reserved1:8; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_set_ifm_pad_right_t(uint32_t _pad) : // Payload constructor: _pad is silently masked to 8 bits (set_pad() asserts instead)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pad(_pad & ((1U << 8)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm_pad_right_t() : // Default: opcode/control preset for this command, pad and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pad(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; pad is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(pad) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_pad_right_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_pad_right_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_pad() const
+    {
+        return static_cast<uint32_t>(pad);
+    }
+    CONSTEXPR npu_set_ifm_pad_right_t& set_pad(uint32_t value) // Asserts value fits in 8 bits, then stores it; returns *this
+    {
+        assert((value >> 8) == 0);
+        pad = static_cast<uint8_t>(value & ((1U << 8)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("pad", decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+    }
+#endif
+#endif
+};
+// IFM bottom pad (NPU_SET_IFM_PAD_BOTTOM command word; payload is the 8-bit pad)
+struct npu_set_ifm_pad_bottom_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t pad:8; //  IFM bottom pad. Max value is 128 (not enforced in this struct; only the 8-bit width is checked)
+    uint32_t reserved1:8; //  reserved, zeroed by the constructors
+#ifdef __cplusplus
+public:
+    npu_set_ifm_pad_bottom_t(uint32_t _pad) : // Payload constructor: _pad is silently masked to 8 bits (set_pad() asserts instead)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pad(_pad & ((1U << 8)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm_pad_bottom_t() : // Default: opcode/control preset for this command, pad and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        pad(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; pad is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(pad) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_pad_bottom_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_pad_bottom_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_pad() const
+    {
+        return static_cast<uint32_t>(pad);
+    }
+    CONSTEXPR npu_set_ifm_pad_bottom_t& set_pad(uint32_t value) // Asserts value fits in 8 bits, then stores it; returns *this
+    {
+        assert((value >> 8) == 0);
+        pad = static_cast<uint8_t>(value & ((1U << 8)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("pad", decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+    }
+#endif
+#endif
+};
+// Number of input channels for convolution (NPU_SET_IFM_DEPTH_M1 command word; payload is the 16-bit depth_m1)
+struct npu_set_ifm_depth_m1_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode (packed at bit 0 by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, zeroed by the constructors
+    uint32_t control:2; //  control (packed at bit 14 by operator uint32_t)
+    uint32_t depth_m1:16; //  Number of input channels for convolution (presumably minus one, per the _m1 suffix; TODO confirm)
+#ifdef __cplusplus
+public:
+    npu_set_ifm_depth_m1_t(uint32_t _depth_m1) : // Payload constructor: _depth_m1 is silently masked to 16 bits (set_depth_m1() asserts instead)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        depth_m1(_depth_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm_depth_m1_t() : // Default: opcode/control preset for this command, depth_m1 and reserved bits zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        depth_m1(0)
+    {}
+    CONSTEXPR bool valid() const // True while opcode and control still match this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // Re-applies this command's opcode and control; depth_m1 is left as it is
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const // Packs the fields into the raw command word; const so const objects convert too
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(depth_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_depth_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_depth_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_depth_m1() const
+    {
+        return static_cast<uint32_t>(depth_m1);
+    }
+    CONSTEXPR npu_set_ifm_depth_m1_t& set_depth_m1(uint32_t value) // Asserts value fits in 16 bits, then stores it; returns *this
+    {
+        assert((value >> 16) == 0);
+        depth_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("depth_m1", decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1)));
+    }
+#endif
+#endif
+};
+// NPU_SET_IFM_PRECISION command word (cmd0): IFM type, precision, format and storage mode
+struct npu_set_ifm_precision_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t activation_type:1; //  IFM type (activation_type enum); encoded at bit 16 by operator uint32_t()
+    uint32_t reserved1:1; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t activation_precision:2; //  IFM precision (activation_precision enum); encoded at bits [19:18] by operator uint32_t()
+    uint32_t reserved2:2; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t activation_format:2; //  IFM format (activation_format enum); encoded at bits [23:22] by operator uint32_t()
+    uint32_t reserved3:6; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t activation_storage:2; //  IFM storage mode (activation_storage enum); encoded at bits [31:30] by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ifm_precision_t(NPU_NAMESPACE::activation_type _activation_type, NPU_NAMESPACE::activation_precision _activation_precision, NPU_NAMESPACE::activation_format _activation_format, NPU_NAMESPACE::activation_storage _activation_storage) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1)-1)),
+        reserved1(0),
+        activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2)-1)),
+        reserved2(0),
+        activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2)-1)),
+        reserved3(0),
+        activation_storage(static_cast<uint8_t>(_activation_storage) & ((1U << 2)-1))
+    {}
+    CONSTEXPR npu_set_ifm_precision_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        activation_type(0),
+        reserved1(0),
+        activation_precision(0),
+        reserved2(0),
+        activation_format(0),
+        reserved3(0),
+        activation_storage(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(activation_type) << 16;
+        word |= uint32_t(activation_precision) << 18;
+        word |= uint32_t(activation_format) << 22;
+        word |= uint32_t(activation_storage) << 30;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_precision_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_precision_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+    }
+    CONSTEXPR npu_set_ifm_precision_t& set_activation_type(NPU_NAMESPACE::activation_type value)
+    {
+        activation_type = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+    }
+    CONSTEXPR npu_set_ifm_precision_t& set_activation_precision(NPU_NAMESPACE::activation_precision value)
+    {
+        activation_precision = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+    }
+    CONSTEXPR npu_set_ifm_precision_t& set_activation_format(NPU_NAMESPACE::activation_format value)
+    {
+        activation_format = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_storage get_activation_storage() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_storage>(activation_storage);
+    }
+    CONSTEXPR npu_set_ifm_precision_t& set_activation_storage(NPU_NAMESPACE::activation_storage value)
+    {
+        activation_storage = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("activation_type", (activation_type < (sizeof(activation_type_str)/sizeof(activation_type_str[0])) ? activation_type_str[activation_type] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_precision", (activation_precision < (sizeof(activation_precision_str)/sizeof(activation_precision_str[0])) ? activation_precision_str[activation_precision] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_format", (activation_format < (sizeof(activation_format_str)/sizeof(activation_format_str[0])) ? activation_format_str[activation_format] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_storage", (activation_storage < (sizeof(activation_storage_str)/sizeof(activation_storage_str[0])) ? activation_storage_str[activation_storage] : "****")));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_IFM_UPSCALE command word (cmd0): IFM upscale mode
+struct npu_set_ifm_upscale_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t mode:2; //  IFM upscale mode (ifm_upscale_mode enum); encoded at bits [17:16] by operator uint32_t()
+    uint32_t reserved1:14; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ifm_upscale_t(NPU_NAMESPACE::ifm_upscale_mode _mode) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        mode(static_cast<uint8_t>(_mode) & ((1U << 2)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm_upscale_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        mode(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(mode) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_upscale_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_upscale_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::ifm_upscale_mode get_mode() const
+    {
+        return static_cast<NPU_NAMESPACE::ifm_upscale_mode>(mode);
+    }
+    CONSTEXPR npu_set_ifm_upscale_t& set_mode(NPU_NAMESPACE::ifm_upscale_mode value)
+    {
+        mode = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("mode", (mode < (sizeof(ifm_upscale_mode_str)/sizeof(ifm_upscale_mode_str[0])) ? ifm_upscale_mode_str[mode] : "****")));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_IFM_ZERO_POINT command word (cmd0): IFM zero point offset
+struct npu_set_ifm_zero_point_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t zero_point:16; //  Zero point offset; encoded at bits [31:16] by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ifm_zero_point_t(uint32_t _zero_point) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        zero_point(_zero_point & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm_zero_point_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        zero_point(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(zero_point) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_zero_point_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_zero_point_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_zero_point() const
+    {
+        return static_cast<uint32_t>(zero_point);
+    }
+    CONSTEXPR npu_set_ifm_zero_point_t& set_zero_point(uint32_t value)
+    {
+        assert((value >> 16) == 0);
+        zero_point = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_IFM_WIDTH0_M1 command word (cmd0): IFM Tile 0 and Tile 2 width (_m1: presumably the value minus one - TODO confirm)
+struct npu_set_ifm_width0_m1_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t width_m1:16; //  IFM Tile 0 and Tile 2 width; encoded at bits [31:16] by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ifm_width0_m1_t(uint32_t _width_m1) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(_width_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm_width0_m1_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(width_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_width0_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_width0_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_width_m1() const
+    {
+        return static_cast<uint32_t>(width_m1);
+    }
+    CONSTEXPR npu_set_ifm_width0_m1_t& set_width_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0);
+        width_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_IFM_HEIGHT0_M1 command word (cmd0): IFM Tile 0 height (_m1: presumably the value minus one - TODO confirm)
+struct npu_set_ifm_height0_m1_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t height_m1:16; //  IFM Tile 0 height; encoded at bits [31:16] by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ifm_height0_m1_t(uint32_t _height_m1) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm_height0_m1_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(height_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_height0_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_height0_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_ifm_height0_m1_t& set_height_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0);
+        height_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_IFM_HEIGHT1_M1 command word (cmd0): IFM Tile 1 height (_m1: presumably the value minus one - TODO confirm)
+struct npu_set_ifm_height1_m1_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t height_m1:16; //  IFM Tile 1 height; encoded at bits [31:16] by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ifm_height1_m1_t(uint32_t _height_m1) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm_height1_m1_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(height_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_height1_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_height1_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_ifm_height1_m1_t& set_height_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0);
+        height_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_IFM_REGION command word (cmd0): region index n used for IFM access
+struct npu_set_ifm_region_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t region:3; //  Region number for external memory accesses; encoded at bits [18:16] by operator uint32_t()
+    uint32_t reserved1:13; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ifm_region_t(uint32_t _region) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(_region & ((1U << 3)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm_region_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(region) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_region_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_region_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_ifm_region_t& set_region(uint32_t value)
+    {
+        assert((value >> 3) == 0);
+        region = static_cast<uint8_t>(value & ((1U << 3)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_IFM_BROADCAST command word (cmd0): IFM broadcast configuration
+struct npu_set_ifm_broadcast_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t broadcast_mode:4; //  Broadcast mode for IFM1. When not using broadcast_mode_scalar, accesses to IFM1 set the corresponding axes to 0 and the corresponding IFM1 H/W/C to 1. Encoded at bits [19:16] by operator uint32_t()
+    uint32_t reserved1:12; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ifm_broadcast_t(NPU_NAMESPACE::broadcast_mode _broadcast_mode) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_BROADCAST)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        broadcast_mode(static_cast<uint8_t>(_broadcast_mode) & ((1U << 4)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm_broadcast_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_BROADCAST)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        broadcast_mode(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_BROADCAST) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_BROADCAST); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(broadcast_mode) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_broadcast_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_broadcast_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_mode() const
+    {
+        return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_mode);
+    }
+    CONSTEXPR npu_set_ifm_broadcast_t& set_broadcast_mode(NPU_NAMESPACE::broadcast_mode value)
+    {
+        broadcast_mode = static_cast<uint8_t>(value) & ((1U << 4)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("broadcast_mode", (broadcast_mode < (sizeof(broadcast_mode_str)/sizeof(broadcast_mode_str[0])) ? broadcast_mode_str[broadcast_mode] : "****")));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_OFM_WIDTH_M1 command word (cmd0): output feature map width (_m1: presumably the value minus one - TODO confirm)
+struct npu_set_ofm_width_m1_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t width_m1:16; //  Output feature map width; encoded at bits [31:16] by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ofm_width_m1_t(uint32_t _width_m1) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(_width_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ofm_width_m1_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(width_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_width_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_width_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_width_m1() const
+    {
+        return static_cast<uint32_t>(width_m1);
+    }
+    CONSTEXPR npu_set_ofm_width_m1_t& set_width_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0);
+        width_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_OFM_HEIGHT_M1 command word (cmd0): output feature map height (_m1: presumably the value minus one - TODO confirm)
+struct npu_set_ofm_height_m1_t
+{
+#ifdef __cplusplus
+private:
+#endif // __cplusplus
+    uint32_t opcode:10; //  Command opcode; encoded at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; //  Reserved; zeroed by the constructors and omitted by operator uint32_t()
+    uint32_t control:2; //  Command control (cmd_ctrl); encoded at bits [15:14] by operator uint32_t()
+    uint32_t height_m1:16; //  Output feature map height; encoded at bits [31:16] by operator uint32_t()
+#ifdef __cplusplus
+public:
+    npu_set_ofm_height_m1_t(uint32_t _height_m1) :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ofm_height_m1_t() :
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0)
+    {}
+    CONSTEXPR bool valid() const
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init()
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t()
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(height_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_height_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_height_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_ofm_height_m1_t& set_height_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0);
+        height_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Output feature map depth: NPU_SET_OFM_DEPTH_M1 command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 16 payload)
+struct npu_set_ofm_depth_m1_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t depth_m1:16; //  Output feature map depth; placed in bits 31:16 (presumably depth minus one, per the _m1 suffix -- confirm against the NPU spec)
+#ifdef __cplusplus
+public:
+    npu_set_ofm_depth_m1_t(uint32_t _depth_m1) : // masks _depth_m1 to 16 bits without asserting (set_depth_m1 asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        depth_m1(_depth_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ofm_depth_m1_t() : // opcode/control preset for this command, payload zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        depth_m1(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; depth_m1 and reserved0 are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and depth_m1 into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(depth_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_depth_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_depth_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_depth_m1() const
+    {
+        return static_cast<uint32_t>(depth_m1);
+    }
+    CONSTEXPR npu_set_ofm_depth_m1_t& set_depth_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        depth_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("depth_m1", decimal text of depth_m1) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM Precision: NPU_SET_OFM_PRECISION command word carrying seven OFM attribute fields (type, precision, format, scale mode, reverse, transpose, storage)
+struct npu_set_ofm_precision_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t activation_type:1; //  OFM type; placed in bit 16
+    uint32_t activation_precision:2; //  OFM precision; placed in bits 18:17
+    uint32_t reserved1:3; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t activation_format:2; //  OFM format; placed in bits 23:22
+    uint32_t scale_mode:1; //  OFM scale mode; placed in bit 24
+    uint32_t activation_reverse:2; //  OFM reverse; placed in bits 26:25
+    uint32_t activation_transpose:3; //  OFM transpose; placed in bits 29:27
+    uint32_t activation_storage:2; //  OFM storage mode; placed in bits 31:30
+#ifdef __cplusplus
+public:
+    npu_set_ofm_precision_t(NPU_NAMESPACE::activation_type _activation_type, NPU_NAMESPACE::activation_precision _activation_precision, NPU_NAMESPACE::activation_format _activation_format, NPU_NAMESPACE::ofm_scale_mode _scale_mode, NPU_NAMESPACE::activation_reverse _activation_reverse, NPU_NAMESPACE::activation_transpose _activation_transpose, NPU_NAMESPACE::activation_storage _activation_storage) : // each enum argument is cast to uint8_t and masked to its field width
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1)-1)),
+        activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2)-1)),
+        reserved1(0),
+        activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2)-1)),
+        scale_mode(static_cast<uint8_t>(_scale_mode) & ((1U << 1)-1)),
+        activation_reverse(static_cast<uint8_t>(_activation_reverse) & ((1U << 2)-1)),
+        activation_transpose(static_cast<uint8_t>(_activation_transpose) & ((1U << 3)-1)),
+        activation_storage(static_cast<uint8_t>(_activation_storage) & ((1U << 2)-1))
+    {}
+    CONSTEXPR npu_set_ofm_precision_t() : // opcode/control preset for this command, every attribute field zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        activation_type(0),
+        activation_precision(0),
+        reserved1(0),
+        activation_format(0),
+        scale_mode(0),
+        activation_reverse(0),
+        activation_transpose(0),
+        activation_storage(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the attribute fields
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; attribute and reserved fields are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs every non-reserved field into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(activation_type) << 16;
+        word |= uint32_t(activation_precision) << 17;
+        word |= uint32_t(activation_format) << 22; // bits 21:19 (reserved1) are skipped
+        word |= uint32_t(scale_mode) << 24;
+        word |= uint32_t(activation_reverse) << 25;
+        word |= uint32_t(activation_transpose) << 27;
+        word |= uint32_t(activation_storage) << 30;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_activation_type(NPU_NAMESPACE::activation_type value) // stores the low 1 bit of value; no range assert
+    {
+        activation_type = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_activation_precision(NPU_NAMESPACE::activation_precision value) // stores the low 2 bits of value; no range assert
+    {
+        activation_precision = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_activation_format(NPU_NAMESPACE::activation_format value) // stores the low 2 bits of value; no range assert
+    {
+        activation_format = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::ofm_scale_mode get_scale_mode() const
+    {
+        return static_cast<NPU_NAMESPACE::ofm_scale_mode>(scale_mode);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_scale_mode(NPU_NAMESPACE::ofm_scale_mode value) // stores the low 1 bit of value; no range assert
+    {
+        scale_mode = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_reverse get_activation_reverse() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_reverse>(activation_reverse);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_activation_reverse(NPU_NAMESPACE::activation_reverse value) // stores the low 2 bits of value; no range assert
+    {
+        activation_reverse = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_transpose get_activation_transpose() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_transpose>(activation_transpose);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_activation_transpose(NPU_NAMESPACE::activation_transpose value) // stores the low 3 bits of value; no range assert
+    {
+        activation_transpose = static_cast<uint8_t>(value) & ((1U << 3)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_storage get_activation_storage() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_storage>(activation_storage);
+    }
+    CONSTEXPR npu_set_ofm_precision_t& set_activation_storage(NPU_NAMESPACE::activation_storage value) // stores the low 2 bits of value; no range assert
+    {
+        activation_storage = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per attribute field; text comes from the matching *_str table, or "****" when the value is past the end of that table
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("activation_type", (activation_type < (sizeof(activation_type_str)/sizeof(activation_type_str[0])) ? activation_type_str[activation_type] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_precision", (activation_precision < (sizeof(activation_precision_str)/sizeof(activation_precision_str[0])) ? activation_precision_str[activation_precision] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_format", (activation_format < (sizeof(activation_format_str)/sizeof(activation_format_str[0])) ? activation_format_str[activation_format] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("scale_mode", (scale_mode < (sizeof(ofm_scale_mode_str)/sizeof(ofm_scale_mode_str[0])) ? ofm_scale_mode_str[scale_mode] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_reverse", (activation_reverse < (sizeof(activation_reverse_str)/sizeof(activation_reverse_str[0])) ? activation_reverse_str[activation_reverse] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_transpose", (activation_transpose < (sizeof(activation_transpose_str)/sizeof(activation_transpose_str[0])) ? activation_transpose_str[activation_transpose] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_storage", (activation_storage < (sizeof(activation_storage_str)/sizeof(activation_storage_str[0])) ? activation_storage_str[activation_storage] : "****")));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM block width: NPU_SET_OFM_BLK_WIDTH_M1 command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 7 payload + 9 reserved)
+struct npu_set_ofm_blk_width_m1_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t width_m1:7; //  OFM block width; placed in bits 22:16 (presumably width minus one, per the _m1 suffix -- confirm against the NPU spec)
+    uint32_t reserved1:9; // reserved; zeroed by both constructors and omitted from the packed word
+#ifdef __cplusplus
+public:
+    npu_set_ofm_blk_width_m1_t(uint32_t _width_m1) : // masks _width_m1 to 7 bits without asserting (set_width_m1 asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(_width_m1 & ((1U << 7)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ofm_blk_width_m1_t() : // opcode/control preset for this command, payload zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; width_m1 and the reserved fields are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and width_m1 into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(width_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_blk_width_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_blk_width_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_width_m1() const
+    {
+        return static_cast<uint32_t>(width_m1);
+    }
+    CONSTEXPR npu_set_ofm_blk_width_m1_t& set_width_m1(uint32_t value)
+    {
+        assert((value >> 7) == 0); // value must fit in 7 bits
+        width_m1 = static_cast<uint8_t>(value & ((1U << 7)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("width_m1", decimal text of width_m1) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM block height: NPU_SET_OFM_BLK_HEIGHT_M1 command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 7 payload + 9 reserved)
+struct npu_set_ofm_blk_height_m1_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t height_m1:7; //  OFM block height; placed in bits 22:16 (presumably height minus one, per the _m1 suffix -- confirm against the NPU spec)
+    uint32_t reserved1:9; // reserved; zeroed by both constructors and omitted from the packed word
+#ifdef __cplusplus
+public:
+    npu_set_ofm_blk_height_m1_t(uint32_t _height_m1) : // masks _height_m1 to 7 bits without asserting (set_height_m1 asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 7)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ofm_blk_height_m1_t() : // opcode/control preset for this command, payload zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; height_m1 and the reserved fields are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and height_m1 into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(height_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_blk_height_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_blk_height_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_ofm_blk_height_m1_t& set_height_m1(uint32_t value)
+    {
+        assert((value >> 7) == 0); // value must fit in 7 bits
+        height_m1 = static_cast<uint8_t>(value & ((1U << 7)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("height_m1", decimal text of height_m1) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM block depth: NPU_SET_OFM_BLK_DEPTH_M1 command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 10 payload + 6 reserved)
+struct npu_set_ofm_blk_depth_m1_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t depth_m1:10; //  OFM block depth; placed in bits 25:16 (presumably depth minus one, per the _m1 suffix -- confirm against the NPU spec)
+    uint32_t reserved1:6; // reserved; zeroed by both constructors and omitted from the packed word
+#ifdef __cplusplus
+public:
+    npu_set_ofm_blk_depth_m1_t(uint32_t _depth_m1) : // masks _depth_m1 to 10 bits without asserting (set_depth_m1 asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        depth_m1(_depth_m1 & ((1U << 10)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ofm_blk_depth_m1_t() : // opcode/control preset for this command, payload zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        depth_m1(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; depth_m1 and the reserved fields are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and depth_m1 into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(depth_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_blk_depth_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_blk_depth_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_depth_m1() const
+    {
+        return static_cast<uint32_t>(depth_m1);
+    }
+    CONSTEXPR npu_set_ofm_blk_depth_m1_t& set_depth_m1(uint32_t value)
+    {
+        assert((value >> 10) == 0); // value must fit in 10 bits
+        depth_m1 = static_cast<uint16_t>(value & ((1U << 10)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("depth_m1", decimal text of depth_m1) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM zero point: NPU_SET_OFM_ZERO_POINT command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 16 payload)
+struct npu_set_ofm_zero_point_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t zero_point:16; //  Zero point offset; stored as an unsigned 16-bit field and placed in bits 31:16
+#ifdef __cplusplus
+public:
+    npu_set_ofm_zero_point_t(uint32_t _zero_point) : // masks _zero_point to 16 bits without asserting (set_zero_point asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        zero_point(_zero_point & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ofm_zero_point_t() : // opcode/control preset for this command, payload zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        zero_point(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; zero_point and reserved0 are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and zero_point into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(zero_point) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_zero_point_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_zero_point_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_zero_point() const
+    {
+        return static_cast<uint32_t>(zero_point);
+    }
+    CONSTEXPR npu_set_ofm_zero_point_t& set_zero_point(uint32_t value)
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        zero_point = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("zero_point", decimal text of zero_point) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM Tile 0 and tile 2 width: NPU_SET_OFM_WIDTH0_M1 command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 16 payload)
+struct npu_set_ofm_width0_m1_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t width_m1:16; //  OFM Tile 0 and tile 2 width; placed in bits 31:16 (presumably width minus one, per the _m1 suffix -- confirm against the NPU spec)
+#ifdef __cplusplus
+public:
+    npu_set_ofm_width0_m1_t(uint32_t _width_m1) : // masks _width_m1 to 16 bits without asserting (set_width_m1 asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(_width_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ofm_width0_m1_t() : // opcode/control preset for this command, payload zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; width_m1 and reserved0 are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and width_m1 into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(width_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_width0_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_width0_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_width_m1() const
+    {
+        return static_cast<uint32_t>(width_m1);
+    }
+    CONSTEXPR npu_set_ofm_width0_m1_t& set_width_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        width_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("width_m1", decimal text of width_m1) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM Tile 0 height: NPU_SET_OFM_HEIGHT0_M1 command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 16 payload)
+struct npu_set_ofm_height0_m1_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t height_m1:16; //  OFM Tile 0 height; placed in bits 31:16 (presumably height minus one, per the _m1 suffix -- confirm against the NPU spec)
+#ifdef __cplusplus
+public:
+    npu_set_ofm_height0_m1_t(uint32_t _height_m1) : // masks _height_m1 to 16 bits without asserting (set_height_m1 asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ofm_height0_m1_t() : // opcode/control preset for this command, payload zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; height_m1 and reserved0 are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and height_m1 into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(height_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_height0_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_height0_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_ofm_height0_m1_t& set_height_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        height_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("height_m1", decimal text of height_m1) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM Tile 1 height: NPU_SET_OFM_HEIGHT1_M1 command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 16 payload)
+struct npu_set_ofm_height1_m1_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t height_m1:16; //  OFM Tile 1 height; placed in bits 31:16 (presumably height minus one, per the _m1 suffix -- confirm against the NPU spec)
+#ifdef __cplusplus
+public:
+    npu_set_ofm_height1_m1_t(uint32_t _height_m1) : // masks _height_m1 to 16 bits without asserting (set_height_m1 asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ofm_height1_m1_t() : // opcode/control preset for this command, payload zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; height_m1 and reserved0 are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and height_m1 into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(height_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_height1_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_height1_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_ofm_height1_m1_t& set_height_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        height_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("height_m1", decimal text of height_m1) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Index n for OFM access: NPU_SET_OFM_REGION command word (bit-fields total 32 bits: 10 opcode + 4 reserved + 2 control + 3 payload + 13 reserved)
+struct npu_set_ofm_region_t
+{
+#ifdef __cplusplus
+private: // C++ only: the raw bit-fields are private; use the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; operator uint32_t places it in bits 9:0
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and omitted from the packed word
+    uint32_t control:2; //  control; placed in bits 15:14 of the packed word
+    uint32_t region:3; //  Region number for external memory accesses; placed in bits 18:16, so values 0..7 fit
+    uint32_t reserved1:13; // reserved; zeroed by both constructors and omitted from the packed word
+#ifdef __cplusplus
+public:
+    npu_set_ofm_region_t(uint32_t _region) : // masks _region to 3 bits without asserting (set_region asserts on overflow)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(_region & ((1U << 3)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ofm_region_t() : // opcode/control preset for this command, region zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // rewrites opcode and control; region and the reserved fields are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs opcode, control and region into one word; NOTE(review): not const-qualified, so it cannot be called on a const object
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(region) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_region_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_region_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_ofm_region_t& set_region(uint32_t value)
+    {
+        assert((value >> 3) == 0); // value must fit in 3 bits
+        region = static_cast<uint8_t>(value & ((1U << 3)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("region", decimal text of region) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// NPU_SET_KERNEL_WIDTH_M1 cmd0 command word: kernel width (field is named width_m1)
+struct npu_set_kernel_width_m1_t
+{
+#ifdef __cplusplus
+private: //  in C++ the fields are reached through the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; packed at bits [9:0] by operator uint32_t
+    uint32_t reserved0:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t control:2; //  control; packed at bits [15:14]
+    uint32_t width_m1:16; //  Kernel width; packed at bits [31:16]
+#ifdef __cplusplus
+public:
+    npu_set_kernel_width_m1_t(uint32_t _width_m1) : //  payload constructor; _width_m1 is masked to 16 bits without a range assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(_width_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_kernel_width_m1_t() : //  default: opcode and control preset, width_m1 and reserved0 zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(0)
+    {}
+    CONSTEXPR bool valid() const //  true only when opcode and control both identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; width_m1 keeps its current value
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() //  packs the command into one 32-bit word; note this member is not const-qualified
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0; //  bits [9:0]
+        word |= uint32_t(control) << 14; //  bits [15:14]
+        word |= uint32_t(width_m1) << 16; //  bits [31:16]
+        return word; //  reserved0 is never ORed in, so bits [13:10] stay 0
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_kernel_width_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); //  keep the low 10 bits
+        return *this; //  returns the object so setters can be chained
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_kernel_width_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); //  keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_width_m1() const
+    {
+        return static_cast<uint32_t>(width_m1);
+    }
+    CONSTEXPR npu_set_kernel_width_m1_t& set_width_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0); //  value must fit in 16 bits
+        width_m1 = static_cast<uint16_t>(value & ((1U << 16)-1)); //  masked again, so an oversized value is truncated when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends the width_m1 field as a (name, decimal text) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+    }
+#endif //  NPU_DISASSEMBLE
+#endif //  __cplusplus
+};
+// NPU_SET_KERNEL_HEIGHT_M1 cmd0 command word: kernel height (field is named height_m1)
+struct npu_set_kernel_height_m1_t
+{
+#ifdef __cplusplus
+private: //  in C++ the fields are reached through the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; packed at bits [9:0] by operator uint32_t
+    uint32_t reserved0:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t control:2; //  control; packed at bits [15:14]
+    uint32_t height_m1:16; //  Kernel height; packed at bits [31:16]
+#ifdef __cplusplus
+public:
+    npu_set_kernel_height_m1_t(uint32_t _height_m1) : //  payload constructor; _height_m1 is masked to 16 bits without a range assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_kernel_height_m1_t() : //  default: opcode and control preset, height_m1 and reserved0 zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0)
+    {}
+    CONSTEXPR bool valid() const //  true only when opcode and control both identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; height_m1 keeps its current value
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() //  packs the command into one 32-bit word; note this member is not const-qualified
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0; //  bits [9:0]
+        word |= uint32_t(control) << 14; //  bits [15:14]
+        word |= uint32_t(height_m1) << 16; //  bits [31:16]
+        return word; //  reserved0 is never ORed in, so bits [13:10] stay 0
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_kernel_height_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); //  keep the low 10 bits
+        return *this; //  returns the object so setters can be chained
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_kernel_height_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); //  keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_kernel_height_m1_t& set_height_m1(uint32_t value)
+    {
+        assert((value >> 16) == 0); //  value must fit in 16 bits
+        height_m1 = static_cast<uint16_t>(value & ((1U << 16)-1)); //  masked again, so an oversized value is truncated when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends the height_m1 field as a (name, decimal text) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif //  NPU_DISASSEMBLE
+#endif //  __cplusplus
+};
+// NPU_SET_KERNEL_STRIDE cmd0 command word: kernel stride, weight order, dilation and decomposition flags
+struct npu_set_kernel_stride_t
+{
+#ifdef __cplusplus
+private: //  in C++ the fields are reached through the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; packed at bits [9:0] by operator uint32_t
+    uint32_t reserved0:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t control:2; //  control; packed at bits [15:14]
+    uint32_t stride_x_lsb:1; //  Stride x LSB. (kernel_x_stride - 1)[0]; packed at bit 16
+    uint32_t stride_y_lsb:1; //  Stride y LSB. (kernel_y_stride - 1)[0]; packed at bit 17
+    uint32_t weight_order:1; //  Weight ordering mode; packed at bit 18
+    uint32_t dilation_x:1; //  Kernel x dilation; packed at bit 19
+    uint32_t dilation_y:1; //  Kernel y dilation; packed at bit 20
+    uint32_t decomposition:1; //  Kernel decomposition; packed at bit 21
+    uint32_t stride_x_msb:1; //  Stride x MSB. (kernel_x_stride - 1) >> 1; packed at bit 22
+    uint32_t reserved1:2; //  set to 0 by both constructors; never packed (gap between bit 22 and bit 25)
+    uint32_t stride_y_msb:1; //  Stride y MSB. (kernel_y_stride - 1) >> 1; packed at bit 25
+    uint32_t reserved2:6; //  set to 0 by both constructors; never packed by operator uint32_t
+#ifdef __cplusplus
+public:
+    npu_set_kernel_stride_t(uint32_t _stride_x_lsb, uint32_t _stride_y_lsb, NPU_NAMESPACE::weight_order _weight_order, NPU_NAMESPACE::kernel_dilation _dilation_x, NPU_NAMESPACE::kernel_dilation _dilation_y, NPU_NAMESPACE::kernel_decomposition _decomposition, uint32_t _stride_x_msb, uint32_t _stride_y_msb) : //  payload constructor; every argument is masked to 1 bit without a range assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        stride_x_lsb(_stride_x_lsb & ((1U << 1)-1)),
+        stride_y_lsb(_stride_y_lsb & ((1U << 1)-1)),
+        weight_order(static_cast<uint8_t>(_weight_order) & ((1U << 1)-1)),
+        dilation_x(static_cast<uint8_t>(_dilation_x) & ((1U << 1)-1)),
+        dilation_y(static_cast<uint8_t>(_dilation_y) & ((1U << 1)-1)),
+        decomposition(static_cast<uint8_t>(_decomposition) & ((1U << 1)-1)),
+        stride_x_msb(_stride_x_msb & ((1U << 1)-1)),
+        reserved1(0),
+        stride_y_msb(_stride_y_msb & ((1U << 1)-1)),
+        reserved2(0)
+    {}
+    CONSTEXPR npu_set_kernel_stride_t() : //  default: opcode and control preset, every payload and reserved field zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        stride_x_lsb(0),
+        stride_y_lsb(0),
+        weight_order(0),
+        dilation_x(0),
+        dilation_y(0),
+        decomposition(0),
+        stride_x_msb(0),
+        reserved1(0),
+        stride_y_msb(0),
+        reserved2(0)
+    {}
+    CONSTEXPR bool valid() const //  true only when opcode and control both identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; payload fields keep their current values
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() //  packs the command into one 32-bit word; note this member is not const-qualified
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0; //  bits [9:0]
+        word |= uint32_t(control) << 14; //  bits [15:14]
+        word |= uint32_t(stride_x_lsb) << 16; //  bit 16
+        word |= uint32_t(stride_y_lsb) << 17; //  bit 17
+        word |= uint32_t(weight_order) << 18; //  bit 18
+        word |= uint32_t(dilation_x) << 19; //  bit 19
+        word |= uint32_t(dilation_y) << 20; //  bit 20
+        word |= uint32_t(decomposition) << 21; //  bit 21
+        word |= uint32_t(stride_x_msb) << 22; //  bit 22
+        word |= uint32_t(stride_y_msb) << 25; //  bit 25; bits [24:23] belong to reserved1 and are skipped
+        return word; //  reserved fields are never ORed in, so their bits stay 0
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); //  keep the low 10 bits
+        return *this; //  returns the object so setters can be chained
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); //  keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stride_x_lsb() const
+    {
+        return static_cast<uint32_t>(stride_x_lsb);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_stride_x_lsb(uint32_t value)
+    {
+        assert((value >> 1) == 0); //  value must be 0 or 1
+        stride_x_lsb = static_cast<uint8_t>(value & ((1U << 1)-1)); //  masked again, so only bit 0 is stored when the assert is compiled out
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stride_y_lsb() const
+    {
+        return static_cast<uint32_t>(stride_y_lsb);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_stride_y_lsb(uint32_t value)
+    {
+        assert((value >> 1) == 0); //  value must be 0 or 1
+        stride_y_lsb = static_cast<uint8_t>(value & ((1U << 1)-1));
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::weight_order get_weight_order() const
+    {
+        return static_cast<NPU_NAMESPACE::weight_order>(weight_order);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_weight_order(NPU_NAMESPACE::weight_order value)
+    {
+        weight_order = static_cast<uint8_t>(value) & ((1U << 1)-1); //  enum setters mask to the field width and do not assert
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::kernel_dilation get_dilation_x() const
+    {
+        return static_cast<NPU_NAMESPACE::kernel_dilation>(dilation_x);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_dilation_x(NPU_NAMESPACE::kernel_dilation value)
+    {
+        dilation_x = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::kernel_dilation get_dilation_y() const
+    {
+        return static_cast<NPU_NAMESPACE::kernel_dilation>(dilation_y);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_dilation_y(NPU_NAMESPACE::kernel_dilation value)
+    {
+        dilation_y = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::kernel_decomposition get_decomposition() const
+    {
+        return static_cast<NPU_NAMESPACE::kernel_decomposition>(decomposition);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_decomposition(NPU_NAMESPACE::kernel_decomposition value)
+    {
+        decomposition = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stride_x_msb() const
+    {
+        return static_cast<uint32_t>(stride_x_msb);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_stride_x_msb(uint32_t value)
+    {
+        assert((value >> 1) == 0); //  value must be 0 or 1
+        stride_x_msb = static_cast<uint8_t>(value & ((1U << 1)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stride_y_msb() const
+    {
+        return static_cast<uint32_t>(stride_y_msb);
+    }
+    CONSTEXPR npu_set_kernel_stride_t& set_stride_y_msb(uint32_t value)
+    {
+        assert((value >> 1) == 0); //  value must be 0 or 1
+        stride_y_msb = static_cast<uint8_t>(value & ((1U << 1)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends one (name, text) pair per payload field, in declaration order
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("stride_x_lsb", std::to_string(stride_x_lsb)));
+        fields.push_back(std::make_pair<std::string, std::string>("stride_y_lsb", std::to_string(stride_y_lsb)));
+        fields.push_back(std::make_pair<std::string, std::string>("weight_order", (weight_order < (sizeof(weight_order_str)/sizeof(weight_order_str[0])) ? weight_order_str[weight_order] : "****"))); //  name from weight_order_str, asterisk placeholder when the value is outside the table
+        fields.push_back(std::make_pair<std::string, std::string>("dilation_x", (dilation_x < (sizeof(kernel_dilation_str)/sizeof(kernel_dilation_str[0])) ? kernel_dilation_str[dilation_x] : "****"))); //  name from kernel_dilation_str, same fallback
+        fields.push_back(std::make_pair<std::string, std::string>("dilation_y", (dilation_y < (sizeof(kernel_dilation_str)/sizeof(kernel_dilation_str[0])) ? kernel_dilation_str[dilation_y] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("decomposition", (decomposition < (sizeof(kernel_decomposition_str)/sizeof(kernel_decomposition_str[0])) ? kernel_decomposition_str[decomposition] : "****"))); //  name from kernel_decomposition_str, same fallback
+        fields.push_back(std::make_pair<std::string, std::string>("stride_x_msb", std::to_string(stride_x_msb)));
+        fields.push_back(std::make_pair<std::string, std::string>("stride_y_msb", std::to_string(stride_y_msb)));
+    }
+#endif //  NPU_DISASSEMBLE
+#endif //  __cplusplus
+};
+// NPU_SET_ACC_FORMAT cmd0 command word: accumulator format, input, output and microblock size
+struct npu_set_acc_format_t
+{
+#ifdef __cplusplus
+private: //  in C++ the fields are reached through the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; packed at bits [9:0] by operator uint32_t
+    uint32_t reserved0:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t control:2; //  control; packed at bits [15:14]
+    uint32_t acc_format:2; //  Accumulator format; packed at bits [17:16]
+    uint32_t reserved1:2; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t acc_input:2; //  Accumulator input; packed at bits [21:20]
+    uint32_t acc_output:1; //  Accumulator output; packed at bit 22
+    uint32_t reserved2:1; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t microblock:3; //  Accumulator microblock size; packed at bits [26:24]
+    uint32_t reserved3:5; //  set to 0 by both constructors; never packed by operator uint32_t
+#ifdef __cplusplus
+public:
+    npu_set_acc_format_t(NPU_NAMESPACE::acc_format _acc_format, NPU_NAMESPACE::acc_input _acc_input, NPU_NAMESPACE::acc_output _acc_output, NPU_NAMESPACE::microblock _microblock) : //  payload constructor; each enum argument is masked to its field width
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        acc_format(static_cast<uint8_t>(_acc_format) & ((1U << 2)-1)),
+        reserved1(0),
+        acc_input(static_cast<uint8_t>(_acc_input) & ((1U << 2)-1)),
+        acc_output(static_cast<uint8_t>(_acc_output) & ((1U << 1)-1)),
+        reserved2(0),
+        microblock(static_cast<uint8_t>(_microblock) & ((1U << 3)-1)),
+        reserved3(0)
+    {}
+    CONSTEXPR npu_set_acc_format_t() : //  default: opcode and control preset, every payload and reserved field zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        acc_format(0),
+        reserved1(0),
+        acc_input(0),
+        acc_output(0),
+        reserved2(0),
+        microblock(0),
+        reserved3(0)
+    {}
+    CONSTEXPR bool valid() const //  true only when opcode and control both identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; payload fields keep their current values
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() //  packs the command into one 32-bit word; note this member is not const-qualified
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0; //  bits [9:0]
+        word |= uint32_t(control) << 14; //  bits [15:14]
+        word |= uint32_t(acc_format) << 16; //  bits [17:16]
+        word |= uint32_t(acc_input) << 20; //  bits [21:20]
+        word |= uint32_t(acc_output) << 22; //  bit 22
+        word |= uint32_t(microblock) << 24; //  bits [26:24]
+        return word; //  reserved fields are never ORed in, so their bits stay 0
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_acc_format_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); //  keep the low 10 bits
+        return *this; //  returns the object so setters can be chained
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_acc_format_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); //  keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::acc_format get_acc_format() const
+    {
+        return static_cast<NPU_NAMESPACE::acc_format>(acc_format);
+    }
+    CONSTEXPR npu_set_acc_format_t& set_acc_format(NPU_NAMESPACE::acc_format value)
+    {
+        acc_format = static_cast<uint8_t>(value) & ((1U << 2)-1); //  enum setters mask to the field width and do not assert
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::acc_input get_acc_input() const
+    {
+        return static_cast<NPU_NAMESPACE::acc_input>(acc_input);
+    }
+    CONSTEXPR npu_set_acc_format_t& set_acc_input(NPU_NAMESPACE::acc_input value)
+    {
+        acc_input = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::acc_output get_acc_output() const
+    {
+        return static_cast<NPU_NAMESPACE::acc_output>(acc_output);
+    }
+    CONSTEXPR npu_set_acc_format_t& set_acc_output(NPU_NAMESPACE::acc_output value)
+    {
+        acc_output = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::microblock get_microblock() const
+    {
+        return static_cast<NPU_NAMESPACE::microblock>(microblock);
+    }
+    CONSTEXPR npu_set_acc_format_t& set_microblock(NPU_NAMESPACE::microblock value)
+    {
+        microblock = static_cast<uint8_t>(value) & ((1U << 3)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends one (name, text) pair per payload field, in declaration order
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("acc_format", (acc_format < (sizeof(acc_format_str)/sizeof(acc_format_str[0])) ? acc_format_str[acc_format] : "****"))); //  name from acc_format_str, asterisk placeholder when the value is outside the table
+        fields.push_back(std::make_pair<std::string, std::string>("acc_input", (acc_input < (sizeof(acc_input_str)/sizeof(acc_input_str[0])) ? acc_input_str[acc_input] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("acc_output", (acc_output < (sizeof(acc_output_str)/sizeof(acc_output_str[0])) ? acc_output_str[acc_output] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("microblock", (microblock < (sizeof(microblock_str)/sizeof(microblock_str[0])) ? microblock_str[microblock] : "****")));
+    }
+#endif //  NPU_DISASSEMBLE
+#endif //  __cplusplus
+};
+// NPU_SET_ACTIVATION cmd0 command word: activation function, LUT table number and clip range
+struct npu_set_activation_t
+{
+#ifdef __cplusplus
+private: //  in C++ the fields are reached through the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; packed at bits [9:0] by operator uint32_t
+    uint32_t reserved0:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t control:2; //  control; packed at bits [15:14]
+    uint32_t activation_function:5; //  Activation function to apply; packed at bits [20:16]
+    uint32_t table:3; //  Table number to use for LUT; packed at bits [23:21]
+    uint32_t reserved1:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t activation_clip_range:1; //  Activation clip to apply; packed at bit 28
+    uint32_t reserved2:3; //  set to 0 by both constructors; never packed by operator uint32_t
+#ifdef __cplusplus
+public:
+    npu_set_activation_t(NPU_NAMESPACE::activation_function _activation_function, uint32_t _table, NPU_NAMESPACE::activation_clip_range _activation_clip_range) : //  payload constructor; each argument is masked to its field width without a range assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        activation_function(static_cast<uint8_t>(_activation_function) & ((1U << 5)-1)),
+        table(_table & ((1U << 3)-1)),
+        reserved1(0),
+        activation_clip_range(static_cast<uint8_t>(_activation_clip_range) & ((1U << 1)-1)),
+        reserved2(0)
+    {}
+    CONSTEXPR npu_set_activation_t() : //  default: opcode and control preset, every payload and reserved field zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        activation_function(0),
+        table(0),
+        reserved1(0),
+        activation_clip_range(0),
+        reserved2(0)
+    {}
+    CONSTEXPR bool valid() const //  true only when opcode and control both identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; payload fields keep their current values
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() //  packs the command into one 32-bit word; note this member is not const-qualified
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0; //  bits [9:0]
+        word |= uint32_t(control) << 14; //  bits [15:14]
+        word |= uint32_t(activation_function) << 16; //  bits [20:16]
+        word |= uint32_t(table) << 21; //  bits [23:21]
+        word |= uint32_t(activation_clip_range) << 28; //  bit 28; bits [27:24] belong to reserved1 and are skipped
+        return word; //  reserved fields are never ORed in, so their bits stay 0
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_activation_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); //  keep the low 10 bits
+        return *this; //  returns the object so setters can be chained
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_activation_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); //  keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_function get_activation_function() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_function>(activation_function);
+    }
+    CONSTEXPR npu_set_activation_t& set_activation_function(NPU_NAMESPACE::activation_function value)
+    {
+        activation_function = static_cast<uint8_t>(value) & ((1U << 5)-1); //  enum setters mask to the field width and do not assert
+        return *this;
+    }
+    CONSTEXPR uint32_t get_table() const
+    {
+        return static_cast<uint32_t>(table);
+    }
+    CONSTEXPR npu_set_activation_t& set_table(uint32_t value)
+    {
+        assert((value >> 3) == 0); //  value must fit in 3 bits
+        table = static_cast<uint8_t>(value & ((1U << 3)-1)); //  masked again, so an oversized value is truncated when the assert is compiled out
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_clip_range get_activation_clip_range() const
+    {
+        return static_cast<NPU_NAMESPACE::activation_clip_range>(activation_clip_range);
+    }
+    CONSTEXPR npu_set_activation_t& set_activation_clip_range(NPU_NAMESPACE::activation_clip_range value)
+    {
+        activation_clip_range = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends one (name, text) pair per payload field, in declaration order
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("activation_function", (activation_function < (sizeof(activation_function_str)/sizeof(activation_function_str[0])) ? activation_function_str[activation_function] : "****"))); //  name from activation_function_str, asterisk placeholder when the value is outside the table
+        fields.push_back(std::make_pair<std::string, std::string>("table", std::to_string(table)));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_clip_range", (activation_clip_range < (sizeof(activation_clip_range_str)/sizeof(activation_clip_range_str[0])) ? activation_clip_range_str[activation_clip_range] : "****")));
+    }
+#endif //  NPU_DISASSEMBLE
+#endif //  __cplusplus
+};
+// NPU_SET_ACTIVATION_MIN cmd0 command word: lower bound clip for OFM activations
+struct npu_set_activation_min_t
+{
+#ifdef __cplusplus
+private: //  in C++ the fields are reached through the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; packed at bits [9:0] by operator uint32_t
+    uint32_t reserved0:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t control:2; //  control; packed at bits [15:14]
+    uint32_t clip_boundary:16; //  Clip boundary for OFM activations; packed at bits [31:16]
+#ifdef __cplusplus
+public:
+    npu_set_activation_min_t(uint32_t _clip_boundary) : //  payload constructor; _clip_boundary is masked to 16 bits without a range assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        clip_boundary(_clip_boundary & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_activation_min_t() : //  default: opcode and control preset, clip_boundary and reserved0 zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        clip_boundary(0)
+    {}
+    CONSTEXPR bool valid() const //  true only when opcode and control both identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; clip_boundary keeps its current value
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() //  packs the command into one 32-bit word; note this member is not const-qualified
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0; //  bits [9:0]
+        word |= uint32_t(control) << 14; //  bits [15:14]
+        word |= uint32_t(clip_boundary) << 16; //  bits [31:16]
+        return word; //  reserved0 is never ORed in, so bits [13:10] stay 0
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_activation_min_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); //  keep the low 10 bits
+        return *this; //  returns the object so setters can be chained
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_activation_min_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); //  keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_clip_boundary() const
+    {
+        return static_cast<uint32_t>(clip_boundary); //  returned as an unsigned 16-bit value; no sign extension is performed here
+    }
+    CONSTEXPR npu_set_activation_min_t& set_clip_boundary(uint32_t value)
+    {
+        assert((value >> 16) == 0); //  value must fit in 16 bits
+        clip_boundary = static_cast<uint16_t>(value & ((1U << 16)-1)); //  masked again, so an oversized value is truncated when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends the clip_boundary field as a (name, decimal text) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("clip_boundary", std::to_string(clip_boundary)));
+    }
+#endif //  NPU_DISASSEMBLE
+#endif //  __cplusplus
+};
+// NPU_SET_ACTIVATION_MAX cmd0 command word: upper bound clip for OFM activations
+struct npu_set_activation_max_t
+{
+#ifdef __cplusplus
+private: //  in C++ the fields are reached through the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; packed at bits [9:0] by operator uint32_t
+    uint32_t reserved0:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t control:2; //  control; packed at bits [15:14]
+    uint32_t clip_boundary:16; //  Clip boundary for OFM activations; packed at bits [31:16]
+#ifdef __cplusplus
+public:
+    npu_set_activation_max_t(uint32_t _clip_boundary) : //  payload constructor; _clip_boundary is masked to 16 bits without a range assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        clip_boundary(_clip_boundary & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_activation_max_t() : //  default: opcode and control preset, clip_boundary and reserved0 zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        clip_boundary(0)
+    {}
+    CONSTEXPR bool valid() const //  true only when opcode and control both identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; clip_boundary keeps its current value
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() //  packs the command into one 32-bit word; note this member is not const-qualified
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0; //  bits [9:0]
+        word |= uint32_t(control) << 14; //  bits [15:14]
+        word |= uint32_t(clip_boundary) << 16; //  bits [31:16]
+        return word; //  reserved0 is never ORed in, so bits [13:10] stay 0
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_activation_max_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); //  keep the low 10 bits
+        return *this; //  returns the object so setters can be chained
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_activation_max_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); //  keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_clip_boundary() const
+    {
+        return static_cast<uint32_t>(clip_boundary); //  returned as an unsigned 16-bit value; no sign extension is performed here
+    }
+    CONSTEXPR npu_set_activation_max_t& set_clip_boundary(uint32_t value)
+    {
+        assert((value >> 16) == 0); //  value must fit in 16 bits
+        clip_boundary = static_cast<uint16_t>(value & ((1U << 16)-1)); //  masked again, so an oversized value is truncated when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends the clip_boundary field as a (name, decimal text) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("clip_boundary", std::to_string(clip_boundary)));
+    }
+#endif //  NPU_DISASSEMBLE
+#endif //  __cplusplus
+};
+// NPU_SET_WEIGHT_REGION cmd0 command word: index n for weight stream access
+struct npu_set_weight_region_t
+{
+#ifdef __cplusplus
+private: //  in C++ the fields are reached through the accessors below
+#endif
+    uint32_t opcode:10; //  opcode; packed at bits [9:0] by operator uint32_t
+    uint32_t reserved0:4; //  set to 0 by both constructors; never packed by operator uint32_t
+    uint32_t control:2; //  control; packed at bits [15:14]
+    uint32_t region:3; //  Index n for weight stream access; packed at bits [18:16]
+    uint32_t reserved1:13; //  set to 0 by both constructors; never packed by operator uint32_t
+#ifdef __cplusplus
+public:
+    npu_set_weight_region_t(uint32_t _region) : //  payload constructor; _region is masked to 3 bits without a range assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(_region & ((1U << 3)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_weight_region_t() : //  default: opcode and control preset, region and reserved fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const //  true only when opcode and control both identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; region keeps its current value
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() //  packs the command into one 32-bit word; note this member is not const-qualified
+    {
+        uint32_t word = 0;
+        word |= uint32_t(opcode) << 0; //  bits [9:0]
+        word |= uint32_t(control) << 14; //  bits [15:14]
+        word |= uint32_t(region) << 16; //  bits [18:16]
+        return word; //  reserved fields are never ORed in, so their bits stay 0
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_weight_region_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); //  keep the low 10 bits
+        return *this; //  returns the object so setters can be chained
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_weight_region_t& set_control(NPU_NAMESPACE::cmd_ctrl value)
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); //  keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_weight_region_t& set_region(uint32_t value)
+    {
+        assert((value >> 3) == 0); //  value must fit in 3 bits
+        region = static_cast<uint8_t>(value & ((1U << 3)-1)); //  masked again, so an oversized value is truncated when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends the region field as a (name, decimal text) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+    }
+#endif //  NPU_DISASSEMBLE
+#endif //  __cplusplus
+};
+// NPU_SET_SCALE_REGION command word (cmd0): index n for scale stream access
+struct npu_set_scale_region_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t region:3; //  Index n for scale stream access (bits 16-18 of the encoded word)
+    uint32_t reserved1:13; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_scale_region_t(uint32_t _region) : //  full command; _region is masked to 3 bits here without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(_region & ((1U << 3)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_scale_region_t() : //  command with region = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; region is left as it was
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(region) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_scale_region_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_scale_region_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const //  stored region index
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_scale_region_t& set_region(uint32_t value) //  stores a region index; returns *this for chaining
+    {
+        assert((value >> 3) == 0); //  value must fit in 3 bits
+        region = static_cast<uint8_t>(value & ((1U << 3)-1)); //  mask still truncates when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends a (field name, decimal value) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+    }
+#endif
+#endif
+};
+// NPU_SET_WEIGHT_FORMAT command word (cmd0): set weight stream format
+struct npu_set_weight_format_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t weight_format:1; //  Weight stream format (bit 16 of the encoded word)
+    uint32_t reserved1:3; //  reserved, written as 0 by both constructors
+    uint32_t weight_sparsity:1; //  Weight sparsity type (bit 20 of the encoded word)
+    uint32_t reserved2:11; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_weight_format_t(NPU_NAMESPACE::weight_format _weight_format, NPU_NAMESPACE::weight_sparsity _weight_sparsity) : //  full command; each enum is masked to 1 bit
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_FORMAT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        weight_format(static_cast<uint8_t>(_weight_format) & ((1U << 1)-1)),
+        reserved1(0),
+        weight_sparsity(static_cast<uint8_t>(_weight_sparsity) & ((1U << 1)-1)),
+        reserved2(0)
+    {}
+    CONSTEXPR npu_set_weight_format_t() : //  command with both payload fields = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_FORMAT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        weight_format(0),
+        reserved1(0),
+        weight_sparsity(0),
+        reserved2(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_FORMAT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; payload fields are left as they were
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_FORMAT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(weight_format) << 16;
+        word |= uint32_t(weight_sparsity) << 20;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_weight_format_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_weight_format_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::weight_format get_weight_format() const //  stored format bit as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::weight_format>(weight_format);
+    }
+    CONSTEXPR npu_set_weight_format_t& set_weight_format(NPU_NAMESPACE::weight_format value) //  keeps the low bit, no assert; returns *this
+    {
+        weight_format = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::weight_sparsity get_weight_sparsity() const //  stored sparsity bit as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::weight_sparsity>(weight_sparsity);
+    }
+    CONSTEXPR npu_set_weight_format_t& set_weight_sparsity(NPU_NAMESPACE::weight_sparsity value) //  keeps the low bit, no assert; returns *this
+    {
+        weight_sparsity = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends (field name, text) pairs
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("weight_format", (weight_format < (sizeof(weight_format_str)/sizeof(weight_format_str[0])) ? weight_format_str[weight_format] : "****"))); //  "****" when the value has no entry in weight_format_str
+        fields.push_back(std::make_pair<std::string, std::string>("weight_sparsity", (weight_sparsity < (sizeof(weight_sparsity_str)/sizeof(weight_sparsity_str[0])) ? weight_sparsity_str[weight_sparsity] : "****"))); //  "****" when the value has no entry in weight_sparsity_str
+    }
+#endif
+#endif
+};
+// NPU_SET_BLOCKDEP command word (cmd0): block number of blocks dependency
+struct npu_set_blockdep_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t blockdep:3; //  Block number of blocks dependency between kernel operations (bits 16-18 of the encoded word)
+    uint32_t reserved1:13; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_blockdep_t(uint32_t _blockdep) : //  full command; _blockdep is masked to 3 bits here without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        blockdep(_blockdep & ((1U << 3)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_blockdep_t() : //  command with blockdep = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        blockdep(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; blockdep is left as it was
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(blockdep) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_blockdep_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_blockdep_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_blockdep() const //  stored blockdep value
+    {
+        return static_cast<uint32_t>(blockdep);
+    }
+    CONSTEXPR npu_set_blockdep_t& set_blockdep(uint32_t value) //  stores a blockdep value; returns *this for chaining
+    {
+        assert((value >> 3) == 0); //  value must fit in 3 bits
+        blockdep = static_cast<uint8_t>(value & ((1U << 3)-1)); //  mask still truncates when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends a (field name, decimal value) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("blockdep", std::to_string(blockdep)));
+    }
+#endif
+#endif
+};
+// NPU_SET_RESIZE_X_SCALE_N_M1 command word (cmd0): set resize scale X numerator
+struct npu_set_resize_x_scale_n_m1_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t resize_x_scale_n_m1:11; //  Resize X scale numerator (bits 16-26 of the encoded word)
+    uint32_t reserved1:5; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_resize_x_scale_n_m1_t(uint32_t _resize_x_scale_n_m1) : //  full command; the argument is masked to 11 bits here without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_SCALE_N_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_x_scale_n_m1(_resize_x_scale_n_m1 & ((1U << 11)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_resize_x_scale_n_m1_t() : //  command with resize_x_scale_n_m1 = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_SCALE_N_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_x_scale_n_m1(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_SCALE_N_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; the payload field is left as it was
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_SCALE_N_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(resize_x_scale_n_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_resize_x_scale_n_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_resize_x_scale_n_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_resize_x_scale_n_m1() const //  stored 11-bit field value
+    {
+        return static_cast<uint32_t>(resize_x_scale_n_m1);
+    }
+    CONSTEXPR npu_set_resize_x_scale_n_m1_t& set_resize_x_scale_n_m1(uint32_t value) //  stores the field; returns *this for chaining
+    {
+        assert((value >> 11) == 0); //  value must fit in 11 bits
+        resize_x_scale_n_m1 = static_cast<uint16_t>(value & ((1U << 11)-1)); //  mask still truncates when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends a (field name, decimal value) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("resize_x_scale_n_m1", std::to_string(resize_x_scale_n_m1)));
+    }
+#endif
+#endif
+};
+// NPU_SET_RESIZE_Y_SCALE_N_M1 command word (cmd0): set resize scale Y numerator
+struct npu_set_resize_y_scale_n_m1_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t resize_y_scale_n_m1:11; //  Resize Y scale numerator (bits 16-26 of the encoded word)
+    uint32_t reserved1:5; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_resize_y_scale_n_m1_t(uint32_t _resize_y_scale_n_m1) : //  full command; the argument is masked to 11 bits here without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_SCALE_N_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_y_scale_n_m1(_resize_y_scale_n_m1 & ((1U << 11)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_resize_y_scale_n_m1_t() : //  command with resize_y_scale_n_m1 = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_SCALE_N_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_y_scale_n_m1(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_SCALE_N_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; the payload field is left as it was
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_SCALE_N_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(resize_y_scale_n_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_resize_y_scale_n_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_resize_y_scale_n_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_resize_y_scale_n_m1() const //  stored 11-bit field value
+    {
+        return static_cast<uint32_t>(resize_y_scale_n_m1);
+    }
+    CONSTEXPR npu_set_resize_y_scale_n_m1_t& set_resize_y_scale_n_m1(uint32_t value) //  stores the field; returns *this for chaining
+    {
+        assert((value >> 11) == 0); //  value must fit in 11 bits
+        resize_y_scale_n_m1 = static_cast<uint16_t>(value & ((1U << 11)-1)); //  mask still truncates when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends a (field name, decimal value) pair
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("resize_y_scale_n_m1", std::to_string(resize_y_scale_n_m1)));
+    }
+#endif
+#endif
+};
+// NPU_SET_RESIZE_X_OFFSET command word (cmd0): set resize offset X
+struct npu_set_resize_x_offset_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t resize_x_offset:12; //  Resize X offset (bits 16-27 of the encoded word); disassemble() prints it as signed
+    uint32_t reserved1:4; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_resize_x_offset_t(uint32_t _resize_x_offset) : //  full command; the argument is masked to 12 bits here without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_OFFSET)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_x_offset(_resize_x_offset & ((1U << 12)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_resize_x_offset_t() : //  command with resize_x_offset = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_OFFSET)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_x_offset(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_OFFSET) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; the payload field is left as it was
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_X_OFFSET); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(resize_x_offset) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_resize_x_offset_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_resize_x_offset_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_resize_x_offset() const //  raw 12-bit field value, not sign-extended
+    {
+        return static_cast<uint32_t>(resize_x_offset);
+    }
+    CONSTEXPR npu_set_resize_x_offset_t& set_resize_x_offset(uint32_t value) //  NOTE(review): negative offsets presumably go in as a 12-bit two's-complement pattern - confirm
+    {
+        assert((value >> 12) == 0); //  value must fit in 12 bits
+        resize_x_offset = static_cast<uint16_t>(value & ((1U << 12)-1)); //  mask still truncates when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends (field name, signed decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("resize_x_offset", std::to_string((static_cast<int>(resize_x_offset) ^ 0x800) - 0x800))); //  sign-extends from bit 11 without shifting into the int sign bit
+    }
+#endif
+#endif
+};
+// NPU_SET_RESIZE_Y_OFFSET command word (cmd0): set resize offset Y
+struct npu_set_resize_y_offset_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t resize_y_offset:12; //  Resize Y offset (bits 16-27 of the encoded word); disassemble() prints it as signed
+    uint32_t reserved1:4; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_resize_y_offset_t(uint32_t _resize_y_offset) : //  full command; the argument is masked to 12 bits here without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_OFFSET)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_y_offset(_resize_y_offset & ((1U << 12)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_resize_y_offset_t() : //  command with resize_y_offset = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_OFFSET)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        resize_y_offset(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_OFFSET) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; the payload field is left as it was
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_RESIZE_Y_OFFSET); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(resize_y_offset) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_resize_y_offset_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_resize_y_offset_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_resize_y_offset() const //  raw 12-bit field value, not sign-extended
+    {
+        return static_cast<uint32_t>(resize_y_offset);
+    }
+    CONSTEXPR npu_set_resize_y_offset_t& set_resize_y_offset(uint32_t value) //  NOTE(review): negative offsets presumably go in as a 12-bit two's-complement pattern - confirm
+    {
+        assert((value >> 12) == 0); //  value must fit in 12 bits
+        resize_y_offset = static_cast<uint16_t>(value & ((1U << 12)-1)); //  mask still truncates when the assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends (field name, signed decimal value)
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("resize_y_offset", std::to_string((static_cast<int>(resize_y_offset) ^ 0x800) - 0x800))); //  sign-extends from bit 11 without shifting into the int sign bit
+    }
+#endif
+#endif
+};
+// NPU_SET_DMA0_SRC_REGION command word (cmd0): DMA0 source region
+struct npu_set_dma0_src_region_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t region:3; //  Region number for external memory accesses (bits 16-18 of the encoded word)
+    uint32_t reserved1:5; //  reserved, written as 0 by both constructors
+    uint32_t region_mode:1; //  Region mode (bit 24 of the encoded word)
+    uint32_t stride_mode:2; //  Stride mode (bits 25-26 of the encoded word)
+    uint32_t idx_mode:1; //  Index mode for gather or scatter (bit 27 of the encoded word)
+    uint32_t reserved2:4; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_dma0_src_region_t(uint32_t _region, NPU_NAMESPACE::dma_region_mode _region_mode, NPU_NAMESPACE::dma_stride_mode _stride_mode, NPU_NAMESPACE::dma_idx_mode _idx_mode) : //  full command; every argument is masked to its field width without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(_region & ((1U << 3)-1)),
+        reserved1(0),
+        region_mode(static_cast<uint8_t>(_region_mode) & ((1U << 1)-1)),
+        stride_mode(static_cast<uint8_t>(_stride_mode) & ((1U << 2)-1)),
+        idx_mode(static_cast<uint8_t>(_idx_mode) & ((1U << 1)-1)),
+        reserved2(0)
+    {}
+    CONSTEXPR npu_set_dma0_src_region_t() : //  command with every payload field = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(0),
+        reserved1(0),
+        region_mode(0),
+        stride_mode(0),
+        idx_mode(0),
+        reserved2(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; payload fields are left as they were
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(region) << 16;
+        word |= uint32_t(region_mode) << 24;
+        word |= uint32_t(stride_mode) << 25;
+        word |= uint32_t(idx_mode) << 27;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_dma0_src_region_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_dma0_src_region_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const //  stored region number
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_dma0_src_region_t& set_region(uint32_t value) //  stores a region number; returns *this for chaining
+    {
+        assert((value >> 3) == 0); //  value must fit in 3 bits
+        region = static_cast<uint8_t>(value & ((1U << 3)-1)); //  mask still truncates when the assert is compiled out
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::dma_region_mode get_region_mode() const //  stored region mode bit as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::dma_region_mode>(region_mode);
+    }
+    CONSTEXPR npu_set_dma0_src_region_t& set_region_mode(NPU_NAMESPACE::dma_region_mode value) //  keeps the low bit, no assert; returns *this
+    {
+        region_mode = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::dma_stride_mode get_stride_mode() const //  stored stride mode bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::dma_stride_mode>(stride_mode);
+    }
+    CONSTEXPR npu_set_dma0_src_region_t& set_stride_mode(NPU_NAMESPACE::dma_stride_mode value) //  keeps the low 2 bits, no assert; returns *this
+    {
+        stride_mode = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::dma_idx_mode get_idx_mode() const //  stored index mode bit as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::dma_idx_mode>(idx_mode);
+    }
+    CONSTEXPR npu_set_dma0_src_region_t& set_idx_mode(NPU_NAMESPACE::dma_idx_mode value) //  keeps the low bit, no assert; returns *this
+    {
+        idx_mode = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends (field name, text) pairs; enum fields print "****" when the value has no table entry
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+        fields.push_back(std::make_pair<std::string, std::string>("region_mode", (region_mode < (sizeof(dma_region_mode_str)/sizeof(dma_region_mode_str[0])) ? dma_region_mode_str[region_mode] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("stride_mode", (stride_mode < (sizeof(dma_stride_mode_str)/sizeof(dma_stride_mode_str[0])) ? dma_stride_mode_str[stride_mode] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("idx_mode", (idx_mode < (sizeof(dma_idx_mode_str)/sizeof(dma_idx_mode_str[0])) ? dma_idx_mode_str[idx_mode] : "****")));
+    }
+#endif
+#endif
+};
+// NPU_SET_DMA0_DST_REGION command word (cmd0): DMA0 destination region
+struct npu_set_dma0_dst_region_t
+{
+#ifdef __cplusplus
+private: //  C++ code goes through the accessors below; without __cplusplus only the raw bit-fields exist
+#endif
+    uint32_t opcode:10; //  opcode (bits 0-9 of the word built by operator uint32_t)
+    uint32_t reserved0:4; //  reserved, written as 0 by both constructors
+    uint32_t control:2; //  control (bits 14-15 of the encoded word)
+    uint32_t region:3; //  Region number for memory accesses (bits 16-18 of the encoded word)
+    uint32_t reserved1:5; //  reserved, written as 0 by both constructors
+    uint32_t region_mode:1; //  Region mode (bit 24 of the encoded word)
+    uint32_t reserved2:2; //  reserved, written as 0 by both constructors; this command has no stride_mode field
+    uint32_t idx_mode:1; //  Index mode for gather or scatter (bit 27 of the encoded word)
+    uint32_t reserved3:4; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_dma0_dst_region_t(uint32_t _region, NPU_NAMESPACE::dma_region_mode _region_mode, NPU_NAMESPACE::dma_idx_mode _idx_mode) : //  full command; every argument is masked to its field width without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(_region & ((1U << 3)-1)),
+        reserved1(0),
+        region_mode(static_cast<uint8_t>(_region_mode) & ((1U << 1)-1)),
+        reserved2(0),
+        idx_mode(static_cast<uint8_t>(_idx_mode) & ((1U << 1)-1)),
+        reserved3(0)
+    {}
+    CONSTEXPR npu_set_dma0_dst_region_t() : //  command with every payload field = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(0),
+        reserved1(0),
+        region_mode(0),
+        reserved2(0),
+        idx_mode(0),
+        reserved3(0)
+    {}
+    CONSTEXPR bool valid() const //  true when opcode and control both hold this command's fixed values
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() //  rewrites opcode and control only; payload fields are left as they were
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() const //  encodes the command as one 32-bit word; const so a const command can be encoded too
+    {
+        uint32_t word = 0; //  reserved fields are never copied, so their bits stay 0 in the result
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(region) << 16;
+        word |= uint32_t(region_mode) << 24;
+        word |= uint32_t(idx_mode) << 27;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const //  stored opcode as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) //  keeps the low 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const //  stored control bits as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t& set_control(NPU_NAMESPACE::cmd_ctrl value) //  keeps the low 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const //  stored region number
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t& set_region(uint32_t value) //  stores a region number; returns *this for chaining
+    {
+        assert((value >> 3) == 0); //  value must fit in 3 bits
+        region = static_cast<uint8_t>(value & ((1U << 3)-1)); //  mask still truncates when the assert is compiled out
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::dma_region_mode get_region_mode() const //  stored region mode bit as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::dma_region_mode>(region_mode);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t& set_region_mode(NPU_NAMESPACE::dma_region_mode value) //  keeps the low bit, no assert; returns *this
+    {
+        region_mode = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::dma_idx_mode get_idx_mode() const //  stored index mode bit as the enum type
+    {
+        return static_cast<NPU_NAMESPACE::dma_idx_mode>(idx_mode);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t& set_idx_mode(NPU_NAMESPACE::dma_idx_mode value) //  keeps the low bit, no assert; returns *this
+    {
+        idx_mode = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const //  appends (field name, text) pairs; enum fields print "****" when the value has no table entry
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+        fields.push_back(std::make_pair<std::string, std::string>("region_mode", (region_mode < (sizeof(dma_region_mode_str)/sizeof(dma_region_mode_str[0])) ? dma_region_mode_str[region_mode] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("idx_mode", (idx_mode < (sizeof(dma_idx_mode_str)/sizeof(dma_idx_mode_str[0])) ? dma_idx_mode_str[idx_mode] : "****")));
+    }
+#endif
+#endif
+};
+// cmd0 command word (32-bit): size of second dimension for 2D/3D DMA0 transfers
+struct npu_set_dma0_size0_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t size:16; //  Size of second dimension for 2D/3D transfers; emitted at bits [31:16]
+#ifdef __cplusplus
+public:
+    npu_set_dma0_size0_t(uint32_t _size) : // value constructor; _size is masked to 16 bits without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        size(_size & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_dma0_size0_t() : // default constructor; size starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        size(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; size is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits [13:10] stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(size) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_dma0_size0_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_dma0_size0_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_size() const // size field widened to uint32_t
+    {
+        return static_cast<uint32_t>(size);
+    }
+    CONSTEXPR npu_set_dma0_size0_t& set_size(uint32_t value) // returns *this for chaining
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        size = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("size", std::to_string(size)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): size of third dimension for 3D DMA0 transfers
+struct npu_set_dma0_size1_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t size:16; //  Size of third dimension for 3D transfers; emitted at bits [31:16]
+#ifdef __cplusplus
+public:
+    npu_set_dma0_size1_t(uint32_t _size) : // value constructor; _size is masked to 16 bits without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        size(_size & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_dma0_size1_t() : // default constructor; size starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        size(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; size is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits [13:10] stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(size) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_dma0_size1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_dma0_size1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_size() const // size field widened to uint32_t
+    {
+        return static_cast<uint32_t>(size);
+    }
+    CONSTEXPR npu_set_dma0_size1_t& set_size(uint32_t value) // returns *this for chaining
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        size = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("size", std::to_string(size)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): DMA0 index region
+struct npu_set_dma0_idx_region_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t region:3; //  Region number for external memory accesses; emitted at bits [18:16]
+    uint32_t reserved1:13; // reserved; zeroed by both constructors and never emitted
+#ifdef __cplusplus
+public:
+    npu_set_dma0_idx_region_t(uint32_t _region) : // value constructor; _region is masked to 3 bits without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_IDX_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(_region & ((1U << 3)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_dma0_idx_region_t() : // default constructor; region starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_IDX_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_IDX_REGION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; region is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_IDX_REGION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(region) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_dma0_idx_region_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_dma0_idx_region_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const // region field widened to uint32_t
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_dma0_idx_region_t& set_region(uint32_t value) // returns *this for chaining
+    {
+        assert((value >> 3) == 0); // value must fit in 3 bits
+        region = static_cast<uint8_t>(value & ((1U << 3)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): IFM2 broadcast configuration
+struct npu_set_ifm2_broadcast_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t broadcast_mode:4; //  Broadcast mode for IFM2 (when not using broadcast_mode_scalar, accesses to IFM2 set the corresponding axes to 0 and the corresponding IFM2 H/W/C to 1); emitted at bits [19:16]
+    uint32_t reserved1:12; // reserved; zeroed by both constructors and never emitted
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_broadcast_t(NPU_NAMESPACE::broadcast_mode _broadcast_mode) : // value constructor; the enum value is masked to 4 bits
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        broadcast_mode(static_cast<uint8_t>(_broadcast_mode) & ((1U << 4)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm2_broadcast_t() : // default constructor; broadcast_mode starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        broadcast_mode(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; broadcast_mode is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(broadcast_mode) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm2_broadcast_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm2_broadcast_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_mode() const // broadcast_mode field as its enum type
+    {
+        return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_mode);
+    }
+    CONSTEXPR npu_set_ifm2_broadcast_t& set_broadcast_mode(NPU_NAMESPACE::broadcast_mode value) // returns *this for chaining
+    {
+        broadcast_mode = static_cast<uint8_t>(value) & ((1U << 4)-1); // keep the low 4 bits
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends the mode name, or "****" when the value has no entry in broadcast_mode_str
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("broadcast_mode", (broadcast_mode < (sizeof(broadcast_mode_str)/sizeof(broadcast_mode_str[0])) ? broadcast_mode_str[broadcast_mode] : "****")));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): IFM2 precision (type, precision, format and storage mode)
+struct npu_set_ifm2_precision_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t activation_type:1; //  IFM2 type; emitted at bit 16
+    uint32_t reserved1:1; // reserved; zeroed by both constructors and never emitted
+    uint32_t activation_precision:2; //  IFM2 precision; emitted at bits [19:18]
+    uint32_t reserved2:2; // reserved; zeroed by both constructors and never emitted
+    uint32_t activation_format:2; //  IFM2 format; emitted at bits [23:22]
+    uint32_t reserved3:6; // reserved; zeroed by both constructors and never emitted
+    uint32_t activation_storage:2; //  IFM2 storage mode; emitted at bits [31:30]
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_precision_t(NPU_NAMESPACE::activation_type _activation_type, NPU_NAMESPACE::activation_precision _activation_precision, NPU_NAMESPACE::activation_format _activation_format, NPU_NAMESPACE::activation_storage _activation_storage) : // value constructor; each enum value is masked to its field width
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1)-1)),
+        reserved1(0),
+        activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2)-1)),
+        reserved2(0),
+        activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2)-1)),
+        reserved3(0),
+        activation_storage(static_cast<uint8_t>(_activation_storage) & ((1U << 2)-1))
+    {}
+    CONSTEXPR npu_set_ifm2_precision_t() : // default constructor; every payload field starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        activation_type(0),
+        reserved1(0),
+        activation_precision(0),
+        reserved2(0),
+        activation_format(0),
+        reserved3(0),
+        activation_storage(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; payload fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(activation_type) << 16;
+        word |= uint32_t(activation_precision) << 18;
+        word |= uint32_t(activation_format) << 22;
+        word |= uint32_t(activation_storage) << 30;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm2_precision_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm2_precision_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const // activation_type field as its enum type
+    {
+        return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+    }
+    CONSTEXPR npu_set_ifm2_precision_t& set_activation_type(NPU_NAMESPACE::activation_type value) // returns *this for chaining
+    {
+        activation_type = static_cast<uint8_t>(value) & ((1U << 1)-1); // keep the low bit
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const // activation_precision field as its enum type
+    {
+        return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+    }
+    CONSTEXPR npu_set_ifm2_precision_t& set_activation_precision(NPU_NAMESPACE::activation_precision value) // returns *this for chaining
+    {
+        activation_precision = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const // activation_format field as its enum type
+    {
+        return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+    }
+    CONSTEXPR npu_set_ifm2_precision_t& set_activation_format(NPU_NAMESPACE::activation_format value) // returns *this for chaining
+    {
+        activation_format = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::activation_storage get_activation_storage() const // activation_storage field as its enum type
+    {
+        return static_cast<NPU_NAMESPACE::activation_storage>(activation_storage);
+    }
+    CONSTEXPR npu_set_ifm2_precision_t& set_activation_storage(NPU_NAMESPACE::activation_storage value) // returns *this for chaining
+    {
+        activation_storage = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field; values with no entry in the *_str table print as "****"
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("activation_type", (activation_type < (sizeof(activation_type_str)/sizeof(activation_type_str[0])) ? activation_type_str[activation_type] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_precision", (activation_precision < (sizeof(activation_precision_str)/sizeof(activation_precision_str[0])) ? activation_precision_str[activation_precision] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_format", (activation_format < (sizeof(activation_format_str)/sizeof(activation_format_str[0])) ? activation_format_str[activation_format] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("activation_storage", (activation_storage < (sizeof(activation_storage_str)/sizeof(activation_storage_str[0])) ? activation_storage_str[activation_storage] : "****")));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): IFM2 zero point
+struct npu_set_ifm2_zero_point_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t zero_point:16; //  Zero point offset; emitted at bits [31:16]
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_zero_point_t(uint32_t _zero_point) : // value constructor; _zero_point is masked to 16 bits without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        zero_point(_zero_point & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm2_zero_point_t() : // default constructor; zero_point starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        zero_point(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; zero_point is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits [13:10] stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(zero_point) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm2_zero_point_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm2_zero_point_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_zero_point() const // zero_point field widened to uint32_t
+    {
+        return static_cast<uint32_t>(zero_point);
+    }
+    CONSTEXPR npu_set_ifm2_zero_point_t& set_zero_point(uint32_t value) // returns *this for chaining
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        zero_point = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): IFM2 Tile 0 and Tile 2 width
+struct npu_set_ifm2_width0_m1_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t width_m1:16; //  IFM2 Tile 0 and tile 2 width; emitted at bits [31:16]. NOTE(review): "_m1" presumably means width minus one - confirm against the spec
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_width0_m1_t(uint32_t _width_m1) : // value constructor; _width_m1 is masked to 16 bits without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(_width_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm2_width0_m1_t() : // default constructor; width_m1 starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        width_m1(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; width_m1 is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits [13:10] stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(width_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm2_width0_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm2_width0_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_width_m1() const // width_m1 field widened to uint32_t
+    {
+        return static_cast<uint32_t>(width_m1);
+    }
+    CONSTEXPR npu_set_ifm2_width0_m1_t& set_width_m1(uint32_t value) // returns *this for chaining
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        width_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): IFM2 Tile 0 height
+struct npu_set_ifm2_height0_m1_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t height_m1:16; //  IFM2 Tile 0 height; emitted at bits [31:16]. NOTE(review): "_m1" presumably means height minus one - confirm against the spec
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_height0_m1_t(uint32_t _height_m1) : // value constructor; _height_m1 is masked to 16 bits without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm2_height0_m1_t() : // default constructor; height_m1 starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; height_m1 is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits [13:10] stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(height_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm2_height0_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm2_height0_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const // height_m1 field widened to uint32_t
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_ifm2_height0_m1_t& set_height_m1(uint32_t value) // returns *this for chaining
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        height_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): IFM2 Tile 1 height
+struct npu_set_ifm2_height1_m1_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t height_m1:16; //  IFM2 Tile 1 height; emitted at bits [31:16]. NOTE(review): "_m1" presumably means height minus one - confirm against the spec
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_height1_m1_t(uint32_t _height_m1) : // value constructor; _height_m1 is masked to 16 bits without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(_height_m1 & ((1U << 16)-1))
+    {}
+    CONSTEXPR npu_set_ifm2_height1_m1_t() : // default constructor; height_m1 starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        height_m1(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; height_m1 is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits [13:10] stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(height_m1) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm2_height1_m1_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm2_height1_m1_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_height_m1() const // height_m1 field widened to uint32_t
+    {
+        return static_cast<uint32_t>(height_m1);
+    }
+    CONSTEXPR npu_set_ifm2_height1_m1_t& set_height_m1(uint32_t value) // returns *this for chaining
+    {
+        assert((value >> 16) == 0); // value must fit in 16 bits
+        height_m1 = static_cast<uint16_t>(value & ((1U << 16)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd0 command word (32-bit): index n (region number) for IFM2 access
+struct npu_set_ifm2_region_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint32_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t region:3; //  Region number for external memory accesses; emitted at bits [18:16]
+    uint32_t reserved1:13; // reserved; zeroed by both constructors and never emitted
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_region_t(uint32_t _region) : // value constructor; _region is masked to 3 bits without an assert
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(_region & ((1U << 3)-1)),
+        reserved1(0)
+    {}
+    CONSTEXPR npu_set_ifm2_region_t() : // default constructor; region starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+        region(0),
+        reserved1(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode and control identify this command
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION) && control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    CONSTEXPR void init() // resets opcode and control only; region is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+    }
+    operator uint32_t() // packs the fields into the raw 32-bit command word
+    {
+        uint32_t word = 0; // reserved bits stay 0
+        word |= uint32_t(opcode) << 0;
+        word |= uint32_t(control) << 14;
+        word |= uint32_t(region) << 16;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm2_region_t& set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm2_region_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const // region field widened to uint32_t
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_ifm2_region_t& set_region(uint32_t value) // returns *this for chaining
+    {
+        assert((value >> 3) == 0); // value must fit in 3 bits
+        region = static_cast<uint8_t>(value & ((1U << 3)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one (name, text) pair per payload field
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// cmd1 command (64-bit, two words): IFM Tile 0 address
+struct npu_set_ifm_base0_t
+{
+#ifdef __cplusplus
+private: // C++ callers use the accessors below; C code sees the raw bit-fields
+#endif
+    uint32_t opcode:10; //  opcode; emitted at bits [9:0] by operator uint64_t()
+    uint32_t reserved0:4; // reserved; zeroed by both constructors and never emitted
+    uint32_t control:2; //  control; emitted at bits [15:14]
+    uint32_t addr_hi:8; //  address extension (address bits [39:32]); emitted at bits [23:16]
+    uint32_t reserved1:8; // reserved; zeroed by both constructors and never emitted
+    uint32_t addr_lo:32; //  address offset (address bits [31:0]); emitted at bits [63:32]
+#ifdef __cplusplus
+public:
+    npu_set_ifm_base0_t(uint64_t _addr) : // value constructor; keeps the low 40 bits of _addr, split across addr_hi/addr_lo
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm_base0_t() : // default constructor; address starts at 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // opcode must match; control may be 1 or 2. NOTE(review): confirm which cmd_ctrl encodings these two values are
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control (to CMD1_CTRL) only; the address is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the fields into the raw 64-bit command value
+    {
+        uint64_t word = 0; // reserved bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // recombines addr_hi and addr_lo into one 40-bit address
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm_base0_t& set_addr(uint64_t value) // splits value into addr_lo/addr_hi; bits above 39 are dropped without an assert; returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", address formatted as 0x-prefixed hex)
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// IFM Tile 1 address (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ifm_base1_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ifm_base1_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm_base1_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM_BASE1 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm_base1_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM Tile 2 address (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ifm_base2_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ifm_base2_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm_base2_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM_BASE2 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm_base2_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM Tile 3 address (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ifm_base3_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ifm_base3_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm_base3_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM_BASE3 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm_base3_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM byte stride between horizontal values (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ifm_stride_x_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ifm_stride_x_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm_stride_x_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM_STRIDE_X and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm_stride_x_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM byte stride between vertical values (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ifm_stride_y_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ifm_stride_y_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm_stride_y_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM_STRIDE_Y and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm_stride_y_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM byte stride between channel blocks (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ifm_stride_c_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ifm_stride_c_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm_stride_c_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM_STRIDE_C and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm_stride_c_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// OFM Tile 0 address (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ofm_base0_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ofm_base0_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ofm_base0_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE0 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ofm_base0_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// OFM Tile 1 address (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ofm_base1_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ofm_base1_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ofm_base1_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE1 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ofm_base1_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// OFM Tile 2 address (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ofm_base2_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ofm_base2_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ofm_base2_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE2 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ofm_base2_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// OFM Tile 3 address (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ofm_base3_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ofm_base3_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ofm_base3_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE3 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ofm_base3_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// OFM byte stride between horizontal values (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ofm_stride_x_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ofm_stride_x_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ofm_stride_x_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_X and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ofm_stride_x_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// OFM byte stride between vertical values (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ofm_stride_y_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ofm_stride_y_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ofm_stride_y_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_Y and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ofm_stride_y_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// OFM byte stride between channel blocks (cmd1 command word: opcode, control and a 40-bit value split across addr_hi/addr_lo)
+struct npu_set_ofm_stride_c_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode, packed at bits 0-9 of the 64-bit word
+    uint32_t reserved0:4; //  set to 0 by both constructors, never packed into the word
+    uint32_t control:2; //  control, packed at bits 14-15
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the value, packed at bits 16-23
+    uint32_t reserved1:8; //  set to 0 by both constructors, never packed into the word
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the value, packed at bits 32-63
+#ifdef __cplusplus
+public:
+    npu_set_ofm_stride_c_t(uint64_t _addr) : // builds the command for _addr; only the low 40 bits of _addr are kept
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ofm_stride_c_t() : // builds the command with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_C and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; the value and reserved fields keep their current contents
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // packs the fields into the 64-bit command word; const-qualified so const objects can be converted
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word; // bits 10-13 and 24-31 (the reserved fields) are always 0 here
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the stored 40-bit value: addr_hi in bits 32-39, addr_lo in bits 0-31
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ofm_stride_c_t& set_addr(uint64_t value) // stores the low 40 bits of value (higher bits are dropped) and returns *this
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// Weight stream byte offset in WEIGHT_REGION. Command struct holding a 40-bit offset split into addr_hi (upper 8 bits) and addr_lo (lower 32 bits).
+struct npu_set_weight_base_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the offset; bits 16-23 of the packed word
+    uint32_t reserved1:8; //  reserved; set to 0 by both constructors
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the offset; bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_weight_base_t(uint64_t _addr) : // builds the command from _addr; bits of _addr above bit 39 are dropped
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_weight_base_t() : // default: opcode and control preset for this command, offset fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_WEIGHT_BASE and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; the other fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the 40-bit offset reassembled from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_weight_base_t& set_addr(uint64_t value) // stores the low 40 bits of value (no range assert); returns *this for chaining
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one ("addr", "0x<hex offset>") pair to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Weight stream byte length. Command struct holding a full 32-bit length in the upper half of the packed word.
+struct npu_set_weight_length_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t reserved1:16; //  reserved; set to 0 by both constructors
+    uint32_t length:32; //  Weight stream byte length; bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_weight_length_t(uint32_t _length) : // builds the command with length = _length (stored unmodified)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(_length)
+    {}
+    CONSTEXPR npu_set_weight_length_t() : // default: opcode and control preset for this command, length zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_WEIGHT_LENGTH and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; length is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(length) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // returns the stored opcode as a cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_weight_length_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // returns the stored control bits as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_weight_length_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_length() const // returns the stored 32-bit length
+    {
+        return static_cast<uint32_t>(length);
+    }
+    CONSTEXPR npu_set_weight_length_t& set_length(uint32_t value) // stores value unmodified (the field is a full 32 bits wide); returns *this
+    {
+        length = value;
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one ("length", decimal string) pair to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Scale and bias stream input byte offset from SCALE_REGION. Command struct holding a 40-bit offset split into addr_hi (upper 8 bits) and addr_lo (lower 32 bits).
+struct npu_set_scale_base_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the offset; bits 16-23 of the packed word
+    uint32_t reserved1:8; //  reserved; set to 0 by both constructors
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the offset; bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_scale_base_t(uint64_t _addr) : // builds the command from _addr; bits of _addr above bit 39 are dropped
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_scale_base_t() : // default: opcode and control preset for this command, offset fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_SCALE_BASE and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; the other fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the 40-bit offset reassembled from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_scale_base_t& set_addr(uint64_t value) // stores the low 40 bits of value (no range assert); returns *this for chaining
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one ("addr", "0x<hex offset>") pair to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Scale and bias stream input byte length. Command struct holding a 20-bit length.
+struct npu_set_scale_length_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t reserved1:16; //  reserved; set to 0 by both constructors
+    uint32_t length:20; //  Scale and bias stream byte length; bits 32-51 of the packed word
+    uint32_t reserved2:12; //  reserved; set to 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_scale_length_t(uint32_t _length) : // builds the command from _length; bits above bit 19 are silently masked off (no assert here)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(_length & ((1U << 20)-1)),
+        reserved2(0)
+    {}
+    CONSTEXPR npu_set_scale_length_t() : // default: opcode and control preset for this command, length zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(0),
+        reserved2(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_SCALE_LENGTH and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; length is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(length) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // returns the stored opcode as a cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_scale_length_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // returns the stored control bits as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_scale_length_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_length() const // returns the stored 20-bit length
+    {
+        return static_cast<uint32_t>(length);
+    }
+    CONSTEXPR npu_set_scale_length_t& set_length(uint32_t value) // stores value in the 20-bit field; returns *this for chaining
+    {
+        assert((value >> 20) == 0); // value must fit in 20 bits
+        length = value & ((1U << 20)-1); // mask still applied, so an oversized value is truncated when assert is compiled out
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one ("length", decimal string) pair to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// OFM scale. Command struct holding a right shift, a double-rounding shift, a 3-bit rounding mode and a 31-bit scale.
+struct npu_set_ofm_scale_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t shift:6; //  Right shift (0 to 63); bits 16-21 of the packed word
+    uint32_t dbl_rnd:5; //  Double rounding shift (0 to 30); bits 22-26 of the packed word
+    uint32_t reserved1:2; //  reserved; set to 0 by both constructors
+    uint32_t round_mode:3; //  Rounding mode; bits 29-31 of the packed word
+    uint32_t scale:31; //  Scale value (uint31 = non-negative int32); bits 32-62 of the packed word
+    uint32_t reserved2:1; //  reserved; set to 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_ofm_scale_t(uint32_t _shift, uint32_t _dbl_rnd, NPU_NAMESPACE::round_mode_ofm _round_mode, uint32_t _scale) : // each argument is silently masked to its field width (no asserts here)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        shift(_shift & ((1U << 6)-1)),
+        dbl_rnd(_dbl_rnd & ((1U << 5)-1)),
+        reserved1(0),
+        round_mode(static_cast<uint8_t>(_round_mode) & ((1U << 3)-1)),
+        scale(_scale & ((1U << 31)-1)),
+        reserved2(0)
+    {}
+    CONSTEXPR npu_set_ofm_scale_t() : // default: opcode and control preset for this command, all value fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        shift(0),
+        dbl_rnd(0),
+        reserved1(0),
+        round_mode(0),
+        scale(0),
+        reserved2(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_SCALE and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; the other fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(shift) << 16;
+        word |= uint64_t(dbl_rnd) << 22;
+        word |= uint64_t(round_mode) << 29;
+        word |= uint64_t(scale) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // returns the stored opcode as a cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ofm_scale_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // returns the stored control bits as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ofm_scale_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_shift() const // returns the stored 6-bit shift
+    {
+        return static_cast<uint32_t>(shift);
+    }
+    CONSTEXPR npu_set_ofm_scale_t& set_shift(uint32_t value) // stores value in the 6-bit shift field; returns *this for chaining
+    {
+        assert((value >> 6) == 0); // value must fit in 6 bits
+        shift = static_cast<uint8_t>(value & ((1U << 6)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_dbl_rnd() const // returns the stored 5-bit double rounding shift
+    {
+        return static_cast<uint32_t>(dbl_rnd);
+    }
+    CONSTEXPR npu_set_ofm_scale_t& set_dbl_rnd(uint32_t value) // stores value in the 5-bit dbl_rnd field; returns *this for chaining
+    {
+        assert((value >> 5) == 0); // only checks that value fits in 5 bits, so 31 passes even though the field comment says 0 to 30
+        dbl_rnd = static_cast<uint8_t>(value & ((1U << 5)-1));
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::round_mode_ofm get_round_mode() const // returns the stored rounding mode as a round_mode_ofm
+    {
+        return static_cast<NPU_NAMESPACE::round_mode_ofm>(round_mode);
+    }
+    CONSTEXPR npu_set_ofm_scale_t& set_round_mode(NPU_NAMESPACE::round_mode_ofm value) // stores the low 3 bits of value; returns *this for chaining
+    {
+        round_mode = static_cast<uint8_t>(value) & ((1U << 3)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_scale() const // returns the stored 31-bit scale
+    {
+        return static_cast<uint32_t>(scale);
+    }
+    CONSTEXPR npu_set_ofm_scale_t& set_scale(uint32_t value) // stores value in the 31-bit scale field; returns *this for chaining
+    {
+        assert((value >> 31) == 0); // value must fit in 31 bits (top bit clear)
+        scale = value & ((1U << 31)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends (name, text) pairs for shift, dbl_rnd, round_mode and scale, in that order
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("shift", std::to_string(shift)));
+        fields.push_back(std::make_pair<std::string, std::string>("dbl_rnd", std::to_string(dbl_rnd)));
+        fields.push_back(std::make_pair<std::string, std::string>("round_mode", (round_mode < (sizeof(round_mode_ofm_str)/sizeof(round_mode_ofm_str[0])) ? round_mode_ofm_str[round_mode] : "****"))); // "****" when round_mode has no entry in round_mode_ofm_str
+        fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// IFM input scale. Command struct holding a right shift, a double-rounding shift, a 1-bit rounding mode and a 31-bit scale.
+struct npu_set_ifm_scale_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t shift:6; //  Right shift (0 to 63); bits 16-21 of the packed word
+    uint32_t dbl_rnd:5; //  Double rounding shift (0 to 30); bits 22-26 of the packed word
+    uint32_t reserved1:2; //  reserved; set to 0 by both constructors
+    uint32_t round_mode:1; //  Rounding mode; bit 29 of the packed word
+    uint32_t reserved2:2; //  reserved; set to 0 by both constructors
+    uint32_t scale:31; //  Scale value (uint31 = non-negative int32); bits 32-62 of the packed word
+    uint32_t reserved3:1; //  reserved; set to 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_ifm_scale_t(uint32_t _shift, uint32_t _dbl_rnd, NPU_NAMESPACE::round_mode_ifm _round_mode, uint32_t _scale) : // each argument is silently masked to its field width (no asserts here)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_SCALE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        shift(_shift & ((1U << 6)-1)),
+        dbl_rnd(_dbl_rnd & ((1U << 5)-1)),
+        reserved1(0),
+        round_mode(static_cast<uint8_t>(_round_mode) & ((1U << 1)-1)),
+        reserved2(0),
+        scale(_scale & ((1U << 31)-1)),
+        reserved3(0)
+    {}
+    CONSTEXPR npu_set_ifm_scale_t() : // default: opcode and control preset for this command, all value fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_SCALE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        shift(0),
+        dbl_rnd(0),
+        reserved1(0),
+        round_mode(0),
+        reserved2(0),
+        scale(0),
+        reserved3(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM_SCALE and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_SCALE) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; the other fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_SCALE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(shift) << 16;
+        word |= uint64_t(dbl_rnd) << 22;
+        word |= uint64_t(round_mode) << 29;
+        word |= uint64_t(scale) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // returns the stored opcode as a cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm_scale_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // returns the stored control bits as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm_scale_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_shift() const // returns the stored 6-bit shift
+    {
+        return static_cast<uint32_t>(shift);
+    }
+    CONSTEXPR npu_set_ifm_scale_t& set_shift(uint32_t value) // stores value in the 6-bit shift field; returns *this for chaining
+    {
+        assert((value >> 6) == 0); // value must fit in 6 bits
+        shift = static_cast<uint8_t>(value & ((1U << 6)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_dbl_rnd() const // returns the stored 5-bit double rounding shift
+    {
+        return static_cast<uint32_t>(dbl_rnd);
+    }
+    CONSTEXPR npu_set_ifm_scale_t& set_dbl_rnd(uint32_t value) // stores value in the 5-bit dbl_rnd field; returns *this for chaining
+    {
+        assert((value >> 5) == 0); // only checks that value fits in 5 bits, so 31 passes even though the field comment says 0 to 30
+        dbl_rnd = static_cast<uint8_t>(value & ((1U << 5)-1));
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::round_mode_ifm get_round_mode() const // returns the stored rounding mode as a round_mode_ifm
+    {
+        return static_cast<NPU_NAMESPACE::round_mode_ifm>(round_mode);
+    }
+    CONSTEXPR npu_set_ifm_scale_t& set_round_mode(NPU_NAMESPACE::round_mode_ifm value) // stores the lowest bit of value; returns *this for chaining
+    {
+        round_mode = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_scale() const // returns the stored 31-bit scale
+    {
+        return static_cast<uint32_t>(scale);
+    }
+    CONSTEXPR npu_set_ifm_scale_t& set_scale(uint32_t value) // stores value in the 31-bit scale field; returns *this for chaining
+    {
+        assert((value >> 31) == 0); // value must fit in 31 bits (top bit clear)
+        scale = value & ((1U << 31)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends (name, text) pairs for shift, dbl_rnd, round_mode and scale, in that order
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("shift", std::to_string(shift)));
+        fields.push_back(std::make_pair<std::string, std::string>("dbl_rnd", std::to_string(dbl_rnd)));
+        fields.push_back(std::make_pair<std::string, std::string>("round_mode", (round_mode < (sizeof(round_mode_ifm_str)/sizeof(round_mode_ifm_str[0])) ? round_mode_ifm_str[round_mode] : "****"))); // "****" when round_mode has no entry in round_mode_ifm_str
+        fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// IFM2 input scale. Command struct holding a right shift, a double-rounding shift, a 1-bit rounding mode and a 31-bit scale.
+struct npu_set_ifm2_scale_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t shift:6; //  Right shift (0 to 63); bits 16-21 of the packed word
+    uint32_t dbl_rnd:5; //  Double rounding shift (0 to 30); bits 22-26 of the packed word
+    uint32_t reserved1:2; //  reserved; set to 0 by both constructors
+    uint32_t round_mode:1; //  Rounding mode; bit 29 of the packed word
+    uint32_t reserved2:2; //  reserved; set to 0 by both constructors
+    uint32_t scale:31; //  Scale value (uint31 = non-negative int32); bits 32-62 of the packed word
+    uint32_t reserved3:1; //  reserved; set to 0 by both constructors
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_scale_t(uint32_t _shift, uint32_t _dbl_rnd, NPU_NAMESPACE::round_mode_ifm _round_mode, uint32_t _scale) : // each argument is silently masked to its field width (no asserts here)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_SCALE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        shift(_shift & ((1U << 6)-1)),
+        dbl_rnd(_dbl_rnd & ((1U << 5)-1)),
+        reserved1(0),
+        round_mode(static_cast<uint8_t>(_round_mode) & ((1U << 1)-1)),
+        reserved2(0),
+        scale(_scale & ((1U << 31)-1)),
+        reserved3(0)
+    {}
+    CONSTEXPR npu_set_ifm2_scale_t() : // default: opcode and control preset for this command, all value fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_SCALE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        shift(0),
+        dbl_rnd(0),
+        reserved1(0),
+        round_mode(0),
+        reserved2(0),
+        scale(0),
+        reserved3(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_SCALE and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_SCALE) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; the other fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_SCALE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(shift) << 16;
+        word |= uint64_t(dbl_rnd) << 22;
+        word |= uint64_t(round_mode) << 29;
+        word |= uint64_t(scale) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // returns the stored opcode as a cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_ifm2_scale_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // returns the stored control bits as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_ifm2_scale_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_shift() const // returns the stored 6-bit shift
+    {
+        return static_cast<uint32_t>(shift);
+    }
+    CONSTEXPR npu_set_ifm2_scale_t& set_shift(uint32_t value) // stores value in the 6-bit shift field; returns *this for chaining
+    {
+        assert((value >> 6) == 0); // value must fit in 6 bits
+        shift = static_cast<uint8_t>(value & ((1U << 6)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_dbl_rnd() const // returns the stored 5-bit double rounding shift
+    {
+        return static_cast<uint32_t>(dbl_rnd);
+    }
+    CONSTEXPR npu_set_ifm2_scale_t& set_dbl_rnd(uint32_t value) // stores value in the 5-bit dbl_rnd field; returns *this for chaining
+    {
+        assert((value >> 5) == 0); // only checks that value fits in 5 bits, so 31 passes even though the field comment says 0 to 30
+        dbl_rnd = static_cast<uint8_t>(value & ((1U << 5)-1));
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::round_mode_ifm get_round_mode() const // returns the stored rounding mode as a round_mode_ifm
+    {
+        return static_cast<NPU_NAMESPACE::round_mode_ifm>(round_mode);
+    }
+    CONSTEXPR npu_set_ifm2_scale_t& set_round_mode(NPU_NAMESPACE::round_mode_ifm value) // stores the lowest bit of value; returns *this for chaining
+    {
+        round_mode = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_scale() const // returns the stored 31-bit scale
+    {
+        return static_cast<uint32_t>(scale);
+    }
+    CONSTEXPR npu_set_ifm2_scale_t& set_scale(uint32_t value) // stores value in the 31-bit scale field; returns *this for chaining
+    {
+        assert((value >> 31) == 0); // value must fit in 31 bits (top bit clear)
+        scale = value & ((1U << 31)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends (name, text) pairs for shift, dbl_rnd, round_mode and scale, in that order
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("shift", std::to_string(shift)));
+        fields.push_back(std::make_pair<std::string, std::string>("dbl_rnd", std::to_string(dbl_rnd)));
+        fields.push_back(std::make_pair<std::string, std::string>("round_mode", (round_mode < (sizeof(round_mode_ifm_str)/sizeof(round_mode_ifm_str[0])) ? round_mode_ifm_str[round_mode] : "****"))); // "****" when round_mode has no entry in round_mode_ifm_str
+        fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Operation scalar value. Command struct holding a 32-bit scalar stored as raw bits and interpreted as int32.
+struct npu_set_op_scalar_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t reserved1:16; //  reserved; set to 0 by both constructors
+    uint32_t scalar:32; //  Scalar value (int32), kept as its raw 32-bit pattern; bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_op_scalar_t(uint32_t _scalar) : // builds the command with scalar = _scalar (stored unmodified)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OP_SCALAR)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        scalar(_scalar)
+    {}
+    CONSTEXPR npu_set_op_scalar_t() : // default: opcode and control preset for this command, scalar zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OP_SCALAR)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        scalar(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_OP_SCALAR and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OP_SCALAR) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; scalar is left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OP_SCALAR); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(scalar) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // returns the stored opcode as a cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_op_scalar_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // returns the stored control bits as a cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_op_scalar_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_scalar() const // returns the raw 32-bit pattern of the scalar
+    {
+        return static_cast<uint32_t>(scalar);
+    }
+    CONSTEXPR npu_set_op_scalar_t& set_scalar(uint32_t value) // stores value unmodified (the field is a full 32 bits wide); returns *this
+    {
+        scalar = value;
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one ("scalar", signed decimal string) pair to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("scalar", std::to_string(static_cast<int64_t>(scalar) - (scalar > static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) ? (static_cast<int64_t>(1) << 32) : 0)))); // signed int32 view: patterns above INT32_MAX print as negative (0xFFFFFFFF -> "-1"); 64-bit math avoids the unsigned promotion that made the old expression print scalar-1
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// DMA user channel 0 source byte offset from DMA0_SRC_REGION. Command struct holding a 40-bit offset split into addr_hi (upper 8 bits) and addr_lo (lower 32 bits).
+struct npu_set_dma0_src_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the offset; bits 16-23 of the packed word
+    uint32_t reserved1:8; //  reserved; set to 0 by both constructors
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the offset; bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_src_t(uint64_t _addr) : // builds the command from _addr; bits of _addr above bit 39 are dropped
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_src_t() : // default: opcode and control preset for this command, offset fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_SRC and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; the other fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the 40-bit offset reassembled from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_src_t& set_addr(uint64_t value) // stores the low 40 bits of value (no range assert); returns *this for chaining
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one ("addr", "0x<hex offset>") pair to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// DMA user channel 0 destination byte offset from DMA0_DST_REGION. Command struct holding a 40-bit offset split into addr_hi (upper 8 bits) and addr_lo (lower 32 bits).
+struct npu_set_dma0_dst_t
+{
+#ifdef __cplusplus
+private: // in C++ the raw bit-fields are private; use the member functions below
+#endif
+    uint32_t opcode:10; //  opcode; bits 0-9 of the word built by operator uint64_t()
+    uint32_t reserved0:4; //  reserved; set to 0 by both constructors
+    uint32_t control:2; //  control; bits 14-15 of the packed word
+    uint32_t addr_hi:8; //  address extension: bits 32-39 of the offset; bits 16-23 of the packed word
+    uint32_t reserved1:8; //  reserved; set to 0 by both constructors
+    uint32_t addr_lo:32; //  address offset: bits 0-31 of the offset; bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_dst_t(uint64_t _addr) : // builds the command from _addr; bits of _addr above bit 39 are dropped
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_dst_t() : // default: opcode and control preset for this command, offset fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_DST and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control to this command's values; the other fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs the command into one 64-bit word
+    {
+        uint64_t word = 0; // reserved fields are never OR-ed in, so their bits stay 0
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // returns the 40-bit offset reassembled from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_dst_t& set_addr(uint64_t value) // stores the low 40 bits of value (no range assert); returns *this for chaining
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends one ("addr", "0x<hex offset>") pair to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// DMA user channel 0 transfer length in bytes for each 1D transfer. Packs into one 64-bit cmd1 word (NPU_SET_DMA0_LEN); the 40-bit length travels in the fields the accessors call addr (addr_hi = top 8 bits, addr_lo = low 32 bits).
+struct npu_set_dma0_len_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_len_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_len_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_LEN and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_len_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// Source byte stride after each 1D transfer. Packs into one 64-bit cmd1 word (NPU_SET_DMA0_SRC_STRIDE0); the 40-bit stride travels in the fields the accessors call addr (addr_hi = top 8 bits, addr_lo = low 32 bits).
+struct npu_set_dma0_src_stride0_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_src_stride0_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_src_stride0_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_SRC_STRIDE0 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE0) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE0); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_src_stride0_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// Source byte stride after each 2D transfer. Packs into one 64-bit cmd1 word (NPU_SET_DMA0_SRC_STRIDE1); the 40-bit stride travels in the fields the accessors call addr (addr_hi = top 8 bits, addr_lo = low 32 bits).
+struct npu_set_dma0_src_stride1_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_src_stride1_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_src_stride1_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_SRC_STRIDE1 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE1) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC_STRIDE1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_src_stride1_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// Destination byte stride after each 1D transfer. Packs into one 64-bit cmd1 word (NPU_SET_DMA0_DST_STRIDE0); the 40-bit stride travels in the fields the accessors call addr (addr_hi = top 8 bits, addr_lo = low 32 bits).
+struct npu_set_dma0_dst_stride0_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_dst_stride0_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_dst_stride0_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_DST_STRIDE0 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE0) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE0); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_dst_stride0_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// Destination byte stride after each 2D transfer. Packs into one 64-bit cmd1 word (NPU_SET_DMA0_DST_STRIDE1); the 40-bit stride travels in the fields the accessors call addr (addr_hi = top 8 bits, addr_lo = low 32 bits).
+struct npu_set_dma0_dst_stride1_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_dst_stride1_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_dst_stride1_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_DST_STRIDE1 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE1) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST_STRIDE1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_dst_stride1_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// DMA channel 0 index array address. Packs into one 64-bit cmd1 word (NPU_SET_DMA0_IDX); the 40-bit address is split into addr_hi (top 8 bits) and addr_lo (low 32 bits).
+struct npu_set_dma0_idx_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_idx_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_idx_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_IDX and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_idx_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// DMA channel 0 index maximum value. Packs into one 64-bit cmd1 word (NPU_SET_DMA0_IDX_MAX) that carries a 31-bit limit in idx_max.
+struct npu_set_dma0_idx_max_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t reserved1:16; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t idx_max:31; //  maximum permitted index; emitted in bits 32-62 of the packed word
+    uint32_t reserved2:1; //  zeroed by both constructors; never emitted by operator uint64_t
+#ifdef __cplusplus
+public:
+    npu_set_dma0_idx_max_t(uint32_t _idx_max) : // bit 31 of _idx_max is masked off silently here, whereas set_idx_max() asserts on it
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_MAX)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        idx_max(_idx_max & ((1U << 31)-1)),
+        reserved2(0)
+    {}
+    CONSTEXPR npu_set_dma0_idx_max_t() : // same opcode/control as the constructor above, with idx_max = 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_MAX)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        idx_max(0),
+        reserved2(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_IDX_MAX and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_MAX) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; idx_max and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_MAX); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(idx_max) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // stored opcode reinterpreted as the cmd1_opcode enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_dma0_idx_max_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // any opcode other than NPU_SET_DMA0_IDX_MAX makes valid() return false
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1); // keep the low 10 bits
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control reinterpreted as the cmd_ctrl enum
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_dma0_idx_max_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // values outside 1..2 make valid() return false
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1); // keep the low 2 bits
+        return *this;
+    }
+    CONSTEXPR uint32_t get_idx_max() const // current 31-bit limit
+    {
+        return static_cast<uint32_t>(idx_max);
+    }
+    CONSTEXPR npu_set_dma0_idx_max_t& set_idx_max(uint32_t value) // value must fit in 31 bits
+    {
+        assert((value >> 31) == 0); // when asserts are compiled out, the mask below truncates instead
+        idx_max = value & ((1U << 31)-1);
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("idx_max", decimal value) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("idx_max", std::to_string(idx_max)));
+    }
+#endif
+#endif
+};
+// Index byte distance to skip in index after each 2D transfer. Packs into one 64-bit cmd1 word (NPU_SET_DMA0_IDX_SKIP1); the 40-bit distance travels in the fields the accessors call addr (addr_hi = top 8 bits, addr_lo = low 32 bits).
+struct npu_set_dma0_idx_skip1_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_dma0_idx_skip1_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_SKIP1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_dma0_idx_skip1_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_SKIP1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_IDX_SKIP1 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_SKIP1) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_IDX_SKIP1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_dma0_idx_skip1_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM2 Tile 0 address. Packs into one 64-bit cmd1 word (NPU_SET_IFM2_BASE0); the 40-bit address is split into addr_hi (top 8 bits) and addr_lo (low 32 bits).
+struct npu_set_ifm2_base0_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_base0_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm2_base0_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE0 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm2_base0_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM2 Tile 1 address. Packs into one 64-bit cmd1 word (NPU_SET_IFM2_BASE1); the 40-bit address is split into addr_hi (top 8 bits) and addr_lo (low 32 bits).
+struct npu_set_ifm2_base1_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_base1_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm2_base1_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE1 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm2_base1_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM2 Tile 2 address. Packs into one 64-bit cmd1 word (NPU_SET_IFM2_BASE2); the 40-bit address is split into addr_hi (top 8 bits) and addr_lo (low 32 bits).
+struct npu_set_ifm2_base2_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_base2_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm2_base2_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE2 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm2_base2_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM2 Tile 3 address. Packs into one 64-bit cmd1 word (NPU_SET_IFM2_BASE3); the 40-bit address is split into addr_hi (top 8 bits) and addr_lo (low 32 bits).
+struct npu_set_ifm2_base3_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  command opcode; emitted in bits 0-9 of the packed word
+    uint32_t reserved0:4; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t control:2; //  command control; emitted in bits 14-15, valid() accepts 1 or 2
+    uint32_t addr_hi:8; //  value bits 32-39; emitted in bits 16-23 of the packed word
+    uint32_t reserved1:8; //  zeroed by both constructors; never emitted by operator uint64_t
+    uint32_t addr_lo:32; //  value bits 0-31; emitted in bits 32-63 of the packed word
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_base3_t(uint64_t _addr) : // keeps only the low 40 bits of _addr
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm2_base3_t() : // same opcode/control as the constructor above, with a zero value
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE3 and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // rewrites opcode and control only; value and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() const // const-qualified: packing only reads the fields, so const commands convert too
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // rebuilds the 40-bit value from addr_hi and addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm2_base3_t& set_addr(uint64_t value) // bits 40 and up of value are dropped without any check
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends ("addr", "0x" + hex value) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif
+#endif
+};
+// IFM2 byte stride between horizontal values: cmd1 command (NPU_SET_IFM2_STRIDE_X); NOTE(review): the stride appears to travel in the 40-bit addr_hi:addr_lo payload - confirm against the command-stream spec
+struct npu_set_ifm2_stride_x_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t addr_hi:8; //  address extension: payload bits 32-39, packed into bits 16-23 of the command word
+    uint32_t reserved1:8; //  reserved; written as 0 by both C++ constructors
+    uint32_t addr_lo:32; //  address offset: payload bits 0-31, packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_stride_x_t(uint64_t _addr) : // Builds the command from _addr; bits above bit 39 are discarded
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm2_stride_x_t() : // Builds the command with a zero payload
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM2_STRIDE_X and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; payload and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control, addr_hi and addr_lo into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // Reassembles the 40-bit payload as (addr_hi << 32) | addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm2_stride_x_t& set_addr(uint64_t value) // Stores the low 40 bits of value; returns *this so calls can be chained
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("addr", "0x"-prefixed hex of get_addr()) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// IFM2 byte stride between vertical values: cmd1 command (NPU_SET_IFM2_STRIDE_Y); NOTE(review): the stride appears to travel in the 40-bit addr_hi:addr_lo payload - confirm against the command-stream spec
+struct npu_set_ifm2_stride_y_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t addr_hi:8; //  address extension: payload bits 32-39, packed into bits 16-23 of the command word
+    uint32_t reserved1:8; //  reserved; written as 0 by both C++ constructors
+    uint32_t addr_lo:32; //  address offset: payload bits 0-31, packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_stride_y_t(uint64_t _addr) : // Builds the command from _addr; bits above bit 39 are discarded
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm2_stride_y_t() : // Builds the command with a zero payload
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM2_STRIDE_Y and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; payload and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control, addr_hi and addr_lo into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // Reassembles the 40-bit payload as (addr_hi << 32) | addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm2_stride_y_t& set_addr(uint64_t value) // Stores the low 40 bits of value; returns *this so calls can be chained
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("addr", "0x"-prefixed hex of get_addr()) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// IFM2 byte stride between channel blocks: cmd1 command (NPU_SET_IFM2_STRIDE_C); NOTE(review): the stride appears to travel in the 40-bit addr_hi:addr_lo payload - confirm against the command-stream spec
+struct npu_set_ifm2_stride_c_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t addr_hi:8; //  address extension: payload bits 32-39, packed into bits 16-23 of the command word
+    uint32_t reserved1:8; //  reserved; written as 0 by both C++ constructors
+    uint32_t addr_lo:32; //  address offset: payload bits 0-31, packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_ifm2_stride_c_t(uint64_t _addr) : // Builds the command from _addr; bits above bit 39 are discarded
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_ifm2_stride_c_t() : // Builds the command with a zero payload
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM2_STRIDE_C and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; payload and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control, addr_hi and addr_lo into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // Reassembles the 40-bit payload as (addr_hi << 32) | addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_ifm2_stride_c_t& set_addr(uint64_t value) // Stores the low 40 bits of value; returns *this so calls can be chained
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("addr", "0x"-prefixed hex of get_addr()) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Weight stream byte offset in WEIGHT_REGION for weight decoder 1: cmd1 command (NPU_SET_WEIGHT1_BASE) with a 40-bit addr_hi:addr_lo payload
+struct npu_set_weight1_base_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t addr_hi:8; //  address extension: address bits 32-39, packed into bits 16-23 of the command word
+    uint32_t reserved1:8; //  reserved; written as 0 by both C++ constructors
+    uint32_t addr_lo:32; //  address offset: address bits 0-31, packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_weight1_base_t(uint64_t _addr) : // Builds the command from _addr; address bits above bit 39 are discarded
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_weight1_base_t() : // Builds the command with a zero address
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_WEIGHT1_BASE and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; address and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control, addr_hi and addr_lo into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // Reassembles the 40-bit address as (addr_hi << 32) | addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_weight1_base_t& set_addr(uint64_t value) // Stores the low 40 bits of value; returns *this so calls can be chained
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("addr", "0x"-prefixed hex of get_addr()) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Weight stream byte length for weight decoder 1: 64-bit cmd1 command (NPU_SET_WEIGHT1_LENGTH) with a 32-bit length payload
+struct npu_set_weight1_length_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t reserved1:16; //  reserved; written as 0 by both C++ constructors
+    uint32_t length:32; //  Weight stream byte length; packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_weight1_length_t(uint32_t _length) : // Builds the command carrying _length unchanged
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(_length)
+    {}
+    CONSTEXPR npu_set_weight1_length_t() : // Builds the command with a zero length
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_WEIGHT1_LENGTH and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; length and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control and length into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(length) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the stored opcode cast to cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_weight1_length_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Stores value masked to 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the stored control bits cast to cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_weight1_length_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Stores value masked to 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_length() const // Returns the stored 32-bit length
+    {
+        return static_cast<uint32_t>(length);
+    }
+    CONSTEXPR npu_set_weight1_length_t& set_length(uint32_t value) // Stores value unchanged (field is a full 32 bits); returns *this for chaining
+    {
+        length = value;
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("length", decimal string of length) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Weight stream byte offset in WEIGHT_REGION for weight decoder 2: cmd1 command (NPU_SET_WEIGHT2_BASE) with a 40-bit addr_hi:addr_lo payload
+struct npu_set_weight2_base_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t addr_hi:8; //  address extension: address bits 32-39, packed into bits 16-23 of the command word
+    uint32_t reserved1:8; //  reserved; written as 0 by both C++ constructors
+    uint32_t addr_lo:32; //  address offset: address bits 0-31, packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_weight2_base_t(uint64_t _addr) : // Builds the command from _addr; address bits above bit 39 are discarded
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_weight2_base_t() : // Builds the command with a zero address
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_WEIGHT2_BASE and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_BASE) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; address and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_BASE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control, addr_hi and addr_lo into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // Reassembles the 40-bit address as (addr_hi << 32) | addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_weight2_base_t& set_addr(uint64_t value) // Stores the low 40 bits of value; returns *this so calls can be chained
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("addr", "0x"-prefixed hex of get_addr()) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Weight stream byte length for weight decoder 2: 64-bit cmd1 command (NPU_SET_WEIGHT2_LENGTH) with a 32-bit length payload
+struct npu_set_weight2_length_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t reserved1:16; //  reserved; written as 0 by both C++ constructors
+    uint32_t length:32; //  Weight stream byte length; packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_weight2_length_t(uint32_t _length) : // Builds the command carrying _length unchanged
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(_length)
+    {}
+    CONSTEXPR npu_set_weight2_length_t() : // Builds the command with a zero length
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_WEIGHT2_LENGTH and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_LENGTH) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; length and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT2_LENGTH); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control and length into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(length) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the stored opcode cast to cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_weight2_length_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Stores value masked to 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the stored control bits cast to cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_weight2_length_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Stores value masked to 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_length() const // Returns the stored 32-bit length
+    {
+        return static_cast<uint32_t>(length);
+    }
+    CONSTEXPR npu_set_weight2_length_t& set_length(uint32_t value) // Stores value unchanged (field is a full 32 bits); returns *this for chaining
+    {
+        length = value;
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("length", decimal string of length) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Weight stream byte offset in WEIGHT_REGION for weight decoder 3: cmd1 command (NPU_SET_WEIGHT3_BASE) with a 40-bit addr_hi:addr_lo payload
+struct npu_set_weight3_base_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t addr_hi:8; //  address extension: address bits 32-39, packed into bits 16-23 of the command word
+    uint32_t reserved1:8; //  reserved; written as 0 by both C++ constructors
+    uint32_t addr_lo:32; //  address offset: address bits 0-31, packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_weight3_base_t(uint64_t _addr) : // Builds the command from _addr; address bits above bit 39 are discarded
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(static_cast<uint8_t>((_addr >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max()))),
+        reserved1(0),
+        addr_lo(static_cast<uint32_t>((_addr) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))
+    {}
+    CONSTEXPR npu_set_weight3_base_t() : // Builds the command with a zero address
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_BASE)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        addr_hi(0),
+        reserved1(0),
+        addr_lo(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_WEIGHT3_BASE and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_BASE) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; address and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_BASE); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control, addr_hi and addr_lo into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(addr_hi) << 16;
+        word |= uint64_t(addr_lo) << 32;
+        return word;
+    }
+    CONSTEXPR uint64_t get_addr() const // Reassembles the 40-bit address as (addr_hi << 32) | addr_lo
+    {
+        return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+    }
+    CONSTEXPR npu_set_weight3_base_t& set_addr(uint64_t value) // Stores the low 40 bits of value; returns *this so calls can be chained
+    {
+        addr_lo = static_cast<uint32_t>((value) & static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())); addr_hi = static_cast<uint8_t>((value >> 32) & static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())); return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("addr", "0x"-prefixed hex of get_addr()) to fields
+    {
+        std::stringstream saddr; saddr << std::hex << "0x" << get_addr();
+        fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Weight stream byte length for weight decoder 3: 64-bit cmd1 command (NPU_SET_WEIGHT3_LENGTH) with a 32-bit length payload
+struct npu_set_weight3_length_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t reserved1:16; //  reserved; written as 0 by both C++ constructors
+    uint32_t length:32; //  Weight stream byte length; packed into bits 32-63 of the command word
+#ifdef __cplusplus
+public:
+    npu_set_weight3_length_t(uint32_t _length) : // Builds the command carrying _length unchanged
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(_length)
+    {}
+    CONSTEXPR npu_set_weight3_length_t() : // Builds the command with a zero length
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_LENGTH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        reserved1(0),
+        length(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_WEIGHT3_LENGTH and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_LENGTH) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; length and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT3_LENGTH); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control and length into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(length) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the stored opcode cast to cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_weight3_length_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Stores value masked to 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the stored control bits cast to cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_weight3_length_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Stores value masked to 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_length() const // Returns the stored 32-bit length
+    {
+        return static_cast<uint32_t>(length);
+    }
+    CONSTEXPR npu_set_weight3_length_t& set_length(uint32_t value) // Stores value unchanged (field is a full 32 bits); returns *this for chaining
+    {
+        length = value;
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends ("length", decimal string of length) to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Resize X axis step parameters: 64-bit cmd1 command (opcode NPU_SET_RESIZE_X) carrying four integer/modulo step fields
+struct npu_set_resize_x_step_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t one_step_int:4; //  (1*scale_d) / scale_n; packed into bits 16-19
+    uint32_t blk_step_int:11; //  ((ofm_block_size-1)*scale_d) / scale_n; packed into bits 20-30
+    uint32_t reserved1:1; //  reserved; written as 0 by both C++ constructors
+    uint32_t one_step_mod:11; //  (1*scale_d) % scale_n; packed into bits 32-42
+    uint32_t reserved2:5; //  reserved; written as 0 by both C++ constructors
+    uint32_t blk_step_mod:11; //  ((ofm_block_size-1)*scale_d) % scale_n; packed into bits 48-58
+    uint32_t reserved3:5; //  reserved; written as 0 by both C++ constructors
+#ifdef __cplusplus
+public:
+    npu_set_resize_x_step_t(uint32_t _one_step_int, uint32_t _blk_step_int, uint32_t _one_step_mod, uint32_t _blk_step_mod) : // Masks each argument to its field width (4/11/11/11 bits); unlike the setters, no assert on overflow
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_X)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        one_step_int(_one_step_int & ((1U << 4)-1)),
+        blk_step_int(_blk_step_int & ((1U << 11)-1)),
+        reserved1(0),
+        one_step_mod(_one_step_mod & ((1U << 11)-1)),
+        reserved2(0),
+        blk_step_mod(_blk_step_mod & ((1U << 11)-1)),
+        reserved3(0)
+    {}
+    CONSTEXPR npu_set_resize_x_step_t() : // Builds the command with all step fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_X)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        one_step_int(0),
+        blk_step_int(0),
+        reserved1(0),
+        one_step_mod(0),
+        reserved2(0),
+        blk_step_mod(0),
+        reserved3(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_RESIZE_X and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_X) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; step and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_X); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control and the four step fields into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(one_step_int) << 16;
+        word |= uint64_t(blk_step_int) << 20;
+        word |= uint64_t(one_step_mod) << 32;
+        word |= uint64_t(blk_step_mod) << 48;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the stored opcode cast to cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_resize_x_step_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Stores value masked to 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the stored control bits cast to cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_resize_x_step_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Stores value masked to 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_one_step_int() const // Returns the stored 4-bit one_step_int
+    {
+        return static_cast<uint32_t>(one_step_int);
+    }
+    CONSTEXPR npu_set_resize_x_step_t& set_one_step_int(uint32_t value) // Asserts value fits in 4 bits, then stores it; returns *this for chaining
+    {
+        assert((value >> 4) == 0);
+        one_step_int = static_cast<uint8_t>(value & ((1U << 4)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_blk_step_int() const // Returns the stored 11-bit blk_step_int
+    {
+        return static_cast<uint32_t>(blk_step_int);
+    }
+    CONSTEXPR npu_set_resize_x_step_t& set_blk_step_int(uint32_t value) // Asserts value fits in 11 bits, then stores it; returns *this for chaining
+    {
+        assert((value >> 11) == 0);
+        blk_step_int = static_cast<uint16_t>(value & ((1U << 11)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_one_step_mod() const // Returns the stored 11-bit one_step_mod
+    {
+        return static_cast<uint32_t>(one_step_mod);
+    }
+    CONSTEXPR npu_set_resize_x_step_t& set_one_step_mod(uint32_t value) // Asserts value fits in 11 bits, then stores it; returns *this for chaining
+    {
+        assert((value >> 11) == 0);
+        one_step_mod = static_cast<uint16_t>(value & ((1U << 11)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_blk_step_mod() const // Returns the stored 11-bit blk_step_mod
+    {
+        return static_cast<uint32_t>(blk_step_mod);
+    }
+    CONSTEXPR npu_set_resize_x_step_t& set_blk_step_mod(uint32_t value) // Asserts value fits in 11 bits, then stores it; returns *this for chaining
+    {
+        assert((value >> 11) == 0);
+        blk_step_mod = static_cast<uint16_t>(value & ((1U << 11)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends one (name, decimal value) pair per step field to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("one_step_int", std::to_string(one_step_int)));
+        fields.push_back(std::make_pair<std::string, std::string>("blk_step_int", std::to_string(blk_step_int)));
+        fields.push_back(std::make_pair<std::string, std::string>("one_step_mod", std::to_string(one_step_mod)));
+        fields.push_back(std::make_pair<std::string, std::string>("blk_step_mod", std::to_string(blk_step_mod)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Resize Y axis step parameters: 64-bit cmd1 command (opcode NPU_SET_RESIZE_Y) carrying four integer/modulo step fields
+struct npu_set_resize_y_step_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; packed into bits 0-9 of the command word
+    uint32_t reserved0:4; //  reserved; written as 0 by both C++ constructors
+    uint32_t control:2; //  control; packed into bits 14-15 of the command word
+    uint32_t one_step_int:4; //  (1*scale_d) / scale_n; packed into bits 16-19
+    uint32_t blk_step_int:11; //  ((ofm_block_size-1)*scale_d) / scale_n; packed into bits 20-30
+    uint32_t reserved1:1; //  reserved; written as 0 by both C++ constructors
+    uint32_t one_step_mod:11; //  (1*scale_d) % scale_n; packed into bits 32-42
+    uint32_t reserved2:5; //  reserved; written as 0 by both C++ constructors
+    uint32_t blk_step_mod:11; //  ((ofm_block_size-1)*scale_d) % scale_n; packed into bits 48-58
+    uint32_t reserved3:5; //  reserved; written as 0 by both C++ constructors
+#ifdef __cplusplus
+public:
+    npu_set_resize_y_step_t(uint32_t _one_step_int, uint32_t _blk_step_int, uint32_t _one_step_mod, uint32_t _blk_step_mod) : // Masks each argument to its field width (4/11/11/11 bits); unlike the setters, no assert on overflow
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_Y)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        one_step_int(_one_step_int & ((1U << 4)-1)),
+        blk_step_int(_blk_step_int & ((1U << 11)-1)),
+        reserved1(0),
+        one_step_mod(_one_step_mod & ((1U << 11)-1)),
+        reserved2(0),
+        blk_step_mod(_blk_step_mod & ((1U << 11)-1)),
+        reserved3(0)
+    {}
+    CONSTEXPR npu_set_resize_y_step_t() : // Builds the command with all step fields zero
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_Y)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        one_step_int(0),
+        blk_step_int(0),
+        reserved1(0),
+        one_step_mod(0),
+        reserved2(0),
+        blk_step_mod(0),
+        reserved3(0)
+    {}
+    CONSTEXPR bool valid() const // True when opcode is NPU_SET_RESIZE_Y and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_Y) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // Resets opcode and control only; step and reserved fields are left untouched
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_RESIZE_Y); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // Packs opcode, control and the four step fields into one 64-bit word; reserved bits are emitted as 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(one_step_int) << 16;
+        word |= uint64_t(blk_step_int) << 20;
+        word |= uint64_t(one_step_mod) << 32;
+        word |= uint64_t(blk_step_mod) << 48;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the stored opcode cast to cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_set_resize_y_step_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Stores value masked to 10 bits; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the stored control bits cast to cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_set_resize_y_step_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // Stores value masked to 2 bits; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_one_step_int() const // Returns the stored 4-bit one_step_int
+    {
+        return static_cast<uint32_t>(one_step_int);
+    }
+    CONSTEXPR npu_set_resize_y_step_t& set_one_step_int(uint32_t value) // Asserts value fits in 4 bits, then stores it; returns *this for chaining
+    {
+        assert((value >> 4) == 0);
+        one_step_int = static_cast<uint8_t>(value & ((1U << 4)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_blk_step_int() const // Returns the stored 11-bit blk_step_int
+    {
+        return static_cast<uint32_t>(blk_step_int);
+    }
+    CONSTEXPR npu_set_resize_y_step_t& set_blk_step_int(uint32_t value) // Asserts value fits in 11 bits, then stores it; returns *this for chaining
+    {
+        assert((value >> 11) == 0);
+        blk_step_int = static_cast<uint16_t>(value & ((1U << 11)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_one_step_mod() const // Returns the stored 11-bit one_step_mod
+    {
+        return static_cast<uint32_t>(one_step_mod);
+    }
+    CONSTEXPR npu_set_resize_y_step_t& set_one_step_mod(uint32_t value) // Asserts value fits in 11 bits, then stores it; returns *this for chaining
+    {
+        assert((value >> 11) == 0);
+        one_step_mod = static_cast<uint16_t>(value & ((1U << 11)-1));
+        return *this;
+    }
+    CONSTEXPR uint32_t get_blk_step_mod() const // Returns the stored 11-bit blk_step_mod
+    {
+        return static_cast<uint32_t>(blk_step_mod);
+    }
+    CONSTEXPR npu_set_resize_y_step_t& set_blk_step_mod(uint32_t value) // Asserts value fits in 11 bits, then stores it; returns *this for chaining
+    {
+        assert((value >> 11) == 0);
+        blk_step_mod = static_cast<uint16_t>(value & ((1U << 11)-1));
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // Appends one (name, decimal value) pair per step field to fields
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("one_step_int", std::to_string(one_step_int)));
+        fields.push_back(std::make_pair<std::string, std::string>("blk_step_int", std::to_string(blk_step_int)));
+        fields.push_back(std::make_pair<std::string, std::string>("one_step_mod", std::to_string(one_step_mod)));
+        fields.push_back(std::make_pair<std::string, std::string>("blk_step_mod", std::to_string(blk_step_mod)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+// Branch to new location: 64-bit command with opcode NPU_OP_BRANCH, a 1-bit branch condition and a 32-bit branch target
+struct npu_op_branch_t
+{
+#ifdef __cplusplus
+private:
+#endif
+    uint32_t opcode:10; //  opcode; placed at bit 0 by operator uint64_t()
+    uint32_t reserved0:4; //  zeroed by both constructors; not emitted by operator uint64_t()
+    uint32_t control:2; //  control; placed at bit 14 by operator uint64_t()
+    uint32_t branch_cond:1; //  Branch condition; placed at bit 16 by operator uint64_t()
+    uint32_t reserved1:15; //  zeroed by both constructors; not emitted by operator uint64_t()
+    uint32_t branch_target:32; //  Branch target in bytes; placed at bit 32 by operator uint64_t()
+#ifdef __cplusplus
+public:
+    npu_op_branch_t(NPU_NAMESPACE::branch_cond _branch_cond, uint32_t _branch_target) : // fully initialised branch command (not CONSTEXPR, unlike the default constructor)
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_OP_BRANCH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        branch_cond(static_cast<uint8_t>(_branch_cond) & ((1U << 1)-1)), // only the lowest bit of the condition is kept
+        reserved1(0),
+        branch_target(_branch_target)
+    {}
+    CONSTEXPR npu_op_branch_t() : // default: opcode/control preset, branch_cond 0, branch_target 0
+        opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_OP_BRANCH)),
+        reserved0(0),
+        control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+        branch_cond(0),
+        reserved1(0),
+        branch_target(0)
+    {}
+    CONSTEXPR bool valid() const // true when opcode is NPU_OP_BRANCH and control is 1 or 2
+    {
+        return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_OP_BRANCH) && control >= 1 && control <= 2;
+    }
+    CONSTEXPR void init() // resets opcode and control only; branch_cond, branch_target and reserved bits are left as they are
+    {
+        opcode = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_OP_BRANCH); control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+    }
+    operator uint64_t() // packs opcode, control, branch_cond and branch_target into one 64-bit word; reserved bits stay 0
+    {
+        uint64_t word = 0;
+        word |= uint64_t(opcode) << 0;
+        word |= uint64_t(control) << 14;
+        word |= uint64_t(branch_cond) << 16;
+        word |= uint64_t(branch_target) << 32;
+        return word;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // stored opcode reinterpreted as cmd1_opcode
+    {
+        return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+    }
+    CONSTEXPR npu_op_branch_t& set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps the low 10 bits of value; returns *this for chaining
+    {
+        opcode = static_cast<uint16_t>(value) & ((1U << 10)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control reinterpreted as cmd_ctrl
+    {
+        return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+    }
+    CONSTEXPR npu_op_branch_t& set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps the low 2 bits of value; returns *this for chaining
+    {
+        control = static_cast<uint8_t>(value) & ((1U << 2)-1);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::branch_cond get_branch_cond() const // stored condition bit reinterpreted as branch_cond
+    {
+        return static_cast<NPU_NAMESPACE::branch_cond>(branch_cond);
+    }
+    CONSTEXPR npu_op_branch_t& set_branch_cond(NPU_NAMESPACE::branch_cond value) // keeps the lowest bit of value; returns *this for chaining
+    {
+        branch_cond = static_cast<uint8_t>(value) & ((1U << 1)-1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_branch_target() const // branch target in bytes, as stored
+    {
+        return static_cast<uint32_t>(branch_target);
+    }
+    CONSTEXPR npu_op_branch_t& set_branch_target(uint32_t value) // full 32-bit value is stored, no masking or range check; returns *this
+    {
+        branch_target = value;
+        return *this;
+    }
+#ifdef NPU_DISASSEMBLE
+    void disassemble(std::vector<std::pair<std::string, std::string>>& fields) const // appends (field name, text) pairs for branch_cond and branch_target
+    {
+        fields.push_back(std::make_pair<std::string, std::string>("branch_cond", (branch_cond < (sizeof(branch_cond_str)/sizeof(branch_cond_str[0])) ? branch_cond_str[branch_cond] : "****")));
+        fields.push_back(std::make_pair<std::string, std::string>("branch_target", std::to_string(branch_target)));
+    }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+};
+#ifdef __cplusplus
+};
+#endif
+#define NPU_OP_STRUCTS /* X-macro: expands NPU_OP_(name) per entry; NPU_OP_ is presumably supplied by the includer - TODO confirm */ \
+    NPU_OP_(stop) \
+    NPU_OP_(irq) \
+    NPU_OP_(conv) \
+    NPU_OP_(depthwise) \
+    NPU_OP_(pool) \
+    NPU_OP_(elementwise) \
+    NPU_OP_(resize) \
+    NPU_OP_(dma_start) \
+    NPU_OP_(dma_wait) \
+    NPU_OP_(kernel_wait) \
+    NPU_OP_(pmu_mask) \
+    NPU_OP_(branch)
+
+#define NPU_SET_STRUCTS /* X-macro: expands NPU_SET_(name) per entry; NPU_SET_ is presumably supplied by the includer - TODO confirm */ \
+    NPU_SET_(ifm_pad_top) \
+    NPU_SET_(ifm_pad_left) \
+    NPU_SET_(ifm_pad_right) \
+    NPU_SET_(ifm_pad_bottom) \
+    NPU_SET_(ifm_depth_m1) \
+    NPU_SET_(ifm_precision) \
+    NPU_SET_(ifm_upscale) \
+    NPU_SET_(ifm_zero_point) \
+    NPU_SET_(ifm_width0_m1) \
+    NPU_SET_(ifm_height0_m1) \
+    NPU_SET_(ifm_height1_m1) \
+    NPU_SET_(ifm_region) \
+    NPU_SET_(ifm_broadcast) \
+    NPU_SET_(ofm_width_m1) \
+    NPU_SET_(ofm_height_m1) \
+    NPU_SET_(ofm_depth_m1) \
+    NPU_SET_(ofm_precision) \
+    NPU_SET_(ofm_blk_width_m1) \
+    NPU_SET_(ofm_blk_height_m1) \
+    NPU_SET_(ofm_blk_depth_m1) \
+    NPU_SET_(ofm_zero_point) \
+    NPU_SET_(ofm_width0_m1) \
+    NPU_SET_(ofm_height0_m1) \
+    NPU_SET_(ofm_height1_m1) \
+    NPU_SET_(ofm_region) \
+    NPU_SET_(kernel_width_m1) \
+    NPU_SET_(kernel_height_m1) \
+    NPU_SET_(kernel_stride) \
+    NPU_SET_(acc_format) \
+    NPU_SET_(activation) \
+    NPU_SET_(activation_min) \
+    NPU_SET_(activation_max) \
+    NPU_SET_(weight_region) \
+    NPU_SET_(scale_region) \
+    NPU_SET_(weight_format) \
+    NPU_SET_(blockdep) \
+    NPU_SET_(resize_x_scale_n_m1) \
+    NPU_SET_(resize_y_scale_n_m1) \
+    NPU_SET_(resize_x_offset) \
+    NPU_SET_(resize_y_offset) \
+    NPU_SET_(dma0_src_region) \
+    NPU_SET_(dma0_dst_region) \
+    NPU_SET_(dma0_size0) \
+    NPU_SET_(dma0_size1) \
+    NPU_SET_(dma0_idx_region) \
+    NPU_SET_(ifm2_broadcast) \
+    NPU_SET_(ifm2_precision) \
+    NPU_SET_(ifm2_zero_point) \
+    NPU_SET_(ifm2_width0_m1) \
+    NPU_SET_(ifm2_height0_m1) \
+    NPU_SET_(ifm2_height1_m1) \
+    NPU_SET_(ifm2_region) \
+    NPU_SET_(ifm_base0) \
+    NPU_SET_(ifm_base1) \
+    NPU_SET_(ifm_base2) \
+    NPU_SET_(ifm_base3) \
+    NPU_SET_(ifm_stride_x) \
+    NPU_SET_(ifm_stride_y) \
+    NPU_SET_(ifm_stride_c) \
+    NPU_SET_(ofm_base0) \
+    NPU_SET_(ofm_base1) \
+    NPU_SET_(ofm_base2) \
+    NPU_SET_(ofm_base3) \
+    NPU_SET_(ofm_stride_x) \
+    NPU_SET_(ofm_stride_y) \
+    NPU_SET_(ofm_stride_c) \
+    NPU_SET_(weight_base) \
+    NPU_SET_(weight_length) \
+    NPU_SET_(scale_base) \
+    NPU_SET_(scale_length) \
+    NPU_SET_(ofm_scale) \
+    NPU_SET_(ifm_scale) \
+    NPU_SET_(ifm2_scale) \
+    NPU_SET_(op_scalar) \
+    NPU_SET_(dma0_src) \
+    NPU_SET_(dma0_dst) \
+    NPU_SET_(dma0_len) \
+    NPU_SET_(dma0_src_stride0) \
+    NPU_SET_(dma0_src_stride1) \
+    NPU_SET_(dma0_dst_stride0) \
+    NPU_SET_(dma0_dst_stride1) \
+    NPU_SET_(dma0_idx) \
+    NPU_SET_(dma0_idx_max) \
+    NPU_SET_(dma0_idx_skip1) \
+    NPU_SET_(ifm2_base0) \
+    NPU_SET_(ifm2_base1) \
+    NPU_SET_(ifm2_base2) \
+    NPU_SET_(ifm2_base3) \
+    NPU_SET_(ifm2_stride_x) \
+    NPU_SET_(ifm2_stride_y) \
+    NPU_SET_(ifm2_stride_c) \
+    NPU_SET_(weight1_base) \
+    NPU_SET_(weight1_length) \
+    NPU_SET_(weight2_base) \
+    NPU_SET_(weight2_length) \
+    NPU_SET_(weight3_base) \
+    NPU_SET_(weight3_length) \
+    NPU_SET_(resize_x_step) \
+    NPU_SET_(resize_y_step)
+
+#define EXPAND_ACC_FORMAT(FUNC, SEP) /* X-macro: applies FUNC(acc_format, <entry>) to each entry, joined by SEP */ \
+    FUNC(acc_format, I32) SEP \
+    FUNC(acc_format, I48)
+
+#define EXPAND_ACC_INPUT(FUNC, SEP) /* X-macro: applies FUNC(acc_input, <entry>) to each entry, joined by SEP */ \
+    FUNC(acc_input, RESET) SEP \
+    FUNC(acc_input, KEEP) SEP \
+    FUNC(acc_input, IFM2)
+
+#define EXPAND_ACC_OUTPUT(FUNC, SEP) /* X-macro: applies FUNC(acc_output, <entry>) to each entry, joined by SEP */ \
+    FUNC(acc_output, ENABLE) SEP \
+    FUNC(acc_output, DISABLE)
+
+#define EXPAND_ACTIVATION_CLIP_RANGE(FUNC, SEP) /* X-macro: applies FUNC(activation_clip_range, <entry>) to each entry, joined by SEP */ \
+    FUNC(activation_clip_range, B16) SEP \
+    FUNC(activation_clip_range, NONE)
+
+#define EXPAND_ACTIVATION_FORMAT(FUNC, SEP) /* X-macro: applies FUNC(activation_format, <entry>) to each entry, joined by SEP */ \
+    FUNC(activation_format, NHWC) SEP \
+    FUNC(activation_format, NHCWB16)
+
+#define EXPAND_ACTIVATION_FUNCTION(FUNC, SEP) /* X-macro: applies FUNC(activation_function, <entry>) to each entry, joined by SEP */ \
+    FUNC(activation_function, LUT_NONE) SEP \
+    FUNC(activation_function, LUT_U8_U8) SEP \
+    FUNC(activation_function, LUT_S8_S8) SEP \
+    FUNC(activation_function, LUT_S8_S16) SEP \
+    FUNC(activation_function, LUT_S8_S32) SEP \
+    FUNC(activation_function, LUT_S16_S16) SEP \
+    FUNC(activation_function, LUT_S16_S32) SEP \
+    FUNC(activation_function, LUT_TANH) SEP \
+    FUNC(activation_function, LUT_SIGMOID)
+
+#define EXPAND_ACTIVATION_PRECISION(FUNC, SEP) /* X-macro: applies FUNC(activation_precision, <entry>) to each entry, joined by SEP */ \
+    FUNC(activation_precision, B8) SEP \
+    FUNC(activation_precision, B16) SEP \
+    FUNC(activation_precision, B32) SEP \
+    FUNC(activation_precision, B64)
+
+#define EXPAND_ACTIVATION_REVERSE(FUNC, SEP) /* X-macro: applies FUNC(activation_reverse, <entry>) to each entry, joined by SEP */ \
+    FUNC(activation_reverse, NONE) SEP \
+    FUNC(activation_reverse, H) SEP \
+    FUNC(activation_reverse, W) SEP \
+    FUNC(activation_reverse, C)
+
+#define EXPAND_ACTIVATION_STORAGE(FUNC, SEP) /* X-macro: applies FUNC(activation_storage, <entry>) to each entry, joined by SEP */ \
+    FUNC(activation_storage, TILE2X2) SEP \
+    FUNC(activation_storage, TILE3X1) SEP \
+    FUNC(activation_storage, CHAINED) SEP \
+    FUNC(activation_storage, NONE)
+
+#define EXPAND_ACTIVATION_TRANSPOSE(FUNC, SEP) /* X-macro: applies FUNC(activation_transpose, <entry>) to each entry, joined by SEP */ \
+    FUNC(activation_transpose, HWC) SEP \
+    FUNC(activation_transpose, WHC) SEP \
+    FUNC(activation_transpose, HCW) SEP \
+    FUNC(activation_transpose, WCH) SEP \
+    FUNC(activation_transpose, CHW) SEP \
+    FUNC(activation_transpose, CWH)
+
+#define EXPAND_ACTIVATION_TYPE(FUNC, SEP) /* X-macro: applies FUNC(activation_type, <entry>) to each entry, joined by SEP */ \
+    FUNC(activation_type, UNSIGNED) SEP \
+    FUNC(activation_type, SIGNED)
+
+#define EXPAND_AXI_MEM_DOMAIN(FUNC, SEP) /* X-macro: applies FUNC(axi_mem_domain, <entry>) to each entry, joined by SEP */ \
+    FUNC(axi_mem_domain, NON_SHARABLE) SEP \
+    FUNC(axi_mem_domain, INNER_SHARABLE) SEP \
+    FUNC(axi_mem_domain, OUTER_SHARABLE) SEP \
+    FUNC(axi_mem_domain, SYSTEM)
+
+#define EXPAND_AXI_MEM_ENCODING(FUNC, SEP) /* X-macro: applies FUNC(axi_mem_encoding, <entry>) to each entry, joined by SEP */ \
+    FUNC(axi_mem_encoding, DEVICE_NON_BUFFERABLE) SEP \
+    FUNC(axi_mem_encoding, DEVICE_BUFFERABLE) SEP \
+    FUNC(axi_mem_encoding, NORMAL_NON_CACHEABLE_NON_BUFFERABLE) SEP \
+    FUNC(axi_mem_encoding, NORMAL_NON_CACHEABLE_BUFFERABLE) SEP \
+    FUNC(axi_mem_encoding, WRITE_THROUGH_NO_ALLOCATE) SEP \
+    FUNC(axi_mem_encoding, WRITE_THROUGH_READ_ALLOCATE) SEP \
+    FUNC(axi_mem_encoding, WRITE_THROUGH_WRITE_ALLOCATE) SEP \
+    FUNC(axi_mem_encoding, WRITE_THROUGH_READ_AND_WRITE_ALLOCATE) SEP \
+    FUNC(axi_mem_encoding, WRITE_BACK_NO_ALLOCATE) SEP \
+    FUNC(axi_mem_encoding, WRITE_BACK_READ_ALLOCATE) SEP \
+    FUNC(axi_mem_encoding, WRITE_BACK_WRITE_ALLOCATE) SEP \
+    FUNC(axi_mem_encoding, WRITE_BACK_READ_AND_WRITE_ALLOCATE)
+
+#define EXPAND_AXI_PORT(FUNC, SEP) /* X-macro: applies FUNC(axi_port, <entry>) to each entry, joined by SEP */ \
+    FUNC(axi_port, SRAM) SEP \
+    FUNC(axi_port, EXT)
+
+#define EXPAND_BRANCH_COND(FUNC, SEP) /* X-macro: applies FUNC(branch_cond, <entry>) to each entry, joined by SEP */ \
+    FUNC(branch_cond, ALWAYS) SEP \
+    FUNC(branch_cond, RF_TRUE)
+
+#define EXPAND_BROADCAST_MODE(FUNC, SEP) /* X-macro: applies FUNC(broadcast_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(broadcast_mode, NONE) SEP \
+    FUNC(broadcast_mode, H) SEP \
+    FUNC(broadcast_mode, W) SEP \
+    FUNC(broadcast_mode, HW) SEP \
+    FUNC(broadcast_mode, C) SEP \
+    FUNC(broadcast_mode, CH) SEP \
+    FUNC(broadcast_mode, CW) SEP \
+    FUNC(broadcast_mode, CWH) SEP \
+    FUNC(broadcast_mode, SCALAR)
+
+#define EXPAND_CMD0_OPCODE(FUNC, SEP) /* X-macro: applies FUNC(cmd0_opcode, <entry>) to each entry, joined by SEP */ \
+    FUNC(cmd0_opcode, NPU_OP_STOP) SEP \
+    FUNC(cmd0_opcode, NPU_OP_IRQ) SEP \
+    FUNC(cmd0_opcode, NPU_OP_CONV) SEP \
+    FUNC(cmd0_opcode, NPU_OP_DEPTHWISE) SEP \
+    FUNC(cmd0_opcode, NPU_OP_POOL) SEP \
+    FUNC(cmd0_opcode, NPU_OP_ELEMENTWISE) SEP \
+    FUNC(cmd0_opcode, NPU_OP_RESIZE) SEP \
+    FUNC(cmd0_opcode, NPU_OP_DMA_START) SEP \
+    FUNC(cmd0_opcode, NPU_OP_DMA_WAIT) SEP \
+    FUNC(cmd0_opcode, NPU_OP_KERNEL_WAIT) SEP \
+    FUNC(cmd0_opcode, NPU_OP_PMU_MASK) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_PAD_TOP) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_PAD_LEFT) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_PAD_RIGHT) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_PAD_BOTTOM) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_DEPTH_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_PRECISION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_UPSCALE) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_BROADCAST) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_ZERO_POINT) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_WIDTH0_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_HEIGHT0_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_HEIGHT1_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM_REGION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_WIDTH_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_HEIGHT_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_DEPTH_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_PRECISION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_BLK_WIDTH_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_BLK_HEIGHT_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_BLK_DEPTH_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_ZERO_POINT) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_WIDTH0_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_HEIGHT0_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_HEIGHT1_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_OFM_REGION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_KERNEL_WIDTH_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_KERNEL_HEIGHT_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_KERNEL_STRIDE) SEP \
+    FUNC(cmd0_opcode, NPU_SET_ACC_FORMAT) SEP \
+    FUNC(cmd0_opcode, NPU_SET_ACTIVATION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_ACTIVATION_MIN) SEP \
+    FUNC(cmd0_opcode, NPU_SET_ACTIVATION_MAX) SEP \
+    FUNC(cmd0_opcode, NPU_SET_WEIGHT_REGION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_SCALE_REGION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_RESIZE_X_SCALE_N_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_RESIZE_Y_SCALE_N_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_RESIZE_X_OFFSET) SEP \
+    FUNC(cmd0_opcode, NPU_SET_RESIZE_Y_OFFSET) SEP \
+    FUNC(cmd0_opcode, NPU_SET_WEIGHT_FORMAT) SEP \
+    FUNC(cmd0_opcode, NPU_SET_BLOCKDEP) SEP \
+    FUNC(cmd0_opcode, NPU_SET_DMA0_SRC_REGION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_DMA0_DST_REGION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_DMA0_SIZE0) SEP \
+    FUNC(cmd0_opcode, NPU_SET_DMA0_SIZE1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_DMA0_IDX_REGION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM2_BROADCAST) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM2_PRECISION) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM2_ZERO_POINT) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM2_WIDTH0_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM2_HEIGHT0_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM2_HEIGHT1_M1) SEP \
+    FUNC(cmd0_opcode, NPU_SET_IFM2_REGION)
+
+#define EXPAND_CMD1_OPCODE(FUNC, SEP) /* X-macro: applies FUNC(cmd1_opcode, <entry>) to each entry, joined by SEP */ \
+    FUNC(cmd1_opcode, NPU_SET_IFM_BASE0) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM_BASE1) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM_BASE2) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM_BASE3) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_X) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_Y) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_C) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OFM_BASE0) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OFM_BASE1) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OFM_BASE2) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OFM_BASE3) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OFM_STRIDE_X) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OFM_STRIDE_Y) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OFM_STRIDE_C) SEP \
+    FUNC(cmd1_opcode, NPU_SET_WEIGHT_BASE) SEP \
+    FUNC(cmd1_opcode, NPU_SET_WEIGHT_LENGTH) SEP \
+    FUNC(cmd1_opcode, NPU_SET_SCALE_BASE) SEP \
+    FUNC(cmd1_opcode, NPU_SET_SCALE_LENGTH) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OFM_SCALE) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM_SCALE) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM2_SCALE) SEP \
+    FUNC(cmd1_opcode, NPU_SET_OP_SCALAR) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_SRC) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_DST) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_LEN) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_SRC_STRIDE0) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_SRC_STRIDE1) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_DST_STRIDE0) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_DST_STRIDE1) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_IDX) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_IDX_MAX) SEP \
+    FUNC(cmd1_opcode, NPU_SET_DMA0_IDX_SKIP1) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM2_BASE0) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM2_BASE1) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM2_BASE2) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM2_BASE3) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_X) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_Y) SEP \
+    FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_C) SEP \
+    FUNC(cmd1_opcode, NPU_SET_WEIGHT1_BASE) SEP \
+    FUNC(cmd1_opcode, NPU_SET_WEIGHT1_LENGTH) SEP \
+    FUNC(cmd1_opcode, NPU_SET_WEIGHT2_BASE) SEP \
+    FUNC(cmd1_opcode, NPU_SET_WEIGHT2_LENGTH) SEP \
+    FUNC(cmd1_opcode, NPU_SET_WEIGHT3_BASE) SEP \
+    FUNC(cmd1_opcode, NPU_SET_WEIGHT3_LENGTH) SEP \
+    FUNC(cmd1_opcode, NPU_SET_RESIZE_X) SEP \
+    FUNC(cmd1_opcode, NPU_SET_RESIZE_Y) SEP \
+    FUNC(cmd1_opcode, NPU_OP_BRANCH)
+
+#define EXPAND_CMD_CTRL(FUNC, SEP) /* X-macro: applies FUNC(cmd_ctrl, <entry>) to each entry, joined by SEP */ \
+    FUNC(cmd_ctrl, CMD0_CTRL) SEP \
+    FUNC(cmd_ctrl, CMD1_CTRL)
+
+#define EXPAND_CUSTOM_DMA(FUNC, SEP) /* X-macro: applies FUNC(custom_dma, <entry>) to each entry, joined by SEP */ \
+    FUNC(custom_dma, NOT_IMPLEMENTED) SEP \
+    FUNC(custom_dma, IMPLEMENTED)
+
+#define EXPAND_DMA_FAULT_CHANNEL(FUNC, SEP) /* X-macro: applies FUNC(dma_fault_channel, <entry>) to each entry, joined by SEP */ \
+    FUNC(dma_fault_channel, CMD_READ) SEP \
+    FUNC(dma_fault_channel, IFM_READ) SEP \
+    FUNC(dma_fault_channel, WEIGHT_READ) SEP \
+    FUNC(dma_fault_channel, SBS_READ) SEP \
+    FUNC(dma_fault_channel, MEM2MEM_READ) SEP \
+    FUNC(dma_fault_channel, OFM_WRITE) SEP \
+    FUNC(dma_fault_channel, MEM2MEM_WRITE)
+
+#define EXPAND_DMA_FAULT_SRC(FUNC, SEP) /* X-macro: applies FUNC(dma_fault_src, <entry>) to each entry, joined by SEP */ \
+    FUNC(dma_fault_src, SRAM) SEP \
+    FUNC(dma_fault_src, EXT)
+
+#define EXPAND_DMA_IDX_MODE(FUNC, SEP) /* X-macro: applies FUNC(dma_idx_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(dma_idx_mode, DISABLED) SEP \
+    FUNC(dma_idx_mode, ENABLED)
+
+#define EXPAND_DMA_REGION_MODE(FUNC, SEP) /* X-macro: applies FUNC(dma_region_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(dma_region_mode, EXTERNAL) SEP \
+    FUNC(dma_region_mode, INTERNAL)
+
+#define EXPAND_DMA_STRIDE_MODE(FUNC, SEP) /* X-macro: applies FUNC(dma_stride_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(dma_stride_mode, D1) SEP \
+    FUNC(dma_stride_mode, D2) SEP \
+    FUNC(dma_stride_mode, D3)
+
+#define EXPAND_ELEMENTWISE_MODE(FUNC, SEP) /* X-macro: applies FUNC(elementwise_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(elementwise_mode, MUL) SEP \
+    FUNC(elementwise_mode, ADD) SEP \
+    FUNC(elementwise_mode, SUB) SEP \
+    FUNC(elementwise_mode, MIN) SEP \
+    FUNC(elementwise_mode, MAX) SEP \
+    FUNC(elementwise_mode, LRELU) SEP \
+    FUNC(elementwise_mode, ABS) SEP \
+    FUNC(elementwise_mode, CLZ) SEP \
+    FUNC(elementwise_mode, SHR) SEP \
+    FUNC(elementwise_mode, SHL) SEP \
+    FUNC(elementwise_mode, LSR) SEP \
+    FUNC(elementwise_mode, DIV) SEP \
+    FUNC(elementwise_mode, CMP_EQ) SEP \
+    FUNC(elementwise_mode, CMP_NE) SEP \
+    FUNC(elementwise_mode, CMP_GE) SEP \
+    FUNC(elementwise_mode, CMP_GT) SEP \
+    FUNC(elementwise_mode, AND) SEP \
+    FUNC(elementwise_mode, OR) SEP \
+    FUNC(elementwise_mode, XOR) SEP \
+    FUNC(elementwise_mode, NOT) SEP \
+    FUNC(elementwise_mode, AND_NOT)
+
+#define EXPAND_IFM_UPSCALE_MODE(FUNC, SEP) /* X-macro: applies FUNC(ifm_upscale_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(ifm_upscale_mode, NONE) SEP \
+    FUNC(ifm_upscale_mode, NEAREST) SEP \
+    FUNC(ifm_upscale_mode, ZEROS)
+
+#define EXPAND_KERNEL_DECOMPOSITION(FUNC, SEP) /* X-macro: applies FUNC(kernel_decomposition, <entry>) to each entry, joined by SEP */ \
+    FUNC(kernel_decomposition, D8X8) SEP \
+    FUNC(kernel_decomposition, D4X4)
+
+#define EXPAND_KERNEL_DILATION(FUNC, SEP) /* X-macro: applies FUNC(kernel_dilation, <entry>) to each entry, joined by SEP */ \
+    FUNC(kernel_dilation, NONE) SEP \
+    FUNC(kernel_dilation, X2)
+
+#define EXPAND_MAX_BEATS(FUNC, SEP) /* X-macro: applies FUNC(max_beats, <entry>) to each entry, joined by SEP */ \
+    FUNC(max_beats, B64) SEP \
+    FUNC(max_beats, B128) SEP \
+    FUNC(max_beats, B256)
+
+#define EXPAND_MICROBLOCK(FUNC, SEP) /* X-macro: applies FUNC(microblock, <entry>) to each entry, joined by SEP */ \
+    FUNC(microblock, U1X1) SEP \
+    FUNC(microblock, U1X2) SEP \
+    FUNC(microblock, U1X4) SEP \
+    FUNC(microblock, U2X2) SEP \
+    FUNC(microblock, U2X4) SEP \
+    FUNC(microblock, U4X4)
+
+#define EXPAND_OFM_SCALE_MODE(FUNC, SEP) /* X-macro: applies FUNC(ofm_scale_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(ofm_scale_mode, PER_CHANNEL) SEP \
+    FUNC(ofm_scale_mode, GLOBAL)
+
+#define EXPAND_PMU_AXI_CHANNEL(FUNC, SEP) /* X-macro: applies FUNC(pmu_axi_channel, <entry>) to each entry, joined by SEP */ \
+    FUNC(pmu_axi_channel, RD_CMD) SEP \
+    FUNC(pmu_axi_channel, RD_IFM) SEP \
+    FUNC(pmu_axi_channel, RD_WEIGHTS) SEP \
+    FUNC(pmu_axi_channel, RD_SCALE_BIAS) SEP \
+    FUNC(pmu_axi_channel, RD_MEM2MEM) SEP \
+    FUNC(pmu_axi_channel, RD_IFM_STREAM) SEP \
+    FUNC(pmu_axi_channel, RD_MEM2MEM_IDX) SEP \
+    FUNC(pmu_axi_channel, WR_OFM) SEP \
+    FUNC(pmu_axi_channel, WR_MEM2MEM)
+
+#define EXPAND_PMU_EVENT(FUNC, SEP) /* X-macro: applies FUNC(pmu_event, <entry>) to each entry, joined by SEP */ \
+    FUNC(pmu_event, NO_EVENT) SEP \
+    FUNC(pmu_event, CYCLE) SEP \
+    FUNC(pmu_event, NPU_IDLE) SEP \
+    FUNC(pmu_event, CC_STALLED_ON_BLOCKDEP) SEP \
+    FUNC(pmu_event, CC_STALLED_ON_SHRAM_RECONFIG) SEP \
+    FUNC(pmu_event, NPU_ACTIVE) SEP \
+    FUNC(pmu_event, MAC_ACTIVE) SEP \
+    FUNC(pmu_event, MAC_DPU_ACTIVE) SEP \
+    FUNC(pmu_event, MAC_STALLED_BY_W_OR_ACC) SEP \
+    FUNC(pmu_event, MAC_STALLED_BY_W) SEP \
+    FUNC(pmu_event, MAC_STALLED_BY_ACC) SEP \
+    FUNC(pmu_event, MAC_STALLED_BY_IB) SEP \
+    FUNC(pmu_event, AO_ACTIVE) SEP \
+    FUNC(pmu_event, AO_STALLED_BY_BS_OR_OB) SEP \
+    FUNC(pmu_event, AO_STALLED_BY_BS) SEP \
+    FUNC(pmu_event, AO_STALLED_BY_OB) SEP \
+    FUNC(pmu_event, AO_STALLED_BY_AB_OR_CB) SEP \
+    FUNC(pmu_event, AO_STALLED_BY_AB) SEP \
+    FUNC(pmu_event, AO_STALLED_BY_CB) SEP \
+    FUNC(pmu_event, WD_ACTIVE) SEP \
+    FUNC(pmu_event, WD_STALLED) SEP \
+    FUNC(pmu_event, WD_STALLED_BY_WD_BUF) SEP \
+    FUNC(pmu_event, WD_STALLED_BY_WS_FC) SEP \
+    FUNC(pmu_event, WD_STALLED_BY_WS_TC) SEP \
+    FUNC(pmu_event, WD_TRANS_WBLK) SEP \
+    FUNC(pmu_event, WD_TRANS_WS_FC) SEP \
+    FUNC(pmu_event, WD_TRANS_WS_TC) SEP \
+    FUNC(pmu_event, WD_STALLED_BY_WS_SC0) SEP \
+    FUNC(pmu_event, WD_STALLED_BY_WS_SC1) SEP \
+    FUNC(pmu_event, WD_STALLED_BY_WS_SC2) SEP \
+    FUNC(pmu_event, WD_STALLED_BY_WS_SC3) SEP \
+    FUNC(pmu_event, WD_PARSE_ACTIVE_SC0) SEP \
+    FUNC(pmu_event, WD_PARSE_ACTIVE_SC1) SEP \
+    FUNC(pmu_event, WD_PARSE_ACTIVE_SC2) SEP \
+    FUNC(pmu_event, WD_PARSE_ACTIVE_SC3) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_SC0) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_SC1) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_SC2) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_SC3) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_IN_SC0) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_IN_SC1) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_IN_SC2) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_IN_SC3) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_OUT_SC0) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_OUT_SC1) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_OUT_SC2) SEP \
+    FUNC(pmu_event, WD_PARSE_STALL_OUT_SC3) SEP \
+    FUNC(pmu_event, WD_TRANS_WS_SC0) SEP \
+    FUNC(pmu_event, WD_TRANS_WS_SC1) SEP \
+    FUNC(pmu_event, WD_TRANS_WS_SC2) SEP \
+    FUNC(pmu_event, WD_TRANS_WS_SC3) SEP \
+    FUNC(pmu_event, WD_TRANS_WB0) SEP \
+    FUNC(pmu_event, WD_TRANS_WB1) SEP \
+    FUNC(pmu_event, WD_TRANS_WB2) SEP \
+    FUNC(pmu_event, WD_TRANS_WB3) SEP \
+    FUNC(pmu_event, SRAM_RD_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM_RD_TRANS_COMPLETED) SEP \
+    FUNC(pmu_event, SRAM_RD_DATA_BEAT_RECEIVED) SEP \
+    FUNC(pmu_event, SRAM_RD_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM_WR_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM_WR_TRANS_COMPLETED_M) SEP \
+    FUNC(pmu_event, SRAM_WR_TRANS_COMPLETED_S) SEP \
+    FUNC(pmu_event, SRAM_WR_DATA_BEAT_WRITTEN) SEP \
+    FUNC(pmu_event, SRAM_WR_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM_WR_DATA_BEAT_STALLED) SEP \
+    FUNC(pmu_event, SRAM_ENABLED_CYCLES) SEP \
+    FUNC(pmu_event, SRAM_RD_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM_WR_STALL_LIMIT) SEP \
+    FUNC(pmu_event, AXI_LATENCY_ANY) SEP \
+    FUNC(pmu_event, AXI_LATENCY_32) SEP \
+    FUNC(pmu_event, AXI_LATENCY_64) SEP \
+    FUNC(pmu_event, AXI_LATENCY_128) SEP \
+    FUNC(pmu_event, AXI_LATENCY_256) SEP \
+    FUNC(pmu_event, AXI_LATENCY_512) SEP \
+    FUNC(pmu_event, AXI_LATENCY_1024) SEP \
+    FUNC(pmu_event, ECC_DMA) SEP \
+    FUNC(pmu_event, ECC_MAC_IB) SEP \
+    FUNC(pmu_event, ECC_MAC_AB) SEP \
+    FUNC(pmu_event, ECC_AO_CB) SEP \
+    FUNC(pmu_event, ECC_AO_OB) SEP \
+    FUNC(pmu_event, ECC_AO_LUT) SEP \
+    FUNC(pmu_event, EXT_RD_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, EXT_RD_TRANS_COMPLETED) SEP \
+    FUNC(pmu_event, EXT_RD_DATA_BEAT_RECEIVED) SEP \
+    FUNC(pmu_event, EXT_RD_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, EXT_WR_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, EXT_WR_TRANS_COMPLETED_M) SEP \
+    FUNC(pmu_event, EXT_WR_TRANS_COMPLETED_S) SEP \
+    FUNC(pmu_event, EXT_WR_DATA_BEAT_WRITTEN) SEP \
+    FUNC(pmu_event, EXT_WR_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, EXT_WR_DATA_BEAT_STALLED) SEP \
+    FUNC(pmu_event, EXT_ENABLED_CYCLES) SEP \
+    FUNC(pmu_event, EXT_RD_STALL_LIMIT) SEP \
+    FUNC(pmu_event, EXT_WR_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM0_RD_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM0_RD_TRANS_COMPLETED) SEP \
+    FUNC(pmu_event, SRAM0_RD_DATA_BEAT_RECEIVED) SEP \
+    FUNC(pmu_event, SRAM0_RD_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM0_WR_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM0_WR_TRANS_COMPLETED_M) SEP \
+    FUNC(pmu_event, SRAM0_WR_TRANS_COMPLETED_S) SEP \
+    FUNC(pmu_event, SRAM0_WR_DATA_BEAT_WRITTEN) SEP \
+    FUNC(pmu_event, SRAM0_WR_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM0_WR_DATA_BEAT_STALLED) SEP \
+    FUNC(pmu_event, SRAM0_ENABLED_CYCLES) SEP \
+    FUNC(pmu_event, SRAM0_RD_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM0_WR_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM1_RD_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM1_RD_TRANS_COMPLETED) SEP \
+    FUNC(pmu_event, SRAM1_RD_DATA_BEAT_RECEIVED) SEP \
+    FUNC(pmu_event, SRAM1_RD_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM1_WR_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM1_WR_TRANS_COMPLETED_M) SEP \
+    FUNC(pmu_event, SRAM1_WR_TRANS_COMPLETED_S) SEP \
+    FUNC(pmu_event, SRAM1_WR_DATA_BEAT_WRITTEN) SEP \
+    FUNC(pmu_event, SRAM1_WR_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM1_WR_DATA_BEAT_STALLED) SEP \
+    FUNC(pmu_event, SRAM1_ENABLED_CYCLES) SEP \
+    FUNC(pmu_event, SRAM1_RD_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM1_WR_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM2_RD_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM2_RD_TRANS_COMPLETED) SEP \
+    FUNC(pmu_event, SRAM2_RD_DATA_BEAT_RECEIVED) SEP \
+    FUNC(pmu_event, SRAM2_RD_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM2_WR_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM2_WR_TRANS_COMPLETED_M) SEP \
+    FUNC(pmu_event, SRAM2_WR_TRANS_COMPLETED_S) SEP \
+    FUNC(pmu_event, SRAM2_WR_DATA_BEAT_WRITTEN) SEP \
+    FUNC(pmu_event, SRAM2_WR_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM2_WR_DATA_BEAT_STALLED) SEP \
+    FUNC(pmu_event, SRAM2_ENABLED_CYCLES) SEP \
+    FUNC(pmu_event, SRAM2_RD_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM2_WR_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM3_RD_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM3_RD_TRANS_COMPLETED) SEP \
+    FUNC(pmu_event, SRAM3_RD_DATA_BEAT_RECEIVED) SEP \
+    FUNC(pmu_event, SRAM3_RD_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM3_WR_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, SRAM3_WR_TRANS_COMPLETED_M) SEP \
+    FUNC(pmu_event, SRAM3_WR_TRANS_COMPLETED_S) SEP \
+    FUNC(pmu_event, SRAM3_WR_DATA_BEAT_WRITTEN) SEP \
+    FUNC(pmu_event, SRAM3_WR_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, SRAM3_WR_DATA_BEAT_STALLED) SEP \
+    FUNC(pmu_event, SRAM3_ENABLED_CYCLES) SEP \
+    FUNC(pmu_event, SRAM3_RD_STALL_LIMIT) SEP \
+    FUNC(pmu_event, SRAM3_WR_STALL_LIMIT) SEP \
+    FUNC(pmu_event, EXT0_RD_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, EXT0_RD_TRANS_COMPLETED) SEP \
+    FUNC(pmu_event, EXT0_RD_DATA_BEAT_RECEIVED) SEP \
+    FUNC(pmu_event, EXT0_RD_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, EXT0_WR_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, EXT0_WR_TRANS_COMPLETED_M) SEP \
+    FUNC(pmu_event, EXT0_WR_TRANS_COMPLETED_S) SEP \
+    FUNC(pmu_event, EXT0_WR_DATA_BEAT_WRITTEN) SEP \
+    FUNC(pmu_event, EXT0_WR_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, EXT0_WR_DATA_BEAT_STALLED) SEP \
+    FUNC(pmu_event, EXT0_ENABLED_CYCLES) SEP \
+    FUNC(pmu_event, EXT0_RD_STALL_LIMIT) SEP \
+    FUNC(pmu_event, EXT0_WR_STALL_LIMIT) SEP \
+    FUNC(pmu_event, EXT1_RD_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, EXT1_RD_TRANS_COMPLETED) SEP \
+    FUNC(pmu_event, EXT1_RD_DATA_BEAT_RECEIVED) SEP \
+    FUNC(pmu_event, EXT1_RD_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, EXT1_WR_TRANS_ACCEPTED) SEP \
+    FUNC(pmu_event, EXT1_WR_TRANS_COMPLETED_M) SEP \
+    FUNC(pmu_event, EXT1_WR_TRANS_COMPLETED_S) SEP \
+    FUNC(pmu_event, EXT1_WR_DATA_BEAT_WRITTEN) SEP \
+    FUNC(pmu_event, EXT1_WR_TRAN_REQ_STALLED) SEP \
+    FUNC(pmu_event, EXT1_WR_DATA_BEAT_STALLED) SEP \
+    FUNC(pmu_event, EXT1_ENABLED_CYCLES) SEP \
+    FUNC(pmu_event, EXT1_RD_STALL_LIMIT) SEP \
+    FUNC(pmu_event, EXT1_WR_STALL_LIMIT)
+
+#define EXPAND_PMU_PORT_DISABLE(FUNC, SEP) /* X-macro: applies FUNC(pmu_port_disable, <entry>) to each entry, joined by SEP */ \
+    FUNC(pmu_port_disable, ENABLE) SEP \
+    FUNC(pmu_port_disable, DISABLE)
+
+#define EXPAND_POOLING_MODE(FUNC, SEP) /* X-macro: applies FUNC(pooling_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(pooling_mode, MAX) SEP \
+    FUNC(pooling_mode, AVERAGE) SEP \
+    FUNC(pooling_mode, REDUCE_SUM) SEP \
+    FUNC(pooling_mode, SUM) SEP \
+    FUNC(pooling_mode, NONE) SEP \
+    FUNC(pooling_mode, MIN) SEP \
+    FUNC(pooling_mode, ARGMAX_X) SEP \
+    FUNC(pooling_mode, ARGMAX_Y)
+
+#define EXPAND_PRIVILEGE_LEVEL(FUNC, SEP) /* X-macro: applies FUNC(privilege_level, <entry>) to each entry, joined by SEP */ \
+    FUNC(privilege_level, USER) SEP \
+    FUNC(privilege_level, PRIVILEGED)
+
+#define EXPAND_RAM_ID(FUNC, SEP) /* X-macro: applies FUNC(ram_id, <entry>) to each entry, joined by SEP */ \
+    FUNC(ram_id, LUT) SEP \
+    FUNC(ram_id, IB) SEP \
+    FUNC(ram_id, AB) SEP \
+    FUNC(ram_id, CB) SEP \
+    FUNC(ram_id, OB)
+
+#define EXPAND_RESIZE_MODE(FUNC, SEP) /* X-macro: applies FUNC(resize_mode, <entry>) to each entry, joined by SEP */ \
+    FUNC(resize_mode, BILINEAR) SEP \
+    FUNC(resize_mode, REPLICATE) SEP \
+    FUNC(resize_mode, NEAREST)
+
+#define EXPAND_ROUND_MODE_IFM(FUNC, SEP) /* X-macro: applies FUNC(round_mode_ifm, <entry>) to each entry, joined by SEP */ \
+    FUNC(round_mode_ifm, DOUBLE_SYMMETRIC) SEP \
+    FUNC(round_mode_ifm, NATURAL)
+
+#define EXPAND_ROUND_MODE_OFM(FUNC, SEP) /* X-macro: applies FUNC(round_mode_ofm, <entry>) to each entry, joined by SEP */ \
+    FUNC(round_mode_ofm, DOUBLE_SYMMETRIC) SEP \
+    FUNC(round_mode_ofm, NATURAL) SEP \
+    FUNC(round_mode_ofm, DOUBLE_ASYMMETRIC) SEP \
+    FUNC(round_mode_ofm, SYMMETRIC) SEP \
+    FUNC(round_mode_ofm, TRUNCATE_TO_ZERO) SEP \
+    FUNC(round_mode_ofm, TRUNCATE_TO_LOWER)
+
+#define EXPAND_SECURITY_LEVEL(FUNC, SEP) /* X-macro: applies FUNC(security_level, <entry>) to each entry, joined by SEP */ \
+    FUNC(security_level, SECURE) SEP \
+    FUNC(security_level, NON_SECURE)
+
+#define EXPAND_STATE(FUNC, SEP) /* X-macro: applies FUNC(state, <entry>) to each entry, joined by SEP */ \
+    FUNC(state, STOPPED) SEP \
+    FUNC(state, RUNNING)
+
+#define EXPAND_WD_ACTIVE_CORE(FUNC, SEP) /* X-macro: applies FUNC(wd_active_core, <entry>) to each entry, joined by SEP */ \
+    FUNC(wd_active_core, NONE) SEP \
+    FUNC(wd_active_core, STANDARD) SEP \
+    FUNC(wd_active_core, FAST) SEP \
+    FUNC(wd_active_core, TENSOR)
+
+#define EXPAND_WEIGHT_FORMAT(FUNC, SEP) /* X-macro: applies FUNC(weight_format, <entry>) to each entry, joined by SEP */ \
+    FUNC(weight_format, SWD) SEP \
+    FUNC(weight_format, FWD)
+
+#define EXPAND_WEIGHT_ORDER(FUNC, SEP) /* X-macro: applies FUNC(weight_order, <entry>) to each entry, joined by SEP */ \
+    FUNC(weight_order, DEPTH_FIRST) SEP \
+    FUNC(weight_order, PART_KERNEL_FIRST)
+
+#define EXPAND_WEIGHT_SPARSITY(FUNC, SEP) /* X-macro: applies FUNC(weight_sparsity, <entry>) to each entry, joined by SEP */ \
+    FUNC(weight_sparsity, NONE) SEP \
+    FUNC(weight_sparsity, SPARSE_2_4)
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/ethosu_config_u85.h b/src/ethosu_config_u85.h
new file mode 100644
index 0000000..20f3d94
--- /dev/null
+++ b/src/ethosu_config_u85.h
@@ -0,0 +1,105 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2020, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ETHOSU_CONFIG_U85_H
+#define ETHOSU_CONFIG_U85_H
+
+/* Set default values if not manually overridden */
+
+/* Default MEM_ATTR entries; ethosu_dev_axi_init() writes NPU_MEM_ATTR_n to MEM_ATTR[n] */
+#ifndef NPU_MEM_ATTR_0
+#define NPU_MEM_ATTR_0 0 /* SRAM AXI port, non_sharable, device_non_bufferable */
+#endif
+
+#ifndef NPU_MEM_ATTR_1
+#define NPU_MEM_ATTR_1 0 /* SRAM AXI port, non_sharable, device_non_bufferable */
+#endif
+
+#ifndef NPU_MEM_ATTR_2
+#define NPU_MEM_ATTR_2 (1 << 2) /* EXT AXI port, non_sharable, device_non_bufferable */
+#endif
+
+#ifndef NPU_MEM_ATTR_3
+#define NPU_MEM_ATTR_3 (1 << 2) /* EXT AXI port, non_sharable, device_non_bufferable */
+#endif
+
+/* Default MEM_ATTR index to use for command stream; written as-is to the QCONFIG register */
+#ifndef NPU_QCONFIG
+#define NPU_QCONFIG 2
+#endif
+
+/* Default MEM_ATTR index to use for regions 0-7; NPU_REGIONCFG_n goes into the regionN field of REGIONCFG */
+#ifndef NPU_REGIONCFG_0
+#define NPU_REGIONCFG_0 3
+#endif
+
+#ifndef NPU_REGIONCFG_1
+#define NPU_REGIONCFG_1 0
+#endif
+
+#ifndef NPU_REGIONCFG_2
+#define NPU_REGIONCFG_2 1
+#endif
+
+#ifndef NPU_REGIONCFG_3
+#define NPU_REGIONCFG_3 1
+#endif
+
+#ifndef NPU_REGIONCFG_4
+#define NPU_REGIONCFG_4 1
+#endif
+
+#ifndef NPU_REGIONCFG_5
+#define NPU_REGIONCFG_5 1
+#endif
+
+#ifndef NPU_REGIONCFG_6
+#define NPU_REGIONCFG_6 1
+#endif
+
+#ifndef NPU_REGIONCFG_7
+#define NPU_REGIONCFG_7 1
+#endif
+
+/* AXI SRAM/EXT limits. Defaults are set to max; hardware can be configured to cap at lower values.
+ * NOTE: despite the _M1 suffix, the OUTSTANDING macros hold plain counts; ethosu_dev_axi_init()
+ * subtracts 1 before writing them to the max_outstanding_*_m1 register fields. */
+#ifndef AXI_LIMIT_SRAM_MAX_OUTSTANDING_READ_M1
+#define AXI_LIMIT_SRAM_MAX_OUTSTANDING_READ_M1 64
+#endif
+
+#ifndef AXI_LIMIT_SRAM_MAX_OUTSTANDING_WRITE_M1
+#define AXI_LIMIT_SRAM_MAX_OUTSTANDING_WRITE_M1 32
+#endif
+
+#ifndef AXI_LIMIT_SRAM_MAX_BEATS
+#define AXI_LIMIT_SRAM_MAX_BEATS 2 /* 0=64B, 1=128B, 2=256B */
+#endif
+
+#ifndef AXI_LIMIT_EXT_MAX_OUTSTANDING_READ_M1
+#define AXI_LIMIT_EXT_MAX_OUTSTANDING_READ_M1 64
+#endif
+
+#ifndef AXI_LIMIT_EXT_MAX_OUTSTANDING_WRITE_M1
+#define AXI_LIMIT_EXT_MAX_OUTSTANDING_WRITE_M1 32
+#endif
+
+#ifndef AXI_LIMIT_EXT_MAX_BEATS
+#define AXI_LIMIT_EXT_MAX_BEATS 2 /* 0=64B, 1=128B, 2=256B */
+#endif
+
+#endif /* #ifndef ETHOSU_CONFIG_U85_H */
diff --git a/src/ethosu_device.h b/src/ethosu_device.h
index 02942b1..42d6d23 100644
--- a/src/ethosu_device.h
+++ b/src/ethosu_device.h
@@ -1,6 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
- *
+ * SPDX-FileCopyrightText: Copyright 2019-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the License); you may
@@ -32,15 +31,6 @@
 #endif
 
 /******************************************************************************
- * Defines
- ******************************************************************************/
-
-// NOTE: Deprecated
-#ifndef ETHOSU_PMU_NCOUNTERS
-#define ETHOSU_PMU_NCOUNTERS 4
-#endif
-
-/******************************************************************************
  * Types
  ******************************************************************************/
 struct NPU_REG; // Forward declare, to be implemented by each device
diff --git a/src/ethosu_device_u85.c b/src/ethosu_device_u85.c
new file mode 100644
index 0000000..31f6a77
--- /dev/null
+++ b/src/ethosu_device_u85.c
@@ -0,0 +1,385 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2019-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/******************************************************************************
+ * Includes
+ ******************************************************************************/
+#include "ethosu85_interface.h"
+
+#include "ethosu_config_u85.h"
+#include "ethosu_device.h"
+#include "ethosu_log.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/******************************************************************************
+ * Defines
+ ******************************************************************************/
+
+#define ETHOSU_PRODUCT_U85 2
+
+#define BASEP_OFFSET 4
+
+#define ADDRESS_BITS 40
+
+#define ADDRESS_MASK ((1ull << ADDRESS_BITS) - 1)
+
+#define NPU_CMD_PWR_CLK_MASK (0xC)
+
+/******************************************************************************
+ * Functions
+ ******************************************************************************/
+
+uint64_t __attribute__((weak)) ethosu_address_remap(uint64_t address, int index) // weak symbol: can be overridden
+{
+    (void)(index); // unused by this default (callers pass -1 for the command stream, i for BASEP[i])
+    return address; // default is the identity mapping
+}
+
+// Allocate a device handle for base_address, check the U85 product id and soft-reset the NPU.
+// Returns NULL (with the handle already freed) if any of these steps fails.
+struct ethosu_device *ethosu_dev_init(void *const base_address, uint32_t secure_enable, uint32_t privilege_enable)
+{
+    struct ethosu_device *handle = malloc(sizeof(*handle));
+    if (handle == NULL)
+    {
+        LOG_ERR("Failed to allocate memory for Ethos-U device");
+        return NULL;
+    }
+
+    handle->reg        = (volatile struct NPU_REG *)base_address;
+    handle->secure     = secure_enable;
+    handle->privileged = privilege_enable;
+
+    if (handle->reg->CONFIG.product != ETHOSU_PRODUCT_U85)
+    {
+        LOG_ERR("Failed to initialize device. Driver has not been compiled for this product");
+        free(handle);
+        return NULL;
+    }
+
+    // Bring the NPU into a known state before handing the handle out
+    if (ethosu_dev_soft_reset(handle) != ETHOSU_SUCCESS)
+    {
+        free(handle);
+        return NULL;
+    }
+
+    return handle;
+}
+
+void ethosu_dev_deinit(struct ethosu_device *dev) // releases the handle allocated by ethosu_dev_init()
+{
+    free(dev); // only the heap allocation is released; no NPU register is written here
+}
+
+enum ethosu_error_codes ethosu_dev_axi_init(struct ethosu_device *dev) // always returns ETHOSU_SUCCESS
+{
+    struct regioncfg_r rcfg = {0};
+    struct axi_sram_r axi_s = {0};
+    struct axi_ext_r axi_e  = {0};
+
+    // Program the MEM_ATTR entries from the NPU_MEM_ATTR_n configuration macros.
+    // QCONFIG and each REGIONCFG field written below then select one of these
+    // entries by index.
+    dev->reg->MEM_ATTR[0].word = NPU_MEM_ATTR_0;
+    dev->reg->MEM_ATTR[1].word = NPU_MEM_ATTR_1;
+    dev->reg->MEM_ATTR[2].word = NPU_MEM_ATTR_2;
+    dev->reg->MEM_ATTR[3].word = NPU_MEM_ATTR_3;
+
+    // Set MEM_ATTR entry for command stream
+    dev->reg->QCONFIG.word = NPU_QCONFIG;
+
+    // Set MEM_ATTR entries to use for regions 0-7; fields are collected locally and written in one store
+    rcfg.region0             = NPU_REGIONCFG_0;
+    rcfg.region1             = NPU_REGIONCFG_1;
+    rcfg.region2             = NPU_REGIONCFG_2;
+    rcfg.region3             = NPU_REGIONCFG_3;
+    rcfg.region4             = NPU_REGIONCFG_4;
+    rcfg.region5             = NPU_REGIONCFG_5;
+    rcfg.region6             = NPU_REGIONCFG_6;
+    rcfg.region7             = NPU_REGIONCFG_7;
+    dev->reg->REGIONCFG.word = rcfg.word;
+
+    // Set AXI limits on SRAM AXI interfaces (config macros hold counts, the *_m1 fields take count - 1)
+    axi_s.max_outstanding_read_m1  = AXI_LIMIT_SRAM_MAX_OUTSTANDING_READ_M1 - 1;
+    axi_s.max_outstanding_write_m1 = AXI_LIMIT_SRAM_MAX_OUTSTANDING_WRITE_M1 - 1;
+    axi_s.max_beats                = AXI_LIMIT_SRAM_MAX_BEATS;
+    dev->reg->AXI_SRAM.word        = axi_s.word;
+
+    // Set AXI limits on EXT AXI interface(s), using the same count - 1 conversion
+    axi_e.max_outstanding_read_m1  = AXI_LIMIT_EXT_MAX_OUTSTANDING_READ_M1 - 1;
+    axi_e.max_outstanding_write_m1 = AXI_LIMIT_EXT_MAX_OUTSTANDING_WRITE_M1 - 1;
+    axi_e.max_beats                = AXI_LIMIT_EXT_MAX_BEATS;
+    dev->reg->AXI_EXT.word         = axi_e.word;
+
+    return ETHOSU_SUCCESS;
+}
+
+// Program QBASE/QSIZE and BASEP[i] (all addresses go through ethosu_address_remap()), then set
+// transition_to_running_state in CMD, carrying over only the bits under NPU_CMD_PWR_CLK_MASK.
+void ethosu_dev_run_command_stream(struct ethosu_device *dev,
+                                   const uint8_t *cmd_stream_ptr,
+                                   uint32_t cms_length,
+                                   const uint64_t *base_addr,
+                                   int num_base_addr)
+{
+    assert(num_base_addr <= NPU_REG_BASEP_ARRLEN);
+    struct cmd_r cmd;
+    uint64_t qbase = ethosu_address_remap((uintptr_t)cmd_stream_ptr, -1);
+    assert(qbase <= ADDRESS_MASK);
+    LOG_DEBUG("QBASE=0x%016" PRIx64 ", QSIZE=%" PRIu32 ", cmd_stream_ptr=%p", qbase, cms_length, cmd_stream_ptr);
+
+    dev->reg->QBASE.word[0] = qbase & 0xffffffff;
+    dev->reg->QBASE.word[1] = qbase >> 32;
+    dev->reg->QSIZE.word    = cms_length;
+
+    for (int i = 0; i < num_base_addr; i++)
+    {
+        uint64_t addr = ethosu_address_remap(base_addr[i], i);
+        assert(addr <= ADDRESS_MASK);
+        LOG_DEBUG("BASEP%d=0x%016" PRIx64, i, addr);
+        dev->reg->BASEP[i].word[0] = addr & 0xffffffff;
+        dev->reg->BASEP[i].word[1] = addr >> 32;
+    }
+
+    cmd.word                        = dev->reg->CMD.word & NPU_CMD_PWR_CLK_MASK;
+    cmd.transition_to_running_state = 1;
+    dev->reg->CMD.word              = cmd.word;
+    LOG_DEBUG("CMD=0x%08" PRIx32, cmd.word);
+}
+
+void ethosu_dev_print_err_status(struct ethosu_device *dev) // logs STATUS and QREAD at error level
+{
+    LOG_ERR("NPU status=0x%08" PRIx32 ", qread=%" PRIu32 ", cmd_end_reached=%u",
+            dev->reg->STATUS.word,
+            dev->reg->QREAD.word,
+            dev->reg->STATUS.cmd_end_reached); // STATUS is read a second time here for the single bit field
+}
+
+// Acknowledge the NPU interrupt and report whether the job finished cleanly. Returns false when
+// STATUS shows a fault or cmd_end_reached is not set; in that case the NPU needs to be reset.
+bool ethosu_dev_handle_interrupt(struct ethosu_device *dev)
+{
+    struct cmd_r irq_ack;
+
+    // Clear the interrupt, carrying over only the CMD bits under NPU_CMD_PWR_CLK_MASK
+    irq_ack.word       = dev->reg->CMD.word & NPU_CMD_PWR_CLK_MASK;
+    irq_ack.clear_irq  = 1;
+    dev->reg->CMD.word = irq_ack.word;
+
+    // A fault has occurred if any of these STATUS flags is set (checked in this order, short-circuit)
+    const bool fault = dev->reg->STATUS.bus_status || dev->reg->STATUS.cmd_parse_error ||
+                       dev->reg->STATUS.branch_fault || dev->reg->STATUS.ecc_fault;
+
+    // Success additionally requires STATUS.cmd_end_reached; it is only read when no fault was seen
+    return !fault && dev->reg->STATUS.cmd_end_reached;
+}
+
+// True when PROT reports the security and privilege levels that were requested in dev.
+bool ethosu_dev_verify_access_state(struct ethosu_device *dev)
+{
+    const int want_csl = dev->secure ? SECURITY_LEVEL_SECURE : SECURITY_LEVEL_NON_SECURE;
+    const int want_cpl = dev->privileged ? PRIVILEGE_LEVEL_PRIVILEGED : PRIVILEGE_LEVEL_USER;
+
+    // active_CPL is only read once active_CSL already matches
+    return dev->reg->PROT.active_CSL == want_csl && dev->reg->PROT.active_CPL == want_cpl;
+}
+
+// Soft-reset the NPU into the security/privilege level stored in dev, poll (bounded) until
+// STATUS.reset_status clears, verify PROT, then reprogram the AXI configuration.
+enum ethosu_error_codes ethosu_dev_soft_reset(struct ethosu_device *dev)
+{
+    struct reset_r reset;
+
+    reset.word        = 0;
+    reset.pending_CPL = dev->privileged ? PRIVILEGE_LEVEL_PRIVILEGED : PRIVILEGE_LEVEL_USER;
+    reset.pending_CSL = dev->secure ? SECURITY_LEVEL_SECURE : SECURITY_LEVEL_NON_SECURE;
+
+    // Reset and set security level
+    LOG_INFO("Soft reset NPU");
+    dev->reg->RESET.word = reset.word;
+
+    // Wait until reset status indicates that reset has been completed
+    for (int i = 0; i < 100000 && dev->reg->STATUS.reset_status != 0; i++)
+    {
+    }
+
+    if (dev->reg->STATUS.reset_status != 0)
+    {
+        LOG_ERR("Soft reset timed out");
+        return ETHOSU_GENERIC_FAILURE;
+    }
+
+    // Verify that NPU has switched security state and privilege level
+    if (ethosu_dev_verify_access_state(dev) != true)
+    {
+        LOG_ERR("Failed to switch security state and privilege level");
+        return ETHOSU_GENERIC_FAILURE;
+    }
+
+    // Reinitialize AXI settings; report its status instead of discarding it
+    return ethosu_dev_axi_init(dev);
+}
+
+// Read CONFIG and ID once each and copy the decoded fields into *hwinfo.
+void ethosu_dev_get_hw_info(struct ethosu_device *dev, struct ethosu_hw_info *hwinfo)
+{
+    struct config_r config_reg;
+    struct id_r id_reg;
+
+    config_reg.word = dev->reg->CONFIG.word;
+    id_reg.word     = dev->reg->ID.word;
+
+    hwinfo->cfg.cmd_stream_version = config_reg.cmd_stream_version;
+    hwinfo->cfg.custom_dma         = config_reg.custom_dma;
+    hwinfo->cfg.macs_per_cc        = config_reg.macs_per_cc;
+    hwinfo->version.arch_major_rev = id_reg.arch_major_rev;
+    hwinfo->version.arch_minor_rev = id_reg.arch_minor_rev;
+    hwinfo->version.arch_patch_rev = id_reg.arch_patch_rev;
+    hwinfo->version.product_major  = id_reg.product_major;
+    hwinfo->version.version_major  = id_reg.version_major;
+    hwinfo->version.version_minor  = id_reg.version_minor;
+    hwinfo->version.version_status = id_reg.version_status;
+}
+
+// Update the power-Q/clock-Q enable bits in CMD; an *_UNCHANGED request keeps the current bit.
+enum ethosu_error_codes ethosu_dev_set_clock_and_power(struct ethosu_device *dev,
+                                                       enum ethosu_clock_q_request clock_q,
+                                                       enum ethosu_power_q_request power_q)
+{
+    struct cmd_r next = {0};
+    // Start from the current CMD bits under NPU_CMD_PWR_CLK_MASK so untouched settings carry over
+    next.word = dev->reg->CMD.word & NPU_CMD_PWR_CLK_MASK;
+    if (power_q != ETHOSU_POWER_Q_UNCHANGED)
+    {
+        next.power_q_enable = (power_q == ETHOSU_POWER_Q_ENABLE);
+    }
+    if (clock_q != ETHOSU_CLOCK_Q_UNCHANGED)
+    {
+        next.clock_q_enable = (clock_q == ETHOSU_CLOCK_Q_ENABLE);
+    }
+
+    dev->reg->CMD.word = next.word;
+    LOG_DEBUG("CMD=0x%08" PRIx32, next.word);
+    return ETHOSU_SUCCESS;
+}
+
+bool ethosu_dev_verify_optimizer_config(struct ethosu_device *dev, uint32_t cfg_in, uint32_t id_in)
+{
+    struct config_r *opt_cfg = (struct config_r *)&cfg_in; // optimizer's CONFIG word viewed as bit fields
+    struct config_r hw_cfg;
+    struct id_r *opt_id = (struct id_r *)&id_in; // optimizer's ID word viewed as bit fields
+    struct id_r hw_id;
+    bool ret = true; // result: cleared by every mismatch logged below
+
+    hw_cfg.word = dev->reg->CONFIG.word;
+    hw_id.word  = dev->reg->ID.word;
+
+    LOG_INFO("Optimizer config. product=%u, cmd_stream_version=%u, macs_per_cc=%u, num_axi_ext=%u, num_axi_sram=%u, "
+             "custom_dma=%u",
+             opt_cfg->product,
+             opt_cfg->cmd_stream_version,
+             opt_cfg->macs_per_cc,
+             1U << opt_cfg->num_axi_ext, // printed as 1 << field; presumably the field stores log2 - TODO confirm
+             1U << opt_cfg->num_axi_sram,
+             opt_cfg->custom_dma);
+
+    LOG_INFO("Optimizer config. arch version=%u.%u.%u",
+             opt_id->arch_major_rev,
+             opt_id->arch_minor_rev,
+             opt_id->arch_patch_rev);
+
+    LOG_INFO("Ethos-U config. product=%u, cmd_stream_version=%u, macs_per_cc=%u, num_axi_ext=%u, num_axi_sram=%u, "
+             "custom_dma=%u",
+             hw_cfg.product,
+             hw_cfg.cmd_stream_version,
+             hw_cfg.macs_per_cc,
+             1U << hw_cfg.num_axi_ext,
+             1U << hw_cfg.num_axi_sram,
+             hw_cfg.custom_dma);
+
+    LOG_INFO("Ethos-U. arch version=%u.%u.%u", hw_id.arch_major_rev, hw_id.arch_minor_rev, hw_id.arch_patch_rev);
+
+    if (opt_cfg->word != hw_cfg.word) // individual fields are only compared when the raw words differ
+    {
+        if (hw_cfg.product != opt_cfg->product)
+        {
+            LOG_ERR("NPU config mismatch. npu.product=%u, optimizer.product=%u", hw_cfg.product, opt_cfg->product);
+            ret = false;
+        }
+
+        if (hw_cfg.macs_per_cc != opt_cfg->macs_per_cc)
+        {
+            LOG_ERR("NPU config mismatch. npu.macs_per_cc=%u, optimizer.macs_per_cc=%u",
+                    hw_cfg.macs_per_cc,
+                    opt_cfg->macs_per_cc);
+            ret = false;
+        }
+
+        if (hw_cfg.num_axi_ext != opt_cfg->num_axi_ext)
+        {
+            LOG_ERR("NPU config mismatch. npu.num_axi_ext=%u, optimizer.num_axi_ext=%u",
+                    1U << hw_cfg.num_axi_ext,
+                    1U << opt_cfg->num_axi_ext);
+            ret = false;
+        }
+
+        if (hw_cfg.num_axi_sram != opt_cfg->num_axi_sram)
+        {
+            LOG_ERR("NPU config mismatch. npu.num_axi_sram=%u, optimizer.num_axi_sram=%u",
+                    1U << hw_cfg.num_axi_sram,
+                    1U << opt_cfg->num_axi_sram);
+            ret = false;
+        }
+
+        if (hw_cfg.cmd_stream_version != opt_cfg->cmd_stream_version)
+        {
+            LOG_ERR("NPU config mismatch. npu.cmd_stream_version=%u, optimizer.cmd_stream_version=%u",
+                    hw_cfg.cmd_stream_version,
+                    opt_cfg->cmd_stream_version);
+            ret = false;
+        }
+
+        if (!hw_cfg.custom_dma && opt_cfg->custom_dma) // one-sided: hardware with custom_dma set is accepted
+        {
+            LOG_ERR("NPU config mismatch. npu.custom_dma=%u, optimizer.custom_dma=%u",
+                    hw_cfg.custom_dma,
+                    opt_cfg->custom_dma);
+            ret = false;
+        }
+    }
+
+    if ((hw_id.arch_major_rev != opt_id->arch_major_rev) || (hw_id.arch_minor_rev < opt_id->arch_minor_rev))
+    { // major must match exactly; hardware minor must be >= the optimizer's; patch rev is only logged
+        LOG_ERR("NPU arch mismatch. npu.arch=%u.%u.%u, optimizer.arch=%u.%u.%u",
+                hw_id.arch_major_rev,
+                hw_id.arch_minor_rev,
+                hw_id.arch_patch_rev,
+                opt_id->arch_major_rev,
+                opt_id->arch_minor_rev,
+                opt_id->arch_patch_rev);
+        ret = false;
+    }
+
+    return ret;
+}
diff --git a/src/ethosu_driver.c b/src/ethosu_driver.c
index 5128455..919fc11 100644
--- a/src/ethosu_driver.c
+++ b/src/ethosu_driver.c
@@ -23,10 +23,14 @@
 #include "ethosu_device.h"
 #include "ethosu_log.h"
 
-#ifdef ETHOSU55
+#if defined(ETHOSU55)
 #include "ethosu_config_u55.h"
-#else
+#elif defined(ETHOSU65)
 #include "ethosu_config_u65.h"
+#elif defined(ETHOSU85)
+#include "ethosu_config_u85.h"
+#else
+#error Missing device type macro
 #endif
 
 #include <assert.h>