MLECO-2395: Allow users to select Ethos-U memory mode

Change-Id: Icf09410f12072e8d7850dd1e540c3243af24ed09
diff --git a/source/application/hal/hal.c b/source/application/hal/hal.c
index 12bb7ef..53a304b 100644
--- a/source/application/hal/hal.c
+++ b/source/application/hal/hal.c
@@ -24,18 +24,39 @@
 
 #if defined(ARM_NPU)
 
+#include "ethosu_mem_config.h"          /* Arm Ethos-U memory config */
 #include "ethosu_driver.h"              /* Arm Ethos-U driver header */
 #include "timing_adapter.h"             /* Arm Ethos-U timing adapter driver header */
 #include "timing_adapter_settings.h"    /* Arm Ethos-U timing adapter settings */
 
 struct ethosu_driver ethosu_drv; /* Default Ethos-U device driver */
 
+#if defined(ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0)
+    static uint8_t  cache_arena[ETHOS_U_CACHE_BUF_SZ] CACHE_BUF_ATTRIBUTE; /* Buffer handed to the NPU driver as its fast mem area */
+#else /* defined (ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0) */
+    static uint8_t* cache_arena = NULL; /* No cache buffer configured: the driver is given NULL */
+#endif /* defined (ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0) */
+
 /**
  * @brief   Initialises the Arm Ethos-U NPU
  * @return  0 if successful, error code otherwise
  **/
 static int arm_npu_init(void);
 
+static uint8_t * get_cache_arena(void)
+{
+    return cache_arena; /* NULL when ETHOS_U_CACHE_BUF_SZ is undefined or 0 */
+}
+
+static size_t get_cache_arena_size(void)
+{
+#if defined(ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0)
+    return sizeof(cache_arena); /* cache_arena is an array in this configuration: size in bytes */
+#else /* defined (ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0) */
+    return 0; /* cache_arena is a NULL pointer in this configuration */
+#endif /* defined (ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0) */
+}
+
 #endif /* ARM_NPU */
 
 int hal_init(hal_platform* platform, data_acq_module* data_acq,
@@ -232,12 +253,12 @@
     const void * ethosu_base_address = (void *)(SEC_ETHOS_U_NPU_BASE);
 
     if (0 != (err = ethosu_init(
-                        &ethosu_drv,            /* Ethos-U driver device pointer */
-                        ethosu_base_address,    /* Ethos-U NPU's base address. */
-                        NULL,                   /* Pointer to fast mem area - NULL for U55. */
-                        0,                      /* Fast mem region size. */
-                        1,                      /* Security enable. */
-                        1))) {                  /* Privilege enable. */
+                        &ethosu_drv,             /* Ethos-U driver device pointer */
+                        ethosu_base_address,     /* Ethos-U NPU's base address. */
+                        get_cache_arena(),       /* Pointer to fast mem area - NULL if no cache buffer is configured. */
+                        get_cache_arena_size(),  /* Fast mem region size - 0 if no cache buffer is configured. */
+                        1,                       /* Security enable. */
+                        1))) {                   /* Privilege enable. */
         printf_err("failed to initalise Ethos-U device\n");
         return err;
     }
@@ -266,4 +287,5 @@
 
     return 0;
 }
+
 #endif /* ARM_NPU */
diff --git a/source/application/hal/platforms/bare-metal/bsp/include/bsp.h b/source/application/hal/platforms/bare-metal/bsp/include/bsp.h
index 20052ef..2bd4fa1 100644
--- a/source/application/hal/platforms/bare-metal/bsp/include/bsp.h
+++ b/source/application/hal/platforms/bare-metal/bsp/include/bsp.h
@@ -35,4 +35,8 @@
 
 #endif /* MPS3_PLATFORM */
 
+#if defined(ARM_NPU)
+#include "ethosu_mem_config.h"
+#endif /* defined(ARM_NPU) */
+
 #endif /* BSP_H */
diff --git a/source/application/hal/platforms/bare-metal/bsp/include/ethosu_mem_config.h b/source/application/hal/platforms/bare-metal/bsp/include/ethosu_mem_config.h
new file mode 100644
index 0000000..b393a03
--- /dev/null
+++ b/source/application/hal/platforms/bare-metal/bsp/include/ethosu_mem_config.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ETHOS_U_NPU_MEM_CONFIG_H
+#define ETHOS_U_NPU_MEM_CONFIG_H
+
+#define ETHOS_U_NPU_MEMORY_MODE_SRAM_ONLY           0
+#define ETHOS_U_NPU_MEMORY_MODE_SHARED_SRAM         1
+#define ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM      2
+
+#define ETHOS_U_MEM_BYTE_ALIGNMENT                  16
+
+#ifndef ETHOS_U_NPU_MEMORY_MODE /* Build did not choose a mode: fall back to shared SRAM. */
+    #define ETHOS_U_NPU_MEMORY_MODE                 ETHOS_U_NPU_MEMORY_MODE_SHARED_SRAM
+#endif /* ETHOS_U_NPU_MEMORY_MODE */
+
+#if (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM)
+    #define ETHOS_U_CACHE_BUF_SZ    (393216U)    /* 384 KiB. NOTE(review): confirm this size against the Vela documentation. */
+#else
+    #define ETHOS_U_CACHE_BUF_SZ    (0U)         /* No cache buffer in the other memory modes. */
+#endif /* (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM) */
+
+/**
+ * Activation buffer (a.k.a. tensor arena) section name and attributes.
+ * The tensor arena is placed in a different region for each memory mode; unknown modes are rejected.
+ **/
+#if (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_SHARED_SRAM) || \
+    (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_SRAM_ONLY)
+    #define ACTIVATION_BUF_SECTION      section(".bss.NoInit.activation_buf_sram")
+    #define ACTIVATION_BUF_SECTION_NAME ("SRAM")
+#elif (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM)
+    #define ACTIVATION_BUF_SECTION      section("activation_buf_dram")
+    #define CACHE_BUF_SECTION           section(".bss.NoInit.ethos_u_cache")
+    #define ACTIVATION_BUF_SECTION_NAME ("DDR/DRAM")
+    #define CACHE_BUF_ATTRIBUTE         __attribute__((aligned(ETHOS_U_MEM_BYTE_ALIGNMENT), CACHE_BUF_SECTION))
+#else
+    #error "Unsupported ETHOS_U_NPU_MEMORY_MODE"
+#endif
+
+#endif /* ETHOS_U_NPU_MEM_CONFIG_H */
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
index e5c2a14..bcbc81f 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
@@ -144,7 +144,11 @@
   .sram :
   {
     . = ALIGN(16);
-    *(.bss.NoInit.activation_buf)
+    /* Cache area (if used) */
+    *(.bss.NoInit.ethos_u_cache)
+    . = ALIGN (16);
+    /* activation buffers a.k.a tensor arena when memory mode sram only or shared sram */
+    *(.bss.NoInit.activation_buf_sram)
     . = ALIGN(16);
   } > SRAM AT > SRAM
 
@@ -177,13 +181,17 @@
     /* __attribute__((aligned(16))) is not handled by the CMSIS startup code.
      * Force the alignment here as a workaround */
     . = ALIGN(16);
+    /* nn model's baked in input matrices */
     *(ifm)
     . = ALIGN(16);
+    /* nn model's default space */
     *(nn_model)
     . = ALIGN (16);
+    /* labels */
     *(labels)
     . = ALIGN (16);
-    *(activation_buf)
+    /* activation buffers a.k.a tensor arena when memory mode dedicated sram */
+    *(activation_buf_dram)
     . = ALIGN (16);
   } > DDR AT > DDR
 
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
index 4760875..62dbbe5 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
@@ -67,8 +67,11 @@
     ;-----------------------------------------------------
     isram.bin       0x31000000  UNINIT ALIGN 16 0x00400000
     {
-        ; activation buffers a.k.a tensor arena
-        *.o (.bss.NoInit.activation_buf)
+        ; Cache area (if used)
+        *.o (.bss.NoInit.ethos_u_cache)
+
+        ; activation buffers a.k.a tensor arena when memory mode sram only or shared sram
+        *.o (.bss.NoInit.activation_buf_sram)
     }
 }
 
@@ -88,15 +91,14 @@
         ; nn model's baked in input matrices
         *.o (ifm)
 
-        ; nn model
+        ; nn model's default space
         *.o (nn_model)
 
         ; labels
         *.o (labels)
 
-        ; if the activation buffer (tensor arena) doesn't
-        ; fit in the SRAM region, we accommodate it here
-        *.o (activation_buf)
+        ; activation buffers a.k.a tensor arena when memory mode dedicated sram
+        *.o (activation_buf_dram)
     }
 
     ;-----------------------------------------------------
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld
index ceaff7d..e5b6bd9 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld
@@ -137,7 +137,11 @@
   .sram :
   {
     . = ALIGN(16);
-    *(.bss.NoInit.activation_buf)
+    /* Cache area (if used) */
+    *(.bss.NoInit.ethos_u_cache)
+    . = ALIGN (16);
+    /* activation buffers a.k.a tensor arena when memory mode sram only or shared sram */
+    *(.bss.NoInit.activation_buf_sram)
     . = ALIGN(16);
   } > SRAM AT > SRAM
 
@@ -170,13 +174,17 @@
     /* __attribute__((aligned(16))) is not handled by the CMSIS startup code.
      * Force the alignment here as a workaround */
     . = ALIGN(16);
+    /* nn model's baked in input matrices */
     *(ifm)
     . = ALIGN(16);
+    /* nn model's default space */
     *(nn_model)
     . = ALIGN (16);
+    /* labels */
     *(labels)
     . = ALIGN (16);
-    *(activation_buf)
+    /* activation buffers a.k.a tensor arena when memory mode dedicated sram */
+    *(activation_buf_dram)
     . = ALIGN (16);
   } > DDR AT > DDR
 
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
index 0c6a388..e84d81e 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
@@ -16,6 +16,10 @@
 ; *************************************************************
 ; ***       Scatter-Loading Description File                ***
 ; *************************************************************
+; Please see docs/sections/appendix.md for memory mapping information.
+;
+; Note: Ethos-U NPU can access BRAM, internal SRAM and the DDR sections => activation buffers and
+;       the model should only be placed in those regions.
 ;
 ;---------------------------------------------------------
 ; First load region (ITCM)
@@ -63,8 +67,11 @@
     ;-----------------------------------------------------
     isram.bin       0x31000000  UNINIT ALIGN 16 0x00400000
     {
-        ; activation buffers a.k.a tensor arena
-        *.o (.bss.NoInit.activation_buf)
+        ; Cache area (if used)
+        *.o (.bss.NoInit.ethos_u_cache)
+
+        ; activation buffers a.k.a tensor arena when memory mode sram only or shared sram
+        *.o (.bss.NoInit.activation_buf_sram)
     }
 }
 
@@ -84,15 +91,14 @@
         ; nn model's baked in input matrices
         *.o (ifm)
 
-        ; nn model
+        ; nn model's default space
         *.o (nn_model)
 
         ; labels
         *.o (labels)
 
-        ; if the activation buffer (tensor arena) doesn't
-        ; fit in the SRAM region, we accommodate it here
-        *.o (activation_buf)
+        ; activation buffers a.k.a tensor arena when memory mode dedicated sram
+        *.o (activation_buf_dram)
     }
 
     ;-----------------------------------------------------
diff --git a/source/application/main/Main.cc b/source/application/main/Main.cc
index 6e1c620..9622566 100644
--- a/source/application/main/Main.cc
+++ b/source/application/main/Main.cc
@@ -36,7 +36,7 @@
     info("%s\n", PRJ_DES_STR);
     info("Target system design: %s\n", DESIGN_NAME);
     info("Version %s Build date: " __DATE__ " @ " __TIME__ "\n", PRJ_VER_STR);
-    info("Copyright (C) ARM Ltd 2020. All rights reserved.\n\n");
+    info("Copyright (C) ARM Ltd 2021. All rights reserved.\n\n");
 }
 
 int main ()
diff --git a/source/application/tensorflow-lite-micro/include/BufAttributes.hpp b/source/application/tensorflow-lite-micro/include/BufAttributes.hpp
index 126172b..a3b5890 100644
--- a/source/application/tensorflow-lite-micro/include/BufAttributes.hpp
+++ b/source/application/tensorflow-lite-micro/include/BufAttributes.hpp
@@ -18,6 +18,17 @@
 #ifndef BUF_ATTRIBUTES_HPP
 #define BUF_ATTRIBUTES_HPP
 
+#if defined(ARM_NPU)
+    /* When Arm NPU is defined, we use the config set by NPU mem parameters */
+    #include "ethosu_mem_config.h"
+    #define BYTE_ALIGNMENT              ETHOS_U_MEM_BYTE_ALIGNMENT
+#else /* defined(ARM_NPU) */
+    /* otherwise, we use the default ones here. */
+    #define ACTIVATION_BUF_SECTION      section(".bss.NoInit.activation_buf_sram")
+    #define ACTIVATION_BUF_SECTION_NAME ("SRAM")
+    #define BYTE_ALIGNMENT              16
+#endif /* defined(ARM_NPU) */
+
 #ifdef __has_attribute
 #define HAVE_ATTRIBUTE(x) __has_attribute(x)
 #else   /* __has_attribute */
@@ -27,9 +38,8 @@
 #if HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__))
 
 /* We want all buffers/sections to be aligned to 16 byte.  */
-#define ALIGNMENT_REQ               aligned(16)
+#define ALIGNMENT_REQ               aligned(BYTE_ALIGNMENT)
 
-/* Model data section name. */
 #define MODEL_SECTION               section("nn_model")
 
 /* Label section name */
@@ -45,24 +55,6 @@
     #define ACTIVATION_BUF_SRAM_SZ  0x00000000
 #endif /* ACTIVATION_BUF_SRAM_SZ */
 
-/**
- * Activation buffer aka tensor arena section name
- * We have to place the tensor arena in different region based on its size.
- * If it fits in SRAM, we place it there, and also mark it by giving it a
- * different section name. The scatter file places the ZI data in DDR and
- * the uninitialised region in the SRAM.
- **/
-#define ACTIVATION_BUF_SECTION_SRAM section(".bss.NoInit.activation_buf")
-#define ACTIVATION_BUF_SECTION_DRAM section("activation_buf")
-
-#if     ACTIVATION_BUF_SZ > ACTIVATION_BUF_SRAM_SZ /* Will buffer not fit in SRAM? */
-    #define ACTIVATION_BUF_SECTION      ACTIVATION_BUF_SECTION_DRAM
-    #define ACTIVATION_BUF_SECTION_NAME ("DDR")
-#else   /* ACTIVATION_BUF_SZ > 0x00200000 */
-    #define ACTIVATION_BUF_SECTION  ACTIVATION_BUF_SECTION_SRAM
-    #define ACTIVATION_BUF_SECTION_NAME ("SRAM")
-#endif  /* ACTIVATION_BUF_SZ > 0x00200000 */
-
 /* IFM section name. */
 #define IFM_BUF_SECTION             section("ifm")
 
diff --git a/source/use_case/ad/usecase.cmake b/source/use_case/ad/usecase.cmake
index 72683d1..9ddf66e 100644
--- a/source/use_case/ad/usecase.cmake
+++ b/source/use_case/ad/usecase.cmake
@@ -61,7 +61,7 @@
         STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ad_medium_int8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ad_medium_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ad_medium_int8.tflite)
 endif()
diff --git a/source/use_case/asr/usecase.cmake b/source/use_case/asr/usecase.cmake
index 21fc80d..60fc132 100644
--- a/source/use_case/asr/usecase.cmake
+++ b/source/use_case/asr/usecase.cmake
@@ -75,7 +75,7 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8.tflite)
 endif()
diff --git a/source/use_case/img_class/usecase.cmake b/source/use_case/img_class/usecase.cmake
index e46de00..441a346 100644
--- a/source/use_case/img_class/usecase.cmake
+++ b/source/use_case/img_class/usecase.cmake
@@ -47,7 +47,7 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/mobilenet_v2_1.0_224_INT8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/mobilenet_v2_1.0_224_INT8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/mobilenet_v2_1.0_224_INT8.tflite)
 endif()
diff --git a/source/use_case/inference_runner/usecase.cmake b/source/use_case/inference_runner/usecase.cmake
index bab5c65..7f9c74f 100644
--- a/source/use_case/inference_runner/usecase.cmake
+++ b/source/use_case/inference_runner/usecase.cmake
@@ -22,7 +22,7 @@
 generate_default_input_code(${INC_GEN_DIR})
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/dnn_s_quantized_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/dnn_s_quantized_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/dnn_s_quantized.tflite)
 endif()
diff --git a/source/use_case/kws/usecase.cmake b/source/use_case/kws/usecase.cmake
index 6d0ad7e..bd54cea 100644
--- a/source/use_case/kws/usecase.cmake
+++ b/source/use_case/kws/usecase.cmake
@@ -74,7 +74,7 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8.tflite)
 endif()
diff --git a/source/use_case/kws_asr/usecase.cmake b/source/use_case/kws_asr/usecase.cmake
index 716a8c8..e9b9150 100644
--- a/source/use_case/kws_asr/usecase.cmake
+++ b/source/use_case/kws_asr/usecase.cmake
@@ -67,8 +67,8 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_H128.tflite)
-    set(DEFAULT_MODEL_PATH_ASR      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
+    set(DEFAULT_MODEL_PATH_ASR      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8.tflite)
     set(DEFAULT_MODEL_PATH_ASR      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8.tflite)
diff --git a/source/use_case/vww/usecase.cmake b/source/use_case/vww/usecase.cmake
index 0201aed..4005297 100644
--- a/source/use_case/vww/usecase.cmake
+++ b/source/use_case/vww/usecase.cmake
@@ -31,7 +31,7 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/vww4_128_128_INT8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/vww4_128_128_INT8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/vww4_128_128_INT8.tflite)
 endif()