MLECO-2395: Allow users to select Ethos-U memory mode

Change-Id: Icf09410f12072e8d7850dd1e540c3243af24ed09
diff --git a/Readme.md b/Readme.md
index b2a20b5..cfbf46d 100644
--- a/Readme.md
+++ b/Readme.md
@@ -75,10 +75,13 @@
 
 > **Note:** The default flow assumes Arm® *Ethos™-U55* NPU usage, configured to use 128 Multiply-Accumulate units
 > and is sharing SRAM with the Arm® *Cortex®-M55*.
-> Evaluation kit supports also:
 >
-> - *Ethos™-U55* NPU configured to use 32, 64 and 256 Multiply-Accumulate units.
-> - *Ethos™-U65* NPU configured to use 256 and 512 Multiply-Accumulate units.
+> ML Embedded Evaluation Kit supports:
+>
+> |  *Ethos™-U* NPU  | Default MACs/cc | Other MACs/cc supported | Default Memory Mode | Other Memory Modes supported |
+> |------------------|-----------------|-------------------------|---------------------|------------------------------|
+> |   *Ethos™-U55*   |       128       |      32, 64, 256        |     Shared_Sram     |          Sram_Only           |
+> |   *Ethos™-U65*   |       256       |          512            |    Dedicated_Sram   |         Shared_Sram          |
 >
 > For more information see [Building](./docs/documentation.md#building).
 
@@ -105,7 +108,7 @@
 
 - The contribution have certified origin and give us your permission. To manage this process we use
   [Developer Certificate of Origin (DCO) V1.1](https://developercertificate.org/).
-  To indicate that contributors agree to the the terms of the DCO, it's neccessary "sign off" the
+  To indicate that contributors agree to the terms of the DCO, it's necessary to "sign off" the
   contribution by adding a line with name and e-mail address to every git commit message:
 
   ```log
@@ -189,4 +192,4 @@
 | [Image Classification Samples](./resources/img_class/samples/files.md) | [Creative Commons Attribution 1.0](./resources/LICENSE_CC_1.0.txt) | <https://www.pexels.com> |
 | [Keyword Spotting Samples](./resources/kws/samples/files.md) | [Creative Commons Attribution 4.0 International Public License](./resources/LICENSE_CC_4.0.txt) | <http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz> |
 | [Keyword Spotting and Automatic Speech Recognition Samples](./resources/kws_asr/samples/files.md) | [Creative Commons Attribution 4.0 International Public License](./resources/LICENSE_CC_4.0.txt) | <http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz> |
-| [Visual Wake Word Samples](./resources/vww/samples/files.md) | [Creative Commons Attribution 1.0](./resources/LICENSE_CC_1.0.txt) | <https://www.pexels.com> |
\ No newline at end of file
+| [Visual Wake Word Samples](./resources/vww/samples/files.md) | [Creative Commons Attribution 1.0](./resources/LICENSE_CC_1.0.txt) | <https://www.pexels.com> |
diff --git a/docs/documentation.md b/docs/documentation.md
index 28b9eda..a186fbb 100644
--- a/docs/documentation.md
+++ b/docs/documentation.md
@@ -306,7 +306,16 @@
 
 ## Memory Considerations
 
-Please refer to: [Memory considerations](./sections/memory_considerations.md#memory-considerations)
+Please refer to:
+
+- [Memory considerations](./sections/memory_considerations.md#memory-considerations)
+  - [Understanding memory usage from Vela output](./sections/memory_considerations.md#understanding-memory-usage-from-vela-output)
+    - [Total SRAM used](./sections/memory_considerations.md#total-sram-used)
+    - [Total Off-chip Flash used](./sections/memory_considerations.md#total-off_chip-flash-used)
+  - [Memory mode configurations](./sections/memory_considerations.md#memory-mode-configurations)
+  - [Tensor arena and neural network model memory placement](./sections/memory_considerations.md#tensor-arena-and-neural-network-model-memory-placement)
+  - [Memory usage for ML use-cases](./sections/memory_considerations.md#memory-usage-for-ml-use_cases)
+  - [Memory constraints](./sections/memory_considerations.md#memory-constraints)
 
 ## Troubleshooting
 
diff --git a/docs/quick_start.md b/docs/quick_start.md
index ce0b436..3488447 100644
--- a/docs/quick_start.md
+++ b/docs/quick_start.md
@@ -113,6 +113,14 @@
     --output-dir=resources_downloaded/kws
 mv resources_downloaded/kws/ds_cnn_clustered_int8_vela.tflite resources_downloaded/kws/ds_cnn_clustered_int8_vela_H128.tflite
 
+. resources_downloaded/env/bin/activate && vela resources_downloaded/kws/ds_cnn_clustered_int8.tflite \
+    --accelerator-config=ethos-u65-256 \
+    --optimise Performance --config scripts/vela/default_vela.ini \
+    --memory-mode=Dedicated_Sram \
+    --system-config=Ethos_U65_High_End \
+    --output-dir=resources_downloaded/kws
+mv resources_downloaded/kws/ds_cnn_clustered_int8_vela.tflite resources_downloaded/kws/ds_cnn_clustered_int8_vela_Y256.tflite
+
 . resources_downloaded/env/bin/activate && vela resources_downloaded/kws_asr/wav2letter_int8.tflite \
     --accelerator-config=ethos-u55-128 \
     --optimise Performance --config scripts/vela/default_vela.ini \
@@ -121,7 +129,15 @@
     --output-dir=resources_downloaded/kws_asr
 mv resources_downloaded/kws_asr/wav2letter_int8_vela.tflite resources_downloaded/kws_asr/wav2letter_int8_vela_H128.tflite
 
-. resources_downloaded/env/bin/activate && vela resources_downloaded/kws_asr/ds_cnn_clustered_int8.tflite -\
+. resources_downloaded/env/bin/activate && vela resources_downloaded/kws_asr/wav2letter_int8.tflite \
+    --accelerator-config=ethos-u65-256 \
+    --optimise Performance --config scripts/vela/default_vela.ini \
+    --memory-mode=Dedicated_Sram \
+    --system-config=Ethos_U65_High_End \
+    --output-dir=resources_downloaded/kws_asr
+mv resources_downloaded/kws_asr/wav2letter_int8_vela.tflite resources_downloaded/kws_asr/wav2letter_int8_vela_Y256.tflite
+
+. resources_downloaded/env/bin/activate && vela resources_downloaded/kws_asr/ds_cnn_clustered_int8.tflite \
     --accelerator-config=ethos-u55-128 \
     --optimise Performance --config scripts/vela/default_vela.ini \
     --memory-mode=Shared_Sram \
@@ -129,7 +145,15 @@
     --output-dir=resources_downloaded/kws_asr
 mv resources_downloaded/kws_asr/ds_cnn_clustered_int8_vela.tflite resources_downloaded/kws_asr/ds_cnn_clustered_int8_vela_H128.tflite
 
-. resources_downloaded/env/bin/activate && vela resources_downloaded/inference_runner/dnn_s_quantized.tflite -\
+. resources_downloaded/env/bin/activate && vela resources_downloaded/kws_asr/ds_cnn_clustered_int8.tflite \
+    --accelerator-config=ethos-u65-256 \
+    --optimise Performance --config scripts/vela/default_vela.ini \
+    --memory-mode=Dedicated_Sram \
+    --system-config=Ethos_U65_High_End \
+    --output-dir=resources_downloaded/kws_asr
+mv resources_downloaded/kws_asr/ds_cnn_clustered_int8_vela.tflite resources_downloaded/kws_asr/ds_cnn_clustered_int8_vela_Y256.tflite
+
+. resources_downloaded/env/bin/activate && vela resources_downloaded/inference_runner/dnn_s_quantized.tflite \
     --accelerator-config=ethos-u55-128 \
     --optimise Performance --config scripts/vela/default_vela.ini \
     --memory-mode=Shared_Sram \
@@ -137,6 +161,14 @@
     --output-dir=resources_downloaded/inference_runner
 mv resources_downloaded/inference_runner/dnn_s_quantized_vela.tflite resources_downloaded/inference_runner/dnn_s_quantized_vela_H128.tflite
 
+. resources_downloaded/env/bin/activate && vela resources_downloaded/inference_runner/dnn_s_quantized.tflite \
+    --accelerator-config=ethos-u65-256 \
+    --optimise Performance --config scripts/vela/default_vela.ini \
+    --memory-mode=Dedicated_Sram \
+    --system-config=Ethos_U65_High_End \
+    --output-dir=resources_downloaded/inference_runner
+mv resources_downloaded/inference_runner/dnn_s_quantized_vela.tflite resources_downloaded/inference_runner/dnn_s_quantized_vela_Y256.tflite
+
 . resources_downloaded/env/bin/activate && vela resources_downloaded/img_class/mobilenet_v2_1.0_224_INT8.tflite \
     --accelerator-config=ethos-u55-128 \
     --optimise Performance --config scripts/vela/default_vela.ini \
@@ -145,6 +177,14 @@
     --output-dir=resources_downloaded/img_class
 mv resources_downloaded/img_class/mobilenet_v2_1.0_224_INT8_vela.tflite resources_downloaded/img_class/mobilenet_v2_1.0_224_INT8_vela_H128.tflite
 
+. resources_downloaded/env/bin/activate && vela resources_downloaded/img_class/mobilenet_v2_1.0_224_INT8.tflite \
+    --accelerator-config=ethos-u65-256 \
+    --optimise Performance --config scripts/vela/default_vela.ini \
+    --memory-mode=Dedicated_Sram \
+    --system-config=Ethos_U65_High_End \
+    --output-dir=resources_downloaded/img_class
+mv resources_downloaded/img_class/mobilenet_v2_1.0_224_INT8_vela.tflite resources_downloaded/img_class/mobilenet_v2_1.0_224_INT8_vela_Y256.tflite
+
 . resources_downloaded/env/bin/activate && vela resources_downloaded/asr/wav2letter_int8.tflite \
     --accelerator-config=ethos-u55-128 \
     --optimise Performance --config scripts/vela/default_vela.ini \
@@ -153,6 +193,14 @@
     --output-dir=resources_downloaded/asr
 mv resources_downloaded/asr/wav2letter_int8_vela.tflite resources_downloaded/asr/wav2letter_int8_vela_H128.tflite
 
+. resources_downloaded/env/bin/activate && vela resources_downloaded/asr/wav2letter_int8.tflite \
+    --accelerator-config=ethos-u65-256 \
+    --optimise Performance --config scripts/vela/default_vela.ini \
+    --memory-mode=Dedicated_Sram \
+    --system-config=Ethos_U65_High_End \
+    --output-dir=resources_downloaded/asr
+mv resources_downloaded/asr/wav2letter_int8_vela.tflite resources_downloaded/asr/wav2letter_int8_vela_Y256.tflite
+
 . resources_downloaded/env/bin/activate && vela resources_downloaded/ad/ad_medium_int8.tflite \
     --accelerator-config=ethos-u55-128 \
     --optimise Performance --config scripts/vela/default_vela.ini \
@@ -161,6 +209,14 @@
     --output-dir=resources_downloaded/ad
 mv resources_downloaded/ad/ad_medium_int8_vela.tflite resources_downloaded/ad/ad_medium_int8_vela_H128.tflite
 
+. resources_downloaded/env/bin/activate && vela resources_downloaded/ad/ad_medium_int8.tflite \
+    --accelerator-config=ethos-u65-256 \
+    --optimise Performance --config scripts/vela/default_vela.ini \
+    --memory-mode=Dedicated_Sram \
+    --system-config=Ethos_U65_High_End \
+    --output-dir=resources_downloaded/ad
+mv resources_downloaded/ad/ad_medium_int8_vela.tflite resources_downloaded/ad/ad_medium_int8_vela_Y256.tflite
+
 mkdir cmake-build-mps3-sse-300-gnu-release and cd cmake-build-mps3-sse-300-gnu-release
 
 cmake .. \
@@ -171,4 +227,4 @@
 
 > **Note:** If you want to change the application, then, instead of using the `build_default` Python script, follow the
 > approach defined in [documentation.md](./documentation.md#arm_ml-embedded-evaluation-kit). For example, if you wanted to modify the number of
-> MAC units of the Ethos-U, or running a custom neural network.
+> MACs of the Ethos-U, or run a custom neural network.
diff --git a/docs/sections/building.md b/docs/sections/building.md
index 192c4aa..3adaa72 100644
--- a/docs/sections/building.md
+++ b/docs/sections/building.md
@@ -139,7 +139,7 @@
   [bare-metal-gcc.cmake](../../scripts/cmake/toolchains/bare-metal-gcc.cmake).
 
 - `TENSORFLOW_SRC_PATH`: the path to the root of the TensorFlow directory. The default value points to the
-  `dependencies/tensorflow` git submodule. Respository is hosted here: [tensorflow](https://github.com/tensorflow/tensorflow)
+  `dependencies/tensorflow` git submodule. Repository is hosted here: [tensorflow](https://github.com/tensorflow/tensorflow)
 
 - `ETHOS_U_NPU_DRIVER_SRC_PATH`: The path to the *Ethos-U* NPU core driver sources. The default value points to the
   `dependencies/core-driver` git submodule. Repository is hosted here:
@@ -147,11 +147,23 @@
 
 - `CMSIS_SRC_PATH`: The path to the CMSIS sources to be used to build TensorFlow Lite Micro library. This parameter is
   optional and is only valid for Arm® *Cortex®-M* CPU targeted configurations. The default value points to the
-  `dependencies/cmsis` git submodule. Respository is hosted here: [CMSIS-5](https://github.com/ARM-software/CMSIS_5.git)
+  `dependencies/cmsis` git submodule. Repository is hosted here: [CMSIS-5](https://github.com/ARM-software/CMSIS_5.git)
 
 - `ETHOS_U_NPU_ENABLED`: Sets whether the use of *Ethos-U* NPU is available for the deployment target. By default, this
   is set and therefore application is built with *Ethos-U* NPU supported.
 
+- `ETHOS_U_NPU_ID`: The *Ethos-U* NPU processor:
+  - `U55` (default)
+  - `U65`
+
+- `ETHOS_U_NPU_MEMORY_MODE`:  The *Ethos-U* NPU memory mode:
+  - `Shared_Sram` (default for *Ethos-U55* NPU)
+  - `Dedicated_Sram` (default for *Ethos-U65* NPU)
+  - `Sram_Only`
+
+  >**Note:** The `Shared_Sram` memory mode is available on both *Ethos-U55* and *Ethos-U65* NPU, `Dedicated_Sram` only
+  > for *Ethos-U65* NPU and `Sram_Only` only for *Ethos-U55* NPU.
+
 - `CPU_PROFILE_ENABLED`: Sets whether profiling information for the CPU core should be displayed. By default, this is
   set to false, but can be turned on for FPGA targets. The the FVP and the CPU core cycle counts are not meaningful and
   are not to be used.
@@ -178,7 +190,9 @@
   `timing_adapter` dependencies folder.
 
 - `TA_CONFIG_FILE`: The path to the CMake configuration file that contains the timing adapter parameters. Used only if
-  the timing adapter build is enabled.
+  the timing adapter build is enabled. Default for Ethos-U55 NPU is
+  [ta_config_u55_high_end.cmake](../../scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake),
+  for Ethos-U65 NPU is [ta_config_u65_high_end.cmake](../../scripts/cmake/timing_adapter/ta_config_u65_high_end.cmake).
 
 - `TENSORFLOW_LITE_MICRO_CLEAN_BUILD`: Optional parameter to enable, or disable, "cleaning" prior to building for the
   TensorFlow Lite Micro library. Enabled by default.
@@ -189,12 +203,12 @@
 - `ARMCLANG_DEBUG_DWARF_LEVEL`: When the CMake build type is specified as `Debug` and when the `armclang` toolchain is
   being used to build for a *Cortex-M* CPU target, this optional argument can be set to specify the `DWARF` format.
 
-    By default, this is set to 4 and is synonymous with passing `-g` flag to the compiler. This is compatible with Arm
-    DS and other tools which can interpret the latest DWARF format. To allow debugging using the Model Debugger from Arm
-    Fast Model Tools Suite, this argument can be used to pass DWARF format version as "3".
+  By default, this is set to 4 and is synonymous with passing `-g` flag to the compiler. This is compatible with Arm
+  DS and other tools which can interpret the latest DWARF format. To allow debugging using the Model Debugger from Arm
+  Fast Model Tools Suite, this argument can be used to pass DWARF format version as "3".
 
-    >**Note:** This option is only available when the CMake project is configured with the `-DCMAKE_BUILD_TYPE=Debug`
-    >argument. Also, the same dwarf format is used for building TensorFlow Lite Micro library.
+  >**Note:** This option is only available when the CMake project is configured with the `-DCMAKE_BUILD_TYPE=Debug`
+  >argument. Also, the same dwarf format is used for building TensorFlow Lite Micro library.
 
 For details on the specific use-case build options, follow the instructions in the use-case specific documentation.
 
@@ -265,7 +279,7 @@
 ```
 
 This fetches every model into the `resources_downloaded` directory. It also optimizes the models using the Vela compiler
-for the default 128 MAC configuration of the Arm® *Ethos™-U55* NPU.
+for the default 128 MACs configuration of the Arm® *Ethos™-U55* NPU and for the default 256 MACs configuration of the Arm® *Ethos™-U65* NPU.
 
 > **Note:** This script requires Python version 3.6 or higher. Please make sure all [build prerequisites](#build-prerequisites)
 > are satisfied.
@@ -507,7 +521,7 @@
   > **Note:** The bandwidth cap `BWCAP` operates on the transaction level and, because of its simple implementation, the accuracy is limited.
   > When set to a small value it allows only a small number of transactions for each pulse cycle.
   > Once the counter has reached or exceeded the configured cap, no transactions will be allowed before the next pulse cycle.
-  > In order to minimise this effect some possible solutions are:
+  > In order to minimize this effect some possible solutions are:
   >
   >- scale up all the parameters to a reasonably large value.
   >- scale up `BWCAP` as a multiple of the burst length (in this case bulk traffic will not face rounding errors in the bandwidth cap).
@@ -688,7 +702,7 @@
 - `--accelerator-config`: Specifies the accelerator configuration to use between `ethos-u55-256`, `ethos-u55-128`,
   `ethos-u55-64`, `ethos-u55-32`, `ethos-u65-256`, and `ethos-u65-512`.
 - `--optimise`: Sets the optimisation strategy to Performance or Size. The Size strategy results in a model minimising the SRAM
-  usage whereas the Performance strategy optimises the neural network for maximal perforamance.
+  usage whereas the Performance strategy optimises the neural network for maximal performance.
   Note that if using the Performance strategy, you can also pass the `--arena-cache-size` option to Vela.
 - `--config`: Specifies the path to the Vela configuration file. The format of the file is a Python ConfigParser `.ini`
     file. An example can be found in the `dependencies` folder [default_vela.ini](../../scripts/vela/default_vela.ini).
@@ -714,17 +728,18 @@
 To build for a different *Ethos-U* NPU variant:
 
 - Optimize the model with Vela compiler with the correct parameters. See [Optimize custom model with Vela compiler](./building.md#optimize-custom-model-with-vela-compiler).
+- Use the correct `ETHOS_U_NPU_ID`: `U55` for *Ethos-U55* NPU, `U65` for *Ethos-U65* NPU.
 - Use the Vela model as custom model in the building command. See [Add custom model](./building.md#add-custom-model)
 - Use the correct timing adapter settings configuration. See [Building timing adapter with custom options](./building.md#building-timing-adapter-with-custom-options)
 
-For example, when building for *Ethos-U65* High End system configuration, the Vela comand will be:
+For example, when building for *Ethos-U65* High End system configuration and 512 MACs/cc, the Vela command will be:
 
 ```commandline
 vela \
     <model_file>.tflite \
-    --accelerator-config ethos-u65-256 \
+    --accelerator-config ethos-u65-512 \
     --optimise Performance \
-    --memory-mode=Shared_Sram \
+    --memory-mode=Dedicated_Sram \
     --system-config=Ethos_U65_High_End \
     --config=../scripts/vela/default_vela.ini
 ```
@@ -733,8 +748,8 @@
 
 ```commandline
 cmake .. \
-    -D<use_case>_MODEL_TFLITE_PATH=<path/to/ethos_u65_vela_model.tflite> \
-    -DTA_CONFIG_FILE=scripts/cmake/ta_config_u65_high_end.cmake
+    -DETHOS_U_NPU_ID=U65 \
+    -D<use_case>_MODEL_TFLITE_PATH=<path/to/ethos_u65_vela_model.tflite>
 ```
 
 ## Automatic file generation
diff --git a/docs/sections/customizing.md b/docs/sections/customizing.md
index 3104986..854a3ed 100644
--- a/docs/sections/customizing.md
+++ b/docs/sections/customizing.md
@@ -671,8 +671,8 @@
 so:
 
 ```cmake
-if (ETHOS_U_NPU_ENABLED EQUAL 1)
-  set(DEFAULT_MODEL_PATH  ${DEFAULT_MODEL_DIR}/helloworldmodel_uint8_vela.tflite)
+if (ETHOS_U_NPU_ENABLED)
+  set(DEFAULT_MODEL_PATH  ${DEFAULT_MODEL_DIR}/helloworldmodel_uint8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
   set(DEFAULT_MODEL_PATH  ${DEFAULT_MODEL_DIR}/helloworldmodel_uint8.tflite)
 endif()
diff --git a/docs/sections/memory_considerations.md b/docs/sections/memory_considerations.md
index fc81f8f..89baf41 100644
--- a/docs/sections/memory_considerations.md
+++ b/docs/sections/memory_considerations.md
@@ -7,7 +7,7 @@
   - [Understanding memory usage from Vela output](#understanding-memory-usage-from-vela-output)
     - [Total SRAM used](#total-sram-used)
     - [Total Off-chip Flash used](#total-off_chip-flash-used)
-  - [Non-default configurations](#non-default-configurations)
+  - [Memory mode configurations](#memory-mode-configurations)
   - [Tensor arena and neural network model memory placement](#tensor-arena-and-neural-network-model-memory-placement)
   - [Memory usage for ML use-cases](#memory-usage-for-ml-use_cases)
   - [Memory constraints](#memory-constraints)
@@ -94,52 +94,88 @@
 
 ### Total SRAM used
 
-When the neural network model is compiled with Vela, a summary report that includes memory usage is generated. For
-example, compiling the keyword spotting model
+When the neural network model is compiled with Vela, a summary report that includes memory usage is generated.
+For example, compiling the keyword spotting model
 [ds_cnn_clustered_int8](https://github.com/ARM-software/ML-zoo/blob/master/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ds_cnn_clustered_int8.tflite)
-with Vela produces, among others, the following output:
+with the Vela command:
+
+```commandline
+vela \
+  --accelerator-config=ethos-u55-128 \
+  --optimise Performance \
+  --config scripts/vela/default_vela.ini \
+  --memory-mode=Shared_Sram \
+  --system-config=Ethos_U55_High_End_Embedded \
+  ds_cnn_clustered_int8.tflite
+```
+
+It produces, among others, the following output:
 
 ```log
-Total SRAM used                                 70.77 KiB
-Total Off-chip Flash used                      430.78 KiB
+Total SRAM used                                146.31 KiB
+Total Off-chip Flash used                      452.42 KiB
 ```
 
 The `Total SRAM used` here shows the required memory to store the `tensor arena` for the TensorFlow Lite Micro
 framework. This is the amount of memory required to store the input, output, and intermediate buffers. In the preceding
-example, the tensor arena requires 70.77 KiB of available SRAM.
+example, the tensor arena requires 146.31 KiB of available SRAM.
 
 > **Note:** Vela can only estimate the SRAM required for graph execution. It has no way of estimating the memory used by
 > internal structures from TensorFlow Lite Micro framework.
 
-Therefore, we recommend that you top this memory size by at least 2KiB. We also recoomend that you also carve out the
+Therefore, we recommend that you top up this memory size by at least 2KiB. We also recommend that you carve out the
 `tensor arena` of this size, and then place it on the SRAM of the target system.
 
 ### Total Off-chip Flash used
 
 The `Total Off-chip Flash` parameter indicates the minimum amount of flash required to store the neural network model.
-In the preceding example, the system must have a minimum of 430.78 KiB of available flash memory to store the `.tflite`
+In the preceding example, the system must have a minimum of 452.42 KiB of available flash memory to store the `.tflite`
 file contents.
 
 > **Note:** The Arm® *Corstone™-300* system uses the DDR region as a flash memory. The timing adapter sets up the AXI
 > bus that is wired to the DDR to mimic both bandwidth and latency characteristics of a flash memory device.
 
-## Non-default configurations
+## Memory mode configurations
 
-The preceding example outlines a typical configuration, and this corresponds to the default Vela setting. However, the
-system SRAM can also be used to store the neural network model along with the `tensor arena`. Vela supports optimizing
-the model for this configuration with its `Sram_Only` memory mode.
+The preceding example outlines a typical configuration for *Ethos-U55* NPU, and this corresponds to the default
+Vela memory mode setting.
+The evaluation kit supports all the *Ethos-U* NPU memory modes:
 
-For further information, please refer to: [vela.ini](../../scripts/vela/vela.ini).
+|  *Ethos™-U* NPU  |   Default Memory Mode  |  Other Memory Modes supported  |
+|------------------|------------------------|--------------------------------|
+|   *Ethos™-U55*   |     `Shared_Sram`      |          `Sram_Only`           |
+|   *Ethos™-U65*   |    `Dedicated_Sram`    |         `Shared_Sram`          |
 
-To make use of a neural network model that is optimized for this configuration, the linker script for the target
-platform must be changed. By default, the linker scripts are set up to support the default configuration only.
+For further information on the default settings, please refer to: [default_vela.ini](../../scripts/vela/default_vela.ini).
+
+For *Ethos-U55* NPU, the system SRAM can also be used to store the neural network model along with the `tensor arena`.
+Vela supports optimizing the model for this configuration with its `Sram_Only` memory mode.
+Although the Vela settings for this configuration suggest that only the AXI0 bus is used, when compiling the model
+a warning is generated, for example:
+
+```log
+vela \
+  --accelerator-config=ethos-u55-128 \
+  --optimise Performance \
+  --config scripts/vela/default_vela.ini \
+  --memory-mode=Sram_Only \
+  --system-config=Ethos_U55_High_End_Embedded \
+  ds_cnn_clustered_int8.tflite
+
+Info: Changing const_mem_area from Sram to OnChipFlash. This will use the same characteristics as Sram.
+```
+
+This means that the neural network model is always placed in the flash region. In this case, timing adapters for the
+AXI buses are set to the same values to mimic both bandwidth and latency characteristics of an SRAM memory device.
+See [Ethos-U55 NPU timing adapter default configuration](../../scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake).
 
 For script snippets, please refer to: [Memory constraints](./memory_considerations.md#memory-constraints).
 
 > **Note:**
 >
-> 1. The the `Shared_Sram` memory mode represents the default configuration.
-> 2. The `Dedicated_Sram` mode is only applicable for the Arm® *Ethos™-U65*.
+> 1. The `Shared_Sram` memory mode represents the default configuration.
+> 2. The `Dedicated_Sram` memory mode is only applicable for the Arm® *Ethos™-U65*.
+> 3. The `Sram_Only` memory mode is only applicable for the Arm® *Ethos™-U55*.
 
 ## Tensor arena and neural network model memory placement
 
@@ -147,18 +183,15 @@
 Every use-case application has a corresponding `<use_case_name>_ACTIVATION_BUF_SZ` parameter that governs the maximum
 available size of the `activation buffer` for that particular use-case.
 
-The linker script is set up to place this memory region in SRAM. However, if the memory required is more than what the
-target platform supports, this buffer needs to be placed on flash instead. Every target platform has a profile
-definition in the form of a `CMake` file.
+The linker script is set up to place this memory region in SRAM for *Ethos-U55* and in flash for *Ethos-U65*.
+Every target platform has a profile definition in the form of a `CMake` file.
 
 For further information and an example, please refer to: [Corstone-300 profile](../../scripts/cmake/subsystem-profiles/corstone-sse-300.cmake).
 
 The parameter `ACTIVATION_BUF_SRAM_SZ` defines the maximum SRAM size available for the platform. This is propagated
-through the build system. If the `<use_case_name>_ACTIVATION_BUF_SZ` for a given use-case is *more* than the
-`ACTIVATION_BUF_SRAM_SZ` for the target build platform, then the `activation buffer` is placed on the flash memory
-instead.
+through the build system.
 
-The neural network model is always placed in the flash region. However, this can be changed in the linker script.
+The neural network model is always placed in the flash region (even in case of `Sram_Only` memory mode as mentioned earlier).
 
 ## Memory usage for ML use-cases
 
@@ -168,12 +201,12 @@
 > **Note:** The SRAM usage does not include memory used by TensorFlow Lite Micro and must be topped up as explained
 > under [Total SRAM used](#total-sram-used).
 
-- [Keyword spotting model](https://github.com/ARM-software/ML-zoo/tree/master/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8)
+- [Keyword spotting model](https://github.com/ARM-software/ML-zoo/tree/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8)
   requires
   - 70.7 KiB of SRAM
   - 430.7 KiB of flash memory.
 
-- [Image classification model](https://github.com/ARM-software/ML-zoo/tree/master/models/image_classification/mobilenet_v2_1.0_224/tflite_uint8)
+- [Image classification model](https://github.com/ARM-software/ML-zoo/tree/e0aa361b03c738047b9147d1a50e3f2dcb13dbcb/models/image_classification/mobilenet_v2_1.0_224/tflite_uint8)
   requires
   - 638.6 KiB of SRAM
   - 3.1 MB of flash memory.
@@ -199,38 +232,8 @@
 ;---------------------------------------------------------
 LOAD_REGION_0       0x00000000                  0x00080000
 {
-    ;-----------------------------------------------------
-    ; First part of code mem - 512kiB
-    ;-----------------------------------------------------
-    itcm.bin        0x00000000                  0x00080000
-    {
-        *.o (RESET, +First)
-        * (InRoot$$Sections)
 
-        ; Essentially only RO-CODE, RO-DATA is in a
-        ; different region.
-        .ANY (+RO)
-    }
-
-    ;-----------------------------------------------------
-    ; 128kiB of 512kiB DTCM is used for any other RW or ZI
-    ; data. Note: this region is internal to the Cortex-M
-    ; CPU.
-    ;-----------------------------------------------------
-    dtcm.bin        0x20000000                  0x00020000
-    {
-        ; Any R/W and/or zero initialised data
-        .ANY(+RW +ZI)
-    }
-
-    ;-----------------------------------------------------
-    ; 384kiB of stack space within the DTCM region. See
-    ; `dtcm.bin` for the first section. Note: by virtue of
-    ; being part of DTCM, this region is only accessible
-    ; from Cortex-M55.
-    ;-----------------------------------------------------
-    ARM_LIB_STACK   0x20020000 EMPTY ALIGN 8    0x00060000
-    {}
+...
 
     ;-----------------------------------------------------
     ; SSE-300's internal SRAM of 4MiB - reserved for
@@ -240,8 +243,11 @@
     ;-----------------------------------------------------
     isram.bin       0x31000000  UNINIT ALIGN 16 0x00400000
     {
-        ; activation buffers a.k.a tensor arena
-        *.o (.bss.NoInit.activation_buf)
+        ; Cache area (if used)
+        *.o (.bss.NoInit.ethos_u_cache)
+
+        ; activation buffers a.k.a tensor arena when memory mode sram only
+        *.o (.bss.NoInit.activation_buf_sram)
     }
 }
 
@@ -251,7 +257,7 @@
 LOAD_REGION_1       0x70000000                  0x02000000
 {
     ;-----------------------------------------------------
-    ; 32 MiB of DRAM space for neural network model,
+    ; 32 MiB of DDR space for neural network model,
     ; input vectors and labels. If the activation buffer
     ; size required by the network is bigger than the
     ; SRAM size available, it is accommodated here.
@@ -261,33 +267,18 @@
         ; nn model's baked in input matrices
         *.o (ifm)
 
-        ; nn model
+        ; nn model's default space
         *.o (nn_model)
 
         ; labels
         *.o (labels)
 
-        ; if the activation buffer (tensor arena) doesn't
-        ; fit in the SRAM region, we accommodate it here
-        *.o (activation_buf)
+        ; activation buffers a.k.a tensor arena when memory mode dedicated sram
+        *.o (activation_buf_dram)
     }
 
-    ;-----------------------------------------------------
-    ; First 256kiB of BRAM (FPGA SRAM) used for RO data.
-    ; Note: Total BRAM size available is 2MiB.
-    ;-----------------------------------------------------
-    bram.bin        0x11000000          ALIGN 8 0x00040000
-    {
-        ; RO data (incl. unwinding tables for debugging)
-        .ANY (+RO-DATA)
-    }
+...
 
-    ;-----------------------------------------------------
-    ; Remaining part of the 2MiB BRAM used as heap space.
-    ; 0x00200000 - 0x00040000 = 0x001C0000 (1.75 MiB)
-    ;-----------------------------------------------------
-    ARM_LIB_HEAP    0x11040000 EMPTY ALIGN 8    0x001C0000
-    {}
 }
 
 ```
diff --git a/docs/sections/troubleshooting.md b/docs/sections/troubleshooting.md
index b2bd421..fc81ffd 100644
--- a/docs/sections/troubleshooting.md
+++ b/docs/sections/troubleshooting.md
@@ -36,20 +36,20 @@
 ERROR - Inference failed.
 ```
 
-It shows that the configuration of the Vela compiled `.tflite` file doesn't match the number of MAC units on the FVP.
+It shows that the configuration of the Vela compiled `.tflite` file doesn't match the number of MACs on the FVP.
 
 The Vela configuration parameter `accelerator-config` used for producing the .`tflite` file that is used
-while building the application should match the MAC configuration that the FVP is emulating.
+while building the application should match the MACs configuration that the FVP is emulating.
 For example, if the `accelerator-config` from the Vela command was `ethos-u55-128`, the FVP should be emulating the
-128 MAC configuration of the Ethos-U55 block(default FVP configuration). If the `accelerator-config` used was
+128 MACs configuration of the Ethos-U55 block(default FVP configuration). If the `accelerator-config` used was
 `ethos-u55-256`, the FVP must be executed with additional command line parameter to instruct it to emulate the
-256 MAC configuration instead.
+256 MACs configuration instead.
 
 The [deploying on an FVP emulating MPS3](./deployment.md#deploying-on-an-fvp-emulating-mps3) page provides guidance
-on how to instruct the FVP to change the number of MAC units.
+on how to instruct the FVP to change the number of MACs.
 
 Note that when the FVP is launched and the application starts executing, various parameters about the system are
-logged over UART. These include the MAC/cc configuration of the FVP.
+logged over UART. These include the MACs/cc configuration of the FVP.
 
 ```log
 INFO - MPS3 core clock has been set to: 32000000Hz
diff --git a/release_notes.txt b/release_notes.txt
index 62618a5..da3a5e1 100644
--- a/release_notes.txt
+++ b/release_notes.txt
@@ -1,9 +1,16 @@
+Changes in 21.11
+    * Support for 21.08 Ethos-U component and dependencies (core-software, core-driver, Vela 3.1.0, CMSIS, TensorFlow Lite).
+    * Added dynamic load support for FVP for inference runner use-case.
+    * Added support for different memory modes: Shared_Sram, Dedicated_Sram and Sram_Only.
+    * Documentation updates.
+
 Changes in 21.08
     * Support for 21.05 Ethos-U component (core-software, core-driver, Vela 3.0.0).
     * TensorFlow submodule changed to https://github.com/tensorflow/tflite-micro (tested with TensorFlow Lite Micro commit hash: f510d38d0eaa3195ce3af66e3f32648740f08afb).
     * Image classification model changed (from uint8 to int8 datatype).
     * Documentation updates, added support to Corstone-300 + Ethos-U65.
     * Various scripts improvements.
+    * Added Visual Wake Word use case.
 
 Changes in 21.05
     * Added script to download and optimize default models.
diff --git a/scripts/cmake/bare-metal-sources.cmake b/scripts/cmake/bare-metal-sources.cmake
index d3dad41..3fe9b1b 100644
--- a/scripts/cmake/bare-metal-sources.cmake
+++ b/scripts/cmake/bare-metal-sources.cmake
@@ -39,7 +39,52 @@
 set(ETHOS_U_NPU_FLAG                           "-DARM_NPU=1")
 
 if (ETHOS_U_NPU_ENABLED)
-    set(OPTIONAL_FLAGS      "${OPTIONAL_FLAGS} ${ETHOS_U_NPU_FLAG}")
+    # Select the NPU IP first: the default memory mode and MACs config below depend on it.
+    USER_OPTION(ETHOS_U_NPU_ID "Arm Ethos-U NPU IP (U55 or U65)"
+        "U55"
+        STRING)
+
+    if (("${ETHOS_U_NPU_ID}" STREQUAL "U55") OR ("${ETHOS_U_NPU_ID}" STREQUAL "U65"))
+        if ("${ETHOS_U_NPU_ID}" STREQUAL "U55")
+            set(DEFAULT_NPU_MEM_MODE    "Shared_Sram")
+            set(DEFAULT_NPU_CONFIG_ID     "H128")
+        elseif("${ETHOS_U_NPU_ID}" STREQUAL "U65")
+            set(DEFAULT_NPU_MEM_MODE    "Dedicated_Sram")
+            set(DEFAULT_NPU_CONFIG_ID     "Y256")
+        endif()
+    else ()
+        message(FATAL_ERROR "Non compatible Ethos-U NPU processor ${ETHOS_U_NPU_ID}")
+    endif ()
+
+    USER_OPTION(ETHOS_U_NPU_MEMORY_MODE "Specifies the memory mode used in the Vela command."
+        "${DEFAULT_NPU_MEM_MODE}"
+        STRING)
+
+    if ("${ETHOS_U_NPU_MEMORY_MODE}" STREQUAL "Sram_Only")
+        # Sram_Only is used only for Ethos-U55; the value must be a macro defined in ethosu_mem_config.h
+        if ("${ETHOS_U_NPU_ID}" STREQUAL "U55")
+            set(ETHOS_U_NPU_MEMORY_MODE_FLAG "-DETHOS_U_NPU_MEMORY_MODE=ETHOS_U_NPU_MEMORY_MODE_SRAM_ONLY")
+        else ()
+            message(FATAL_ERROR "Non compatible Ethos-U NPU memory mode and processor ${ETHOS_U_NPU_MEMORY_MODE} - ${ETHOS_U_NPU_ID}. `sram_only` can be used only for Ethos-U55.")
+        endif ()
+
+    elseif ("${ETHOS_U_NPU_MEMORY_MODE}" STREQUAL "Shared_Sram")
+        # Shared Sram can be used for Ethos-U55 and Ethos-U65
+        set(ETHOS_U_NPU_MEMORY_MODE_FLAG "-DETHOS_U_NPU_MEMORY_MODE=ETHOS_U_NPU_MEMORY_MODE_SHARED_SRAM")
+
+    elseif ("${ETHOS_U_NPU_MEMORY_MODE}" STREQUAL "Dedicated_Sram")
+        # Dedicated Sram is used only for Ethos-U65
+        if ("${ETHOS_U_NPU_ID}" STREQUAL "U65")
+            set(ETHOS_U_NPU_MEMORY_MODE_FLAG "-DETHOS_U_NPU_MEMORY_MODE=ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM")
+        else ()
+            message(FATAL_ERROR "Non compatible Ethos-U NPU memory mode and processor ${ETHOS_U_NPU_MEMORY_MODE} - ${ETHOS_U_NPU_ID}. `dedicated_sram` can be used only for Ethos-U65.")
+        endif ()
+
+    else ()
+        message(FATAL_ERROR "Non compatible Ethos-U NPU memory mode ${ETHOS_U_NPU_MEMORY_MODE}")
+    endif ()
+
+    set(OPTIONAL_FLAGS      "${OPTIONAL_FLAGS} ${ETHOS_U_NPU_FLAG} ${ETHOS_U_NPU_MEMORY_MODE_FLAG}")
 endif ()
 
 # Set specific flags depending on target platform and subsystem
@@ -86,8 +131,13 @@
 add_linker_script(${LINKER_SCRIPT_DIR} ${LINKER_SCRIPT_NAME})
 
 if (ETHOS_U_NPU_ENABLED)
+    if ("${ETHOS_U_NPU_ID}" STREQUAL "U55") # default timing adapter config follows the selected NPU
+        set(DEFAULT_TA_CONFIG_FILE_PATH "${CMAKE_SCRIPTS_DIR}/timing_adapter/ta_config_u55_high_end.cmake")
+    else ()
+        set(DEFAULT_TA_CONFIG_FILE_PATH "${CMAKE_SCRIPTS_DIR}/timing_adapter/ta_config_u65_high_end.cmake")
+    endif ()
     USER_OPTION(TA_CONFIG_FILE "Path to the timing adapter configuration file"
-            "${CMAKE_SCRIPTS_DIR}/timing_adapter/ta_config_u55_high_end.cmake"
+            "${DEFAULT_TA_CONFIG_FILE_PATH}"
             FILEPATH)
 
     # must be included after target subsystem CMake file
diff --git a/scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake b/scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake
index 30e1516..c822dc0 100644
--- a/scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake
+++ b/scripts/cmake/timing_adapter/ta_config_u55_high_end.cmake
@@ -45,20 +45,41 @@
 set(TA0_HISTCNT     "0"        CACHE STRING "32-bit field. Read/write the selected histogram bin.")
 
 # Timing adapter settings for AXI1
-set(TA1_MAXR        "2"       CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite")
-set(TA1_MAXW        "0"       CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite")
-set(TA1_MAXRW       "0"       CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite")
-set(TA1_RLATENCY    "64"      CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.")
-set(TA1_WLATENCY    "0"       CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.")
-set(TA1_PULSE_ON    "320"     CACHE STRING "No. of cycles addresses let through (0-65535).")
-set(TA1_PULSE_OFF   "80"      CACHE STRING "No. of cycles addresses blocked (0-65535).")
-set(TA1_BWCAP       "50"      CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite")
-set(TA1_PERFCTRL    "0"       CACHE STRING "6-bit field selecting an event for event counter 0=default")
-set(TA1_PERFCNT     "0"       CACHE STRING "32-bit event counter")
-set(TA1_MODE        "1"       CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun;
-                                            Bit 1: 1=enable random AR reordering (0=default);
-                                            Bit 2: 1=enable random R reordering (0=default);
-                                            Bit 3: 1=enable random B reordering (0=default);
-                                            Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed")
-set(TA1_HISTBIN     "0"       CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.")
-set(TA1_HISTCNT     "0"       CACHE STRING "32-bit field. Read/write the selected histogram bin.")
+# In Sram_Only memory mode the AXI1 timing adapter settings must mirror the AXI0 ones; otherwise the AXI1 defaults apply.
+if ("${ETHOS_U_NPU_MEMORY_MODE}" STREQUAL "Sram_Only")
+    set(TA1_MAXR        "${TA0_MAXR}"       CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite")
+    set(TA1_MAXW        "${TA0_MAXW}"       CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite")
+    set(TA1_MAXRW       "${TA0_MAXRW}"      CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite")
+    set(TA1_RLATENCY    "${TA0_RLATENCY}"   CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.")
+    set(TA1_WLATENCY    "${TA0_WLATENCY}"   CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.")
+    set(TA1_PULSE_ON    "${TA0_PULSE_ON}"   CACHE STRING "No. of cycles addresses let through (0-65535).")
+    set(TA1_PULSE_OFF   "${TA0_PULSE_OFF}"  CACHE STRING "No. of cycles addresses blocked (0-65535).")
+    set(TA1_BWCAP       "${TA0_BWCAP}"      CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite")
+    set(TA1_PERFCTRL    "${TA0_PERFCTRL}"   CACHE STRING "6-bit field selecting an event for event counter 0=default")
+    set(TA1_PERFCNT     "${TA0_PERFCNT}"    CACHE STRING "32-bit event counter")
+    set(TA1_MODE        "${TA0_MODE}"       CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun;
+                                                          Bit 1: 1=enable random AR reordering (0=default);
+                                                          Bit 2: 1=enable random R reordering (0=default);
+                                                          Bit 3: 1=enable random B reordering (0=default);
+                                                          Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed")
+    set(TA1_HISTBIN     "${TA0_HISTBIN}"    CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.")
+    set(TA1_HISTCNT     "${TA0_HISTCNT}"    CACHE STRING "32-bit field. Read/write the selected histogram bin.")
+else ()
+    set(TA1_MAXR        "2"       CACHE STRING "6-bit field. Max no. of pending reads. 0=infinite")
+    set(TA1_MAXW        "0"       CACHE STRING "6-bit field. Max no. of pending writes. 0=infinite")
+    set(TA1_MAXRW       "0"       CACHE STRING "6-bit field. Max no. of pending reads+writes. 0=infinite")
+    set(TA1_RLATENCY    "64"      CACHE STRING "12-bit field. Minimum latency (clock cycles) from AVALID to RVALID.")
+    set(TA1_WLATENCY    "0"       CACHE STRING "12-bit field. Minimum latency (clock cycles) from WVALID&WLAST to BVALID.")
+    set(TA1_PULSE_ON    "320"     CACHE STRING "No. of cycles addresses let through (0-65535).")
+    set(TA1_PULSE_OFF   "80"      CACHE STRING "No. of cycles addresses blocked (0-65535).")
+    set(TA1_BWCAP       "50"      CACHE STRING "16-bit field. Max no. of 64-bit words transfered per pulse cycle 0=infinite")
+    set(TA1_PERFCTRL    "0"       CACHE STRING "6-bit field selecting an event for event counter 0=default")
+    set(TA1_PERFCNT     "0"       CACHE STRING "32-bit event counter")
+    set(TA1_MODE        "1"       CACHE STRING "Bit 0: 1=enable dynamic clocking to avoid underrun;
+                                                Bit 1: 1=enable random AR reordering (0=default);
+                                                Bit 2: 1=enable random R reordering (0=default);
+                                                Bit 3: 1=enable random B reordering (0=default);
+                                                Bit 11-4: Frequency scale 0=full speed, 255=(1/256) speed")
+    set(TA1_HISTBIN     "0"       CACHE STRING "Controls which histogram bin (0-15) that should be accessed by HISTCNT.")
+    set(TA1_HISTCNT     "0"       CACHE STRING "32-bit field. Read/write the selected histogram bin.")
+endif ()
diff --git a/scripts/vela/default_vela.ini b/scripts/vela/default_vela.ini
index 884b057..30de99d 100644
--- a/scripts/vela/default_vela.ini
+++ b/scripts/vela/default_vela.ini
@@ -34,6 +34,7 @@
 OffChipFlash_burst_length=128
 OffChipFlash_read_latency=64
 OffChipFlash_write_latency=64
+
 ; Ethos-U65 High-End: SRAM (16 GB/s) and DRAM (3.75 GB/s)
 [System_Config.Ethos_U65_High_End]
 core_clock=1e9
@@ -56,4 +57,12 @@
 const_mem_area=Axi1
 arena_mem_area=Axi0
 cache_mem_area=Axi0
-arena_cache_size=4194304
\ No newline at end of file
+arena_cache_size=4194304
+
+; Dedicated SRAM: the SRAM (384KB) is only for use by the Ethos-U
+; The non-SRAM memory is assumed to be read-writeable
+[Memory_Mode.Dedicated_Sram]
+const_mem_area=Axi1
+arena_mem_area=Axi1
+cache_mem_area=Axi0
+arena_cache_size=393216
\ No newline at end of file
diff --git a/set_up_default_resources.py b/set_up_default_resources.py
index 219cb3c..3fb2c8a 100755
--- a/set_up_default_resources.py
+++ b/set_up_default_resources.py
@@ -214,25 +214,38 @@
             output_dir = os.path.dirname(model)
             # model name after compiling with vela is an initial model name + _vela suffix
             vela_optimised_model_path = str(model).replace(".tflite", "_vela.tflite")
-            # we want it to be initial model name + _vela_H128 suffix which indicates selected MAC config.
-            new_vela_optimised_model_path = vela_optimised_model_path.replace("_vela.tflite", "_vela_H128.tflite")
 
-            if os.path.isfile(new_vela_optimised_model_path):
-                logging.info(f"File {new_vela_optimised_model_path} exists, skipping optimisation.")
-                continue
-
-            command = (f". {env_activate} && vela {model} " +
+            # Ethos-U NPU default generation
+            vela_opt_suffixes = ["_vela_H128.tflite", "_vela_Y256.tflite"]
+            vela_commands = [f". {env_activate} && vela {model} " +
                        "--accelerator-config=ethos-u55-128 " +
                        "--optimise Performance " +
                        f"--config {config_file} " +
                        "--memory-mode=Shared_Sram " +
                        "--system-config=Ethos_U55_High_End_Embedded " +
-                       f"--output-dir={output_dir}")
-            call_command(command)
+                       f"--output-dir={output_dir}",
 
-            # rename default vela model
-            os.rename(vela_optimised_model_path, new_vela_optimised_model_path)
-            logging.info(f"Renaming {vela_optimised_model_path} to {new_vela_optimised_model_path}.")
+                       f". {env_activate} && vela {model} " +
+                       "--accelerator-config=ethos-u65-256 " +
+                       "--optimise Performance " +
+                       f"--config {config_file} " +
+                       "--memory-mode=Dedicated_Sram " +
+                       "--system-config=Ethos_U65_High_End " +
+                       f"--output-dir={output_dir}"]
+
+            for vela_suffix, command in zip(vela_opt_suffixes, vela_commands):
+                # the final name is the initial model name + a suffix (_vela_H128 or _vela_Y256) that indicates the selected MACs config.
+                new_vela_optimised_model_path = vela_optimised_model_path.replace("_vela.tflite", vela_suffix)
+
+                if os.path.isfile(new_vela_optimised_model_path):
+                    logging.info(f"File {new_vela_optimised_model_path} exists, skipping optimisation.")
+                    continue
+
+                call_command(command)
+
+                # rename default vela model
+                os.rename(vela_optimised_model_path, new_vela_optimised_model_path)
+                logging.info(f"Renaming {vela_optimised_model_path} to {new_vela_optimised_model_path}.")
 
 
 if __name__ == '__main__':
diff --git a/source/application/hal/hal.c b/source/application/hal/hal.c
index 12bb7ef..53a304b 100644
--- a/source/application/hal/hal.c
+++ b/source/application/hal/hal.c
@@ -24,18 +24,39 @@
 
 #if defined(ARM_NPU)
 
+#include "ethosu_mem_config.h"          /* Arm Ethos-U memory config */
 #include "ethosu_driver.h"              /* Arm Ethos-U driver header */
 #include "timing_adapter.h"             /* Arm Ethos-U timing adapter driver header */
 #include "timing_adapter_settings.h"    /* Arm Ethos-U timing adapter settings */
 
 struct ethosu_driver ethosu_drv; /* Default Ethos-U device driver */
 
+#if defined(ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0)
+    static uint8_t  cache_arena[ETHOS_U_CACHE_BUF_SZ] CACHE_BUF_ATTRIBUTE;
+#else /* defined (ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0) */
+    static uint8_t* cache_arena = NULL;
+#endif /* defined (ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0) */
+
 /**
  * @brief   Initialises the Arm Ethos-U NPU
  * @return  0 if successful, error code otherwise
  **/
 static int arm_npu_init(void);
 
+static uint8_t * get_cache_arena()
+{
+    return cache_arena;
+}
+
+static size_t get_cache_arena_size()
+{
+#if defined(ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0)
+    return sizeof(cache_arena);
+#else /* defined (ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0) */
+    return 0;
+#endif /* defined (ETHOS_U_CACHE_BUF_SZ) && (ETHOS_U_CACHE_BUF_SZ > 0) */
+}
+
 #endif /* ARM_NPU */
 
 int hal_init(hal_platform* platform, data_acq_module* data_acq,
@@ -232,12 +253,12 @@
     const void * ethosu_base_address = (void *)(SEC_ETHOS_U_NPU_BASE);
 
     if (0 != (err = ethosu_init(
-                        &ethosu_drv,            /* Ethos-U driver device pointer */
-                        ethosu_base_address,    /* Ethos-U NPU's base address. */
-                        NULL,                   /* Pointer to fast mem area - NULL for U55. */
-                        0,                      /* Fast mem region size. */
-                        1,                      /* Security enable. */
-                        1))) {                  /* Privilege enable. */
+                        &ethosu_drv,             /* Ethos-U driver device pointer */
+                        ethosu_base_address,     /* Ethos-U NPU's base address. */
+                        get_cache_arena(),       /* Pointer to fast mem area - NULL for U55. */
+                        get_cache_arena_size(), /* Fast mem region size. */
+                        1,                       /* Security enable. */
+                        1))) {                   /* Privilege enable. */
         printf_err("failed to initalise Ethos-U device\n");
         return err;
     }
@@ -266,4 +287,5 @@
 
     return 0;
 }
+
 #endif /* ARM_NPU */
diff --git a/source/application/hal/platforms/bare-metal/bsp/include/bsp.h b/source/application/hal/platforms/bare-metal/bsp/include/bsp.h
index 20052ef..2bd4fa1 100644
--- a/source/application/hal/platforms/bare-metal/bsp/include/bsp.h
+++ b/source/application/hal/platforms/bare-metal/bsp/include/bsp.h
@@ -35,4 +35,8 @@
 
 #endif /* MPS3_PLATFORM */
 
+#if defined(ARM_NPU)
+#include "ethosu_mem_config.h"
+#endif /* defined(ARM_NPU) */
+
 #endif /* BSP_H */
diff --git a/source/application/hal/platforms/bare-metal/bsp/include/ethosu_mem_config.h b/source/application/hal/platforms/bare-metal/bsp/include/ethosu_mem_config.h
new file mode 100644
index 0000000..b393a03
--- /dev/null
+++ b/source/application/hal/platforms/bare-metal/bsp/include/ethosu_mem_config.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ETHOS_U_NPU_MEM_CONFIG_H
+#define ETHOS_U_NPU_MEM_CONFIG_H
+/* Memory mode identifiers: ETHOS_U_NPU_MEMORY_MODE must expand to one of these. */
+#define ETHOS_U_NPU_MEMORY_MODE_SRAM_ONLY           0
+#define ETHOS_U_NPU_MEMORY_MODE_SHARED_SRAM         1
+#define ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM      2
+
+#define ETHOS_U_MEM_BYTE_ALIGNMENT                  16
+/* Fall back to shared SRAM when the build does not select a memory mode. */
+#ifndef ETHOS_U_NPU_MEMORY_MODE
+    #define ETHOS_U_NPU_MEMORY_MODE                 ETHOS_U_NPU_MEMORY_MODE_SHARED_SRAM
+#endif /* ETHOS_U_NPU_MEMORY_MODE */
+
+#if (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM)
+    #define ETHOS_U_CACHE_BUF_SZ    (393216U)    /* 384 KiB; matches arena_cache_size of Memory_Mode.Dedicated_Sram in default_vela.ini */
+#else
+    #define ETHOS_U_CACHE_BUF_SZ    (0U)
+#endif /* ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM */
+
+/**
+ * Activation buffer aka tensor arena section name
+ * We have to place the tensor arena in different region based on the memory config.
+ **/
+#if (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_SHARED_SRAM)
+    #define ACTIVATION_BUF_SECTION      section(".bss.NoInit.activation_buf_sram")
+    #define ACTIVATION_BUF_SECTION_NAME ("SRAM")
+#elif (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_SRAM_ONLY)
+    #define ACTIVATION_BUF_SECTION      section(".bss.NoInit.activation_buf_sram")
+    #define ACTIVATION_BUF_SECTION_NAME ("SRAM")
+#elif (ETHOS_U_NPU_MEMORY_MODE==ETHOS_U_NPU_MEMORY_MODE_DEDICATED_SRAM)
+    #define ACTIVATION_BUF_SECTION      section("activation_buf_dram")
+    #define CACHE_BUF_SECTION           section(".bss.NoInit.ethos_u_cache")
+    #define ACTIVATION_BUF_SECTION_NAME ("DDR/DRAM")
+    #define CACHE_BUF_ATTRIBUTE         __attribute__((aligned(ETHOS_U_MEM_BYTE_ALIGNMENT), CACHE_BUF_SECTION))
+#endif /* ETHOS_U_NPU_MEMORY_MODE */
+
+#endif /* ETHOS_U_NPU_MEM_CONFIG_H */
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
index e5c2a14..bcbc81f 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
@@ -144,7 +144,11 @@
   .sram :
   {
     . = ALIGN(16);
-    *(.bss.NoInit.activation_buf)
+    /* Cache area (if used) */
+    *(.bss.NoInit.ethos_u_cache)
+    . = ALIGN (16);
+    /* activation buffers a.k.a tensor arena when memory mode sram only or shared sram */
+    *(.bss.NoInit.activation_buf_sram)
     . = ALIGN(16);
   } > SRAM AT > SRAM
 
@@ -177,13 +181,17 @@
     /* __attribute__((aligned(16))) is not handled by the CMSIS startup code.
      * Force the alignment here as a workaround */
     . = ALIGN(16);
+    /* nn model's baked in input matrices */
     *(ifm)
     . = ALIGN(16);
+    /* nn model's default space */
     *(nn_model)
     . = ALIGN (16);
+    /* labels */
     *(labels)
     . = ALIGN (16);
-    *(activation_buf)
+    /* activation buffers a.k.a tensor arena when memory mode dedicated sram */
+    *(activation_buf_dram)
     . = ALIGN (16);
   } > DDR AT > DDR
 
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
index 4760875..62dbbe5 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
@@ -67,8 +67,11 @@
     ;-----------------------------------------------------
     isram.bin       0x31000000  UNINIT ALIGN 16 0x00400000
     {
-        ; activation buffers a.k.a tensor arena
-        *.o (.bss.NoInit.activation_buf)
+        ; Cache area (if used)
+        *.o (.bss.NoInit.ethos_u_cache)
+
+        ; activation buffers a.k.a tensor arena when memory mode sram only or shared sram
+        *.o (.bss.NoInit.activation_buf_sram)
     }
 }
 
@@ -88,15 +91,14 @@
         ; nn model's baked in input matrices
         *.o (ifm)
 
-        ; nn model
+        ; nn model's default space
         *.o (nn_model)
 
         ; labels
         *.o (labels)
 
-        ; if the activation buffer (tensor arena) doesn't
-        ; fit in the SRAM region, we accommodate it here
-        *.o (activation_buf)
+        ; activation buffers a.k.a tensor arena when memory mode dedicated sram
+        *.o (activation_buf_dram)
     }
 
     ;-----------------------------------------------------
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld
index ceaff7d..e5b6bd9 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld
@@ -137,7 +137,11 @@
   .sram :
   {
     . = ALIGN(16);
-    *(.bss.NoInit.activation_buf)
+    /* Cache area (if used) */
+    *(.bss.NoInit.ethos_u_cache)
+    . = ALIGN (16);
+    /* activation buffers a.k.a tensor arena when memory mode sram only or shared sram */
+    *(.bss.NoInit.activation_buf_sram)
     . = ALIGN(16);
   } > SRAM AT > SRAM
 
@@ -170,13 +174,17 @@
     /* __attribute__((aligned(16))) is not handled by the CMSIS startup code.
      * Force the alignment here as a workaround */
     . = ALIGN(16);
+    /* nn model's baked in input matrices */
     *(ifm)
     . = ALIGN(16);
+    /* nn model's default space */
     *(nn_model)
     . = ALIGN (16);
+    /* labels */
     *(labels)
     . = ALIGN (16);
-    *(activation_buf)
+    /* activation buffers a.k.a tensor arena when memory mode dedicated sram */
+    *(activation_buf_dram)
     . = ALIGN (16);
   } > DDR AT > DDR
 
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
index 0c6a388..e84d81e 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
@@ -16,6 +16,10 @@
 ; *************************************************************
 ; ***       Scatter-Loading Description File                ***
 ; *************************************************************
+; Please see docs/sections/appendix.md for memory mapping information.
+;
+; Note: Ethos-U NPU can access BRAM, internal SRAM and the DDR sections => activation buffers and
+;       the model should only be placed in those regions.
 ;
 ;---------------------------------------------------------
 ; First load region (ITCM)
@@ -63,8 +67,11 @@
     ;-----------------------------------------------------
     isram.bin       0x31000000  UNINIT ALIGN 16 0x00400000
     {
-        ; activation buffers a.k.a tensor arena
-        *.o (.bss.NoInit.activation_buf)
+        ; Cache area (if used)
+        *.o (.bss.NoInit.ethos_u_cache)
+
+        ; activation buffers a.k.a tensor arena when memory mode sram only or shared sram
+        *.o (.bss.NoInit.activation_buf_sram)
     }
 }
 
@@ -84,15 +91,14 @@
         ; nn model's baked in input matrices
         *.o (ifm)
 
-        ; nn model
+        ; nn model's default space
         *.o (nn_model)
 
         ; labels
         *.o (labels)
 
-        ; if the activation buffer (tensor arena) doesn't
-        ; fit in the SRAM region, we accommodate it here
-        *.o (activation_buf)
+        ; activation buffers a.k.a tensor arena when memory mode dedicated sram
+        *.o (activation_buf_dram)
     }
 
     ;-----------------------------------------------------
diff --git a/source/application/main/Main.cc b/source/application/main/Main.cc
index 6e1c620..9622566 100644
--- a/source/application/main/Main.cc
+++ b/source/application/main/Main.cc
@@ -36,7 +36,7 @@
     info("%s\n", PRJ_DES_STR);
     info("Target system design: %s\n", DESIGN_NAME);
     info("Version %s Build date: " __DATE__ " @ " __TIME__ "\n", PRJ_VER_STR);
-    info("Copyright (C) ARM Ltd 2020. All rights reserved.\n\n");
+    info("Copyright (C) ARM Ltd 2021. All rights reserved.\n\n");
 }
 
 int main ()
diff --git a/source/application/tensorflow-lite-micro/include/BufAttributes.hpp b/source/application/tensorflow-lite-micro/include/BufAttributes.hpp
index 126172b..a3b5890 100644
--- a/source/application/tensorflow-lite-micro/include/BufAttributes.hpp
+++ b/source/application/tensorflow-lite-micro/include/BufAttributes.hpp
@@ -18,6 +18,17 @@
 #ifndef BUF_ATTRIBUTES_HPP
 #define BUF_ATTRIBUTES_HPP
 
+#if defined(ARM_NPU)
+    /* When Arm NPU is defined, we use the config set by NPU mem parameters */
+    #include "ethosu_mem_config.h"
+    #define BYTE_ALIGNMENT              ETHOS_U_MEM_BYTE_ALIGNMENT
+#else /* defined(ARM_NPU) */
+    /* otherwise, we use the default ones here. */
+    #define ACTIVATION_BUF_SECTION      section(".bss.NoInit.activation_buf_sram")
+    #define ACTIVATION_BUF_SECTION_NAME ("SRAM")
+    #define BYTE_ALIGNMENT              16
+#endif /* defined(ARM_NPU) */
+
 #ifdef __has_attribute
 #define HAVE_ATTRIBUTE(x) __has_attribute(x)
 #else   /* __has_attribute */
@@ -27,9 +38,8 @@
 #if HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__))
 
 /* We want all buffers/sections to be aligned to 16 byte.  */
-#define ALIGNMENT_REQ               aligned(16)
+#define ALIGNMENT_REQ               aligned(BYTE_ALIGNMENT)
 
-/* Model data section name. */
 #define MODEL_SECTION               section("nn_model")
 
 /* Label section name */
@@ -45,24 +55,6 @@
     #define ACTIVATION_BUF_SRAM_SZ  0x00000000
 #endif /* ACTIVATION_BUF_SRAM_SZ */
 
-/**
- * Activation buffer aka tensor arena section name
- * We have to place the tensor arena in different region based on its size.
- * If it fits in SRAM, we place it there, and also mark it by giving it a
- * different section name. The scatter file places the ZI data in DDR and
- * the uninitialised region in the SRAM.
- **/
-#define ACTIVATION_BUF_SECTION_SRAM section(".bss.NoInit.activation_buf")
-#define ACTIVATION_BUF_SECTION_DRAM section("activation_buf")
-
-#if     ACTIVATION_BUF_SZ > ACTIVATION_BUF_SRAM_SZ /* Will buffer not fit in SRAM? */
-    #define ACTIVATION_BUF_SECTION      ACTIVATION_BUF_SECTION_DRAM
-    #define ACTIVATION_BUF_SECTION_NAME ("DDR")
-#else   /* ACTIVATION_BUF_SZ > 0x00200000 */
-    #define ACTIVATION_BUF_SECTION  ACTIVATION_BUF_SECTION_SRAM
-    #define ACTIVATION_BUF_SECTION_NAME ("SRAM")
-#endif  /* ACTIVATION_BUF_SZ > 0x00200000 */
-
 /* IFM section name. */
 #define IFM_BUF_SECTION             section("ifm")
 
diff --git a/source/use_case/ad/usecase.cmake b/source/use_case/ad/usecase.cmake
index 72683d1..9ddf66e 100644
--- a/source/use_case/ad/usecase.cmake
+++ b/source/use_case/ad/usecase.cmake
@@ -61,7 +61,7 @@
         STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ad_medium_int8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ad_medium_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ad_medium_int8.tflite)
 endif()
diff --git a/source/use_case/asr/usecase.cmake b/source/use_case/asr/usecase.cmake
index 21fc80d..60fc132 100644
--- a/source/use_case/asr/usecase.cmake
+++ b/source/use_case/asr/usecase.cmake
@@ -75,7 +75,7 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8.tflite)
 endif()
diff --git a/source/use_case/img_class/usecase.cmake b/source/use_case/img_class/usecase.cmake
index e46de00..441a346 100644
--- a/source/use_case/img_class/usecase.cmake
+++ b/source/use_case/img_class/usecase.cmake
@@ -47,7 +47,7 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/mobilenet_v2_1.0_224_INT8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/mobilenet_v2_1.0_224_INT8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/mobilenet_v2_1.0_224_INT8.tflite)
 endif()
diff --git a/source/use_case/inference_runner/usecase.cmake b/source/use_case/inference_runner/usecase.cmake
index bab5c65..7f9c74f 100644
--- a/source/use_case/inference_runner/usecase.cmake
+++ b/source/use_case/inference_runner/usecase.cmake
@@ -22,7 +22,7 @@
 generate_default_input_code(${INC_GEN_DIR})
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/dnn_s_quantized_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/dnn_s_quantized_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/dnn_s_quantized.tflite)
 endif()
diff --git a/source/use_case/kws/usecase.cmake b/source/use_case/kws/usecase.cmake
index 6d0ad7e..bd54cea 100644
--- a/source/use_case/kws/usecase.cmake
+++ b/source/use_case/kws/usecase.cmake
@@ -74,7 +74,7 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8.tflite)
 endif()
diff --git a/source/use_case/kws_asr/usecase.cmake b/source/use_case/kws_asr/usecase.cmake
index 716a8c8..e9b9150 100644
--- a/source/use_case/kws_asr/usecase.cmake
+++ b/source/use_case/kws_asr/usecase.cmake
@@ -67,8 +67,8 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_H128.tflite)
-    set(DEFAULT_MODEL_PATH_ASR      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
+    set(DEFAULT_MODEL_PATH_ASR      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8.tflite)
     set(DEFAULT_MODEL_PATH_ASR      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8.tflite)
diff --git a/source/use_case/vww/usecase.cmake b/source/use_case/vww/usecase.cmake
index 0201aed..4005297 100644
--- a/source/use_case/vww/usecase.cmake
+++ b/source/use_case/vww/usecase.cmake
@@ -31,7 +31,7 @@
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/vww4_128_128_INT8_vela_H128.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/vww4_128_128_INT8_vela_${DEFAULT_NPU_CONFIG_ID}.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/vww4_128_128_INT8.tflite)
 endif()