MLECO-1860: Support for Arm GNU Embedded Toolchain

This patch enables compilation of the bare-metal ML use case applications
using the Arm GNU Embedded Toolchain. The GNU toolchain can be used instead
of the Arm Compiler that was already supported.

The GNU toolchain is also set as the default toolchain when building
applications for the MPS3 target.

Note: The version of GNU toolchain must be 10.2.1 or higher.

Change-Id: I5fff242f0f52d2db6c75d292f9fa990df1aec978
Signed-off-by: Kshitij Sisodia <kshitij.sisodia@arm.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0581a2c..411b4f2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,15 +39,9 @@
 set(CMAKE_C_EXTENSIONS OFF)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
-project(arm_ethos_u55_eval
-        VERSION     21.03
-        DESCRIPTION "ARM Ethos-U55 Evaluation application for MPS3 FPGA Prototyping Board and FastModel")
-
-add_compile_definitions(PRJ_VER_STR="${PROJECT_VERSION}")
-add_compile_definitions(PRJ_DES_STR="${PROJECT_DESCRIPTION}")
-
-set(CMAKE_SCRIPTS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake/)
-set(DOWNLOAD_DEP_DIR  ${CMAKE_BINARY_DIR}/dependencies)
+set(CMAKE_SCRIPTS_DIR   ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake)
+set(DOWNLOAD_DEP_DIR    ${CMAKE_BINARY_DIR}/dependencies)
+set(CMAKE_TOOLCHAIN_DIR ${CMAKE_SCRIPTS_DIR}/toolchains)
 
 include(${CMAKE_SCRIPTS_DIR}/source_gen_utils.cmake)
 include(${CMAKE_SCRIPTS_DIR}/util_functions.cmake)
@@ -88,15 +82,28 @@
     BOOL)
 
 if (TARGET_PLATFORM STREQUAL mps3)
-    message(STATUS "Platform: MPS3 FPGA Prototyping Board or SSE-XXX FVP")
+    message(STATUS "Platform: MPS3 FPGA Prototyping Board or FVP")
+    set(DEFAULT_TOOLCHAIN_FILE ${CMAKE_TOOLCHAIN_DIR}/bare-metal-gcc.cmake)
 elseif (TARGET_PLATFORM STREQUAL simple_platform)
-    message(STATUS "Platform: Simple platform within minimal peripherals")
+    message(STATUS "Platform: Simple platform with minimal peripherals")
+    set(DEFAULT_TOOLCHAIN_FILE ${CMAKE_TOOLCHAIN_DIR}/bare-metal-gcc.cmake)
 elseif (TARGET_PLATFORM STREQUAL native)
     message(STATUS "Platform: Native (Linux based x86_64/aarch64 system)")
+    set(DEFAULT_TOOLCHAIN_FILE ${CMAKE_TOOLCHAIN_DIR}/native-gcc.cmake)
 else ()
     message(FATAL_ERROR "Invalid platform specified: ${TARGET_PLATFORM}")
 endif ()
 
+if (NOT DEFINED CMAKE_TOOLCHAIN_FILE)
+    set(CMAKE_TOOLCHAIN_FILE ${DEFAULT_TOOLCHAIN_FILE}
+        CACHE FILEPATH "Toolchain file")
+endif()
+message(STATUS "Using CMAKE_TOOLCHAIN_FILE: ${CMAKE_TOOLCHAIN_FILE}")
+
+project(arm_ml_embedded_evaluation_kit
+        VERSION     21.05
+        DESCRIPTION "ARM ML Embedded Evaluation Kit for MPS3 FPGA and FastModel")
+
 enforce_compiler_version()
 setup_source_generator()
 
@@ -318,9 +325,11 @@
         "${SRC_GEN_DIR}/*.c"
         )
 
-    set(SRC_MAIN
-        "${SRC_PATH}/application/main/Main.cc"
-        )
+    set(SRC_MAIN "${SRC_PATH}/application/main/Main.cc")
+
+    set_source_files_properties(${SRC_MAIN}
+        PROPERTIES COMPILE_DEFINITIONS
+        "PRJ_VER_STR=\"${PROJECT_VERSION}\";PRJ_DES_STR=\"${PROJECT_DESCRIPTION}\"")
 
     set(UC_LIB_NAME lib${TARGET_NAME})
 
@@ -364,32 +373,30 @@
         $<$<BOOL:${ETHOS_U55_ENABLED}>:${ETHOS_U55_LIBS}>
         $<$<BOOL:${CMSIS_DSP_LIB}>:${CMSIS_DSP_LIB}>)
 
-    add_executable(${TARGET_NAME} ${SRC_MAIN})
+    add_executable(${TARGET_NAME} ${SRC_MAIN} ${PLAT_RETARGET_SOURCE})
 
     target_link_libraries(${TARGET_NAME} ${UC_LIB_NAME})
 
     if (${CMAKE_CROSSCOMPILING})
         set_target_properties(${TARGET_NAME} PROPERTIES SUFFIX ".axf")
+        add_target_map_file(${TARGET_NAME} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET_NAME}.map)
     endif()
 
     if (${TARGET_PLATFORM} STREQUAL mps3)
 
-        SET(SECTORS_DIR ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/sectors/${use_case})
-        file(REMOVE_RECURSE ${SECTORS_DIR})
-        file(MAKE_DIRECTORY ${SECTORS_DIR})
+        set(SECTORS_DIR ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/sectors)
+        set(SECTORS_BIN_DIR ${SECTORS_DIR}/${use_case})
 
-        add_custom_command(TARGET ${TARGET_NAME}
-            POST_BUILD
-            COMMAND fromelf --bin --output=${SECTORS_DIR}/
-            ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET_NAME}.axf)
+        file(REMOVE_RECURSE ${SECTORS_BIN_DIR})
+        file(MAKE_DIRECTORY ${SECTORS_BIN_DIR})
+        file(COPY ${MPS3_FPGA_CONFIG} DESTINATION ${SECTORS_DIR})
 
-        add_custom_target(
-            run-${use_case} ALL
-            COMMAND ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/scripts/py/gen_fpga_mem_map.py
-            --scatter_file_path ${SCAT_FILE}
-            --target_subsystem  ${TARGET_SUBSYSTEM}
-            --output_file_path  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/images-${use_case}.txt
-            COMMENT "Generating FPGA mappings file")
+        add_bin_generation_command(
+            TARGET_NAME ${TARGET_NAME}
+            OUTPUT_DIR  ${SECTORS_BIN_DIR}
+            AXF_PATH    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET_NAME}.axf
+            SECTION_PATTERNS    "${MPS3_SECTION_PATTERNS}"
+            OUTPUT_BIN_NAMES    "${MPS3_OUTPUT_BIN_NAMES}")
     elseif (${TARGET_PLATFORM} STREQUAL native)
 
         # If native build tests
diff --git a/build_default.py b/build_default.py
index 318ccb1..b61da13 100644
--- a/build_default.py
+++ b/build_default.py
@@ -26,12 +26,13 @@
 from set_up_default_resources import set_up_resources
 
 
-def run(download_resources, run_vela_on_models):
+def run(toolchain: str, download_resources: bool, run_vela_on_models: bool):
     """
     Run the helpers scripts.
 
     Parameters:
     ----------
+    toolchain (str)          :    Specifies if 'gnu' or 'arm' toolchain needs to be used.
     download_resources (bool):    Specifies if 'Download resources' step is performed.
     run_vela_on_models (bool):    Only if `download_resources` is True, specifies if run vela on downloaded models.
     """
@@ -40,16 +41,25 @@
     logging.basicConfig(filename='log_build_default.log', level=logging.DEBUG)
     logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
 
-    # 1. Download models if specified
+    # 1. Make sure the toolchain is supported, and set the right one here
+    supported_toolchain_ids = ["gnu", "arm"]
+    assert toolchain in supported_toolchain_ids, f"Toolchain must be from {supported_toolchain_ids}"
+    if toolchain == "arm":
+        toolchain_file_name = "bare-metal-armclang.cmake"
+    elif toolchain == "gnu":
+        toolchain_file_name = "bare-metal-gcc.cmake"
+
+    # 2. Download models if specified
     if download_resources is True:
         logging.info("Downloading resources.")
         set_up_resources(run_vela_on_models)
 
-    # 2. Build default configuration
+    # 3. Build default configuration
     logging.info("Building default configuration.")
     target_platform = "mps3"
     target_subsystem = "sse-300"
-    build_dir = os.path.join(current_file_dir, f"cmake-build-{target_platform}-{target_subsystem}-release")
+    build_dir = os.path.join(current_file_dir,
+        f"cmake-build-{target_platform}-{target_subsystem}-{toolchain}-release")
     try:
         os.mkdir(build_dir)
     except FileExistsError:
@@ -63,10 +73,14 @@
                     shutil.rmtree(filepath)
             except Exception as e:
                 logging.error('Failed to delete %s. Reason: %s' % (filepath, e))
+
     os.chdir(build_dir)
-    cmake_toolchain_file = os.path.join(current_file_dir, "scripts", "cmake", "bare-metal-toolchain.cmake")
-    cmake_command = (f"cmake .. -DTARGET_PLATFORM={target_platform} -DTARGET_SUBSYSTEM={target_subsystem} " +
-                     f"-DCMAKE_TOOLCHAIN_FILE={cmake_toolchain_file} ")
+    cmake_toolchain_file = os.path.join(current_file_dir, "scripts", "cmake",
+                                        "toolchains", toolchain_file_name)
+    cmake_command = (f"cmake .. -DTARGET_PLATFORM={target_platform} " +
+                     f"-DTARGET_SUBSYSTEM={target_subsystem} " +
+                     f" -DCMAKE_TOOLCHAIN_FILE={cmake_toolchain_file}")
+
     logging.info(cmake_command)
     state = subprocess.run(cmake_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     logging.info(state.stdout.decode('utf-8'))
@@ -79,6 +93,11 @@
 
 if __name__ == '__main__':
     parser = ArgumentParser()
+    parser.add_argument("--toolchain", default="gnu",
+                        help="""
+                        Specify the toolchain to use (Arm or GNU).
+                        Options are [gnu, arm]; default is gnu.
+                        """)
     parser.add_argument("--skip-download",
                         help="Do not download resources: models and test vectors",
                         action="store_true")
@@ -86,4 +105,4 @@
                         help="Do not run Vela optimizer on downloaded models.",
                         action="store_true")
     args = parser.parse_args()
-    run(not args.skip_download, not args.skip_vela)
+    run(args.toolchain.lower(), not args.skip_download, not args.skip_vela)
diff --git a/docs/documentation.md b/docs/documentation.md
index ece2f56..050ca60 100644
--- a/docs/documentation.md
+++ b/docs/documentation.md
@@ -38,10 +38,9 @@
 - Linux x86_64 based machine or Windows Subsystem for Linux is preferable.
   Unfortunately, Windows is not supported as a build environment yet.
 
-- Arm Compiler license (version 6.14 or above).
-
-  - [Arm Compiler Download
-        Page](https://developer.arm.com/tools-and-software/embedded/arm-compiler/downloads/)
+- At least one of the following toolchains:
+  - GNU Arm Embedded Toolchain (version 10.2.1 or above) - [GNU Arm Embedded Toolchain Downloads](https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads)
+  - Arm Compiler (version 6.14 or above) with a valid license - [Arm Compiler Download Page](https://developer.arm.com/tools-and-software/embedded/arm-compiler/downloads/)
 
 - An Arm® MPS3 FPGA prototyping board and components for FPGA evaluation or a `Fixed Virtual Platform` binary:
   - An MPS3 board loaded with  Arm® Corstone™-300 reference package (`AN547`) from:
diff --git a/docs/quick_start.md b/docs/quick_start.md
index 5595152..f3565e8 100644
--- a/docs/quick_start.md
+++ b/docs/quick_start.md
@@ -71,9 +71,6 @@
 
     ```commandline
     cmake \
-        -DTARGET_PLATFORM=mps3 \
-        -DTARGET_SUBSYSTEM=sse-300 \
-        -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
         -DUSE_CASE_BUILD=kws \
         -Dkws_MODEL_TFLITE_PATH=output/ds_cnn_clustered_int8_vela.tflite \
         ..
diff --git a/docs/sections/building.md b/docs/sections/building.md
index c8eef7c..4b1514b 100644
--- a/docs/sections/building.md
+++ b/docs/sections/building.md
@@ -8,8 +8,15 @@
   - [Build options](#build-options)
   - [Build process](#build-process)
     - [Preparing build environment](#preparing-build-environment)
+      - [Fetching submodules](#fetching-submodules)
+      - [Fetching resource files](#fetching-resource-files)
     - [Create a build directory](#create-a-build-directory)
     - [Configuring the build for MPS3: SSE-300](#configuring-the-build-for-mps3-sse-300)
+      - [Using GNU Arm Embedded Toolchain](#using-gnu-arm-embedded-toolchain)
+      - [Using Arm Compiler](#using-arm-compiler)
+      - [Generating project for Arm Development Studio](#generating-project-for-arm-development-studio)
+      - [Working with model debugger from Arm FastModel Tools](#working-with-model-debugger-from-arm-fastmodel-tools)
+      - [Configuring with custom TPIP dependencies](#configuring-with-custom-tpip-dependencies)
     - [Configuring native unit-test build](#configuring-native-unit-test-build)
     - [Configuring the build for simple_platform](#configuring-the-build-for-simple_platform)
     - [Building the configured project](#building-the-configured-project)
@@ -26,8 +33,8 @@
 Before proceeding, please, make sure that the following prerequisites
 are fulfilled:
 
-- Arm Compiler version 6.14 or above is installed and available on the
-    path.
+- GNU Arm embedded toolchain 10.2.1 (or higher) or the Arm Compiler version 6.14 (or higher)
+    is installed and available on the path.
 
     Test the compiler by running:
 
@@ -40,11 +47,25 @@
     Component: ARM Compiler 6.14
     ```
 
+    Alternatively,
+
+    ```commandline
+    arm-none-eabi-gcc --version
+    ```
+    ```log
+    arm-none-eabi-gcc (GNU Arm Embedded Toolchain 10-2020-q4-major) 10.2.1 20201103 (release)
+    Copyright (C) 2020 Free Software Foundation, Inc.
+    This is free software; see the source for copying conditions.  There is NO
+    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    ```
+
     > **Note:** Add compiler to the path, if needed:
     >
     > `export PATH=/path/to/armclang/bin:$PATH`
+    >           OR
+    > `export PATH=/path/to/gcc-arm-none-eabi-toolchain/bin:$PATH`
 
-- Compiler license is configured correctly
+- Compiler license, if using the proprietary Arm Compiler, is configured correctly.
 
 - CMake version 3.15 or above is installed and available on the path.
     Test CMake by running:
@@ -93,26 +114,24 @@
 
     > **Note:** Add it to the path environment variable, if needed.
 
-- Access to the Internet to download the third party dependencies, specifically: TensorFlow Lite Micro, Arm Ethos-U55 NPU
+- Access to the Internet to download the third party dependencies, specifically: TensorFlow Lite Micro, Arm® Ethos™-U55 NPU
 driver and CMSIS. Instructions for downloading these are listed under [preparing build environment](#preparing-build-environment).
 
 ## Build options
 
-The project build system allows user to specify custom NN
-model (in `.tflite` format) or images and compile application binary from
-sources.
-
-The build system uses pre-built TensorFlow Lite for Microcontrollers
-library and Arm® Ethos™-U55 driver libraries from the delivery package.
+The project build system allows the user to specify custom neural network
+models (in `.tflite` format) for each use case along with the network
+inputs. It also builds the TensorFlow Lite for Microcontrollers library,
+the Arm® Ethos™-U55 driver library, and the CMSIS-DSP library from sources.
 
 The build script is parameterized to support different options. Default
-values for build parameters will build the executable compatible with
-the Ethos-U55 NPU Fast Model.
+values for build parameters will build the applications for all use cases
+for the Arm® Corstone™-300 design that can execute on an MPS3 FPGA or the FVP.
 
 The build parameters are:
 
 - `TARGET_PLATFORM`: Target platform to execute application:
-  - `mps3`
+  - `mps3` (default)
   - `native`
   - `simple_platform`
 
@@ -122,6 +141,10 @@
     SSE-300:
   - `sse-300` (default - [Arm® Corstone™-300](https://developer.arm.com/ip-products/subsystem/corstone/corstone-300))
 
+- `CMAKE_TOOLCHAIN_FILE`: This built-in CMake parameter can be used to override the
+    default toolchain file used for the build. All the valid toolchain files are in the
+    `scripts/cmake/toolchains` directory. For example, see [bare-metal-gcc.cmake](../../scripts/cmake/toolchains/bare-metal-gcc.cmake).
+
 - `TENSORFLOW_SRC_PATH`: Path to the root of the TensorFlow directory.
     The default value points to the TensorFlow submodule in the
     [ethos-u](https://git.mlplatform.org/ml/ethos-u/ethos-u.git/about/) `dependencies` folder.
@@ -221,6 +244,8 @@
 
 ### Preparing build environment
 
+#### Fetching submodules
+
 Certain third party sources are required to be present on the development machine to allow the example sources in this
 repository to link against.
 
@@ -252,6 +277,21 @@
 >paths can be overridden by CMake configuration arguments `TENSORFLOW_SRC_PATH`, `ETHOS_U55_DRIVER_SRC_PATH`,
 >and `CMSIS_SRC_PATH`.
 
+
+#### Fetching resource files
+
+All the ML use case examples in this repository also depend on external neural
+network models. To download these, run the following command from the root of
+the repository:
+
+```sh
+python3 ./set_up_default_resources.py
+```
+
+This will fetch all the models into the `resources_downloaded` directory. It will
+also optimize the models using the Vela compiler for the default 128 MAC configuration
+of the Arm® Ethos™-U55 NPU.
+
 ### Create a build directory
 
 Create a build directory in the root of the project and navigate inside:
@@ -262,68 +302,88 @@
 
 ### Configuring the build for MPS3: SSE-300
 
-On Linux, execute the following command to build the application to run
-on the Ethos-U55 NPU when providing only the mandatory arguments for CMake configuration:
+#### Using GNU Arm Embedded Toolchain
+
+On Linux, if using `GNU Arm Embedded Toolchain`, execute the following command
+to build the application to run on the Arm® Ethos™-U55 NPU when providing only
+the mandatory arguments for CMake configuration:
+
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake ..
+cmake ../
 ```
 
-Toolchain option `CMAKE_TOOLCHAIN_FILE` points to the toolchain specific
-file to set the compiler and platform specific parameters.
+The above command will build for the default target platform `mps3` and the default subsystem
+`sse-300`, using the default toolchain file for the target, `bare-metal-gcc.cmake`. This is
+equivalent to:
+
+
+```commandline
+cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-gcc.cmake \
+    -DTARGET_PLATFORM=mps3 \
+    -DTARGET_SUBSYSTEM=sse-300
+```
+
+#### Using Arm Compiler
+
+If using `Arm Compiler` instead, the toolchain option `CMAKE_TOOLCHAIN_FILE` can be used to
+point to the ARMClang CMake file instead to set the compiler and platform specific parameters.
+
+```commandline
+cmake ../ -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake
+```
 
 To configure a build that can be debugged using Arm Development Studio, we can just specify
 the build type as `Debug`:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DCMAKE_BUILD_TYPE=Debug ..
+cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
+    -DCMAKE_BUILD_TYPE=Debug
 ```
 
+#### Generating project for Arm Development Studio
+
 To be able to import the project in Arm Development Studio, add the Eclipse project generator and CMAKE_ECLIPSE_VERSION in the CMake command. It is advisable that the build directory is one level up relative to the source directory. When the build has been generated, you need to follow the Import wizard in Arm Development Studio and import the existing project into the workspace. You can then compile and debug the project using Arm Development Studio. Note that the below command is executed one level up from the source directory.
 
 ```commandline
 cmake \
     -DTARGET_PLATFORM=mps3 \
     -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
     -DCMAKE_BUILD_TYPE=Debug \
     -G "Eclipse CDT4 - Unix Makefiles" \
     -DCMAKE_ECLIPSE_VERSION=4.15 \
     ml-embedded-evaluation-kit
 ```
 
+#### Working with model debugger from Arm FastModel Tools
+
 To configure a build that can be debugged using a tool that only supports
 DWARF format 3 (Modeldebugger for example), we can use:
 
 ```commandline
-cmake \
+cmake .. \
     -DTARGET_PLATFORM=mps3 \
     -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
     -DCMAKE_BUILD_TYPE=Debug \
-    -DARMCLANG_DEBUG_DWARF_LEVEL=3 ..
+    -DARMCLANG_DEBUG_DWARF_LEVEL=3
 ```
 
-If the TensorFlow source tree is not in its default expected location,
-set the path using `TENSORFLOW_SRC_PATH`.
-Similarly, if the Ethos-U55 NPU driver and CMSIS are not in the default location,
-`ETHOS_U55_DRIVER_SRC_PATH` and `CMSIS_SRC_PATH` can be used to configure their location. For example:
+#### Configuring with custom TPIP dependencies
+
+If the TensorFlow source tree is not in its default expected location, set the path
+using `TENSORFLOW_SRC_PATH`. Similarly, if the Ethos-U55 NPU driver and CMSIS are
+not in the default location, `ETHOS_U55_DRIVER_SRC_PATH` and `CMSIS_SRC_PATH` can be
+used to configure their location. For example:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+cmake .. \
     -DTENSORFLOW_SRC_PATH=/my/custom/location/tensorflow \
     -DETHOS_U55_DRIVER_SRC_PATH=/my/custom/location/core_driver \
-    -DCMSIS_SRC_PATH=/my/custom/location/cmsis ..
+    -DCMSIS_SRC_PATH=/my/custom/location/cmsis
 ```
 
 > **Note:** If re-building with changed parameters values, it is
@@ -332,9 +392,7 @@
 ### Configuring native unit-test build
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=native \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/native-toolchain.cmake ..
+cmake ../ -DTARGET_PLATFORM=native
 ```
 
 Results of the build will be placed under `build/bin/` folder:
@@ -348,9 +406,15 @@
 ### Configuring the build for simple_platform
 
 ```commandline
-cmake \
+cmake ../ -DTARGET_PLATFORM=simple_platform
+```
+
+Again, if using `Arm Compiler`, use:
+
+```commandline
+cmake .. \
     -DTARGET_PLATFORM=simple_platform \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake ..
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake
 ```
 
 ### Building the configured project
@@ -531,11 +595,7 @@
 An example of the build with custom timing adapter configuration:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTA_CONFIG_FILE=scripts/cmake/my_ta_config.cmake ..
+cmake .. -DTA_CONFIG_FILE=scripts/cmake/my_ta_config.cmake
 ```
 
 ## Add custom inputs
@@ -563,12 +623,12 @@
 location of the associated labels file:
 
 ```commandline
-cmake \
+cmake .. \
     -D<use_case>_MODEL_TFLITE_PATH=<path/to/custom_model_after_vela.tflite> \
     -D<use_case>_LABELS_TXT_FILE=<path/to/labels_custom_model.txt> \
     -DTARGET_PLATFORM=mps3 \
     -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake ..
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake
 ```
 
 > **Note:** For the specific use case command see the relative section in the use case documentation.
diff --git a/docs/sections/customizing.md b/docs/sections/customizing.md
index b4b5bba..adf7749 100644
--- a/docs/sections/customizing.md
+++ b/docs/sections/customizing.md
@@ -2,20 +2,22 @@
 
 ## Contents
 
-- [Software project description](#software-project-description)
-- [HAL API](#hal-api)
-- [Main loop function](#main-loop-function)
-- [Application context](#application-context)
-- [Profiler](#profiler)
-- [NN Model API](#nn-model-api)
-- [Adding custom ML use case](#adding-custom-ml-use-case)
-- [Implementing main loop](#implementing-main-loop)
-- [Implementing custom NN model](#implementing-custom-nn-model)
-- [Executing inference](#executing-inference)
-- [Printing to console](#printing-to-console)
-- [Reading user input from console](#reading-user-input-from-console)
-- [Output to MPS3 LCD](#output-to-mps3-lcd)
-- [Building custom use case](#building-custom-use-case)
+- [Implementing custom ML application](#implementing-custom-ml-application)
+  - [Contents](#contents)
+  - [Software project description](#software-project-description)
+  - [HAL API](#hal-api)
+  - [Main loop function](#main-loop-function)
+  - [Application context](#application-context)
+  - [Profiler](#profiler)
+  - [NN Model API](#nn-model-api)
+  - [Adding custom ML use case](#adding-custom-ml-use-case)
+  - [Implementing main loop](#implementing-main-loop)
+  - [Implementing custom NN model](#implementing-custom-nn-model)
+  - [Executing inference](#executing-inference)
+  - [Printing to console](#printing-to-console)
+  - [Reading user input from console](#reading-user-input-from-console)
+  - [Output to MPS3 LCD](#output-to-mps3-lcd)
+  - [Building custom use case](#building-custom-use-case)
 
 This section describes how to implement a custom Machine Learning
 application running on `Arm® Corstone™-300` based FVP or on the Arm® MPS3 FPGA prototyping board.
@@ -731,11 +733,11 @@
 build command:
 
 ```commandline
-cmake \
+cmake .. \
   -DTARGET_PLATFORM=mps3 \
   -DTARGET_SUBSYSTEM=sse-300 \
   -DUSE_CASE_BUILD=hello_world \
-  -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake ..
+  -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake
 ```
 
 As a result, `ethos-u-hello_world.axf` should be created, MPS3 build
diff --git a/docs/sections/deployment.md b/docs/sections/deployment.md
index a6e9c3c..10acbcf 100644
--- a/docs/sections/deployment.md
+++ b/docs/sections/deployment.md
@@ -150,9 +150,9 @@
 
 For MPS3 board, instead of loading the axf file directly, the executable blobs
 generated under the *sectors/<use_case>* subdirectory need to be
-copied over to the MP3 board's micro SD card. Also, every use case build
-generates a corresponding images.txt file which is used by the MPS3 to
-understand which memory regions the blobs are to be loaded into.
+copied over to the MPS3 board's micro SD card. Also, the *sectors/images.txt* file is
+used by the MPS3 to understand which memory regions the blobs are to be loaded
+into.
 
 Once the USB A <--> B cable between the MPS3 and the development machine
 is connected and the MPS3 board powered on, the board should enumerate
@@ -160,10 +160,10 @@
 There might be two devices also, depending on the version of the board
 you are using. The device named `V2M-MPS3` or `V2MMPS3` is the `SD card`.
 
-If the axf/elf file is within 1MiB, it can be flashed into the FPGA
-memory directly without having to break it down into separate load
-region specific blobs. However, with neural network models exceeding
-this size, it becomes necessary to follow this approach.
+If the axf/elf file is within the ITCM load size limit, it can be copied into
+the FPGA memory directly without having to break it down into separate load
+region specific blobs. However, with neural network models exceeding this size,
+it becomes necessary to follow this approach.
 
 1. For example, the image classification use case will produce:
 
@@ -181,7 +181,7 @@
     cp -av ./bin/sectors/img_class/* /media/user/V2M-MPS3/SOFTWARE/
     ```
 
-2. The generated `\<use-case\>_images.txt` file needs to be copied
+2. The `./bin/sectors/images.txt` file needs to be copied
 over to the MPS3. The exact location for the destination will depend
 on the MPS3 board's version and the application note for the bit
 file in use.
@@ -190,7 +190,7 @@
 file:
 
     ```commandline
-    cp ./bin/images-img_class.txt /media/user/V2M-MPS3/MB/HBI0309C/ETHOSU/images.txt
+    cp ./bin/sectors/images.txt /media/user/V2M-MPS3/MB/HBI0309C/ETHOSU/images.txt
     ```
 
 3. Open the first serial port available from MPS3, for example,
diff --git a/docs/use_cases/ad.md b/docs/use_cases/ad.md
index 5a37a0a..5f210b1 100644
--- a/docs/use_cases/ad.md
+++ b/docs/use_cases/ad.md
@@ -108,74 +108,26 @@
 On Linux, execute the following command to build **only** Anomaly Detection application to run on the Ethos-U55 Fast Model when providing only the mandatory arguments for CMake configuration:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=./scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=ad ..
+cmake ../ -DUSE_CASE_BUILD=ad
 ```
-
-Toolchain option `CMAKE_TOOLCHAIN_FILE` points to the toolchain specific file to set the compiler and platform specific
-parameters.
-
 To configure a build that can be debugged using Arm-DS, we can just specify
-the build type as `Debug`:
+the build type as `Debug` and use the `Arm Compiler` toolchain file:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
     -DCMAKE_BUILD_TYPE=Debug \
-    -DUSE_CASE_BUILD=ad ..
+    -DUSE_CASE_BUILD=ad
 ```
 
-To configure a build that can be debugged using a tool that only supports
-DWARF format 3 (Modeldebugger for example), we can use:
+Also see:
+- [Configuring with custom TPIP dependencies](../sections/building.md#configuring-with-custom-tpip-dependencies)
+- [Using Arm Compiler](../sections/building.md#using-arm-compiler)
+- [Configuring the build for simple_platform](../sections/building.md#configuring-the-build-for-simple_platform)
+- [Working with model debugger from Arm FastModel Tools](../sections/building.md#working-with-model-debugger-from-arm-fastmodel-tools)
 
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DCMAKE_BUILD_TYPE=Debug \
-    -DARMCLANG_DEBUG_DWARF_LEVEL=3 \
-    -DUSE_CASE_BUILD=ad ..
-```
-
-> **Note:** If building for different Ethos-U55 configurations, see
-[Configuring build for different Arm Ethos-U55 configurations](../sections/building.md#Configuring-build-for-different-Arm-Ethos-U55-configurations):
-
-If the TensorFlow source tree is not in its default expected location,
-set the path using `TENSORFLOW_SRC_PATH`.
-Similarly, if the Ethos-U55 driver is not in the default location,
-`ETHOS_U55_DRIVER_SRC_PATH` can be used to configure the location. For example:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=/my/custom/location/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=/my/custom/location/core_driver \
-    -DUSE_CASE_BUILD=ad ..
-```
-
-Also, `CMSIS_SRC_PATH` parameter can be used to override the CMSIS sources used for compilation used by TensorFlow by
-default. For example, to use the CMSIS sources fetched by the ethos-u helper script, we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=../ethos-u/core_software/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=../ethos-u/core_software/core_driver \
-    -DCMSIS_SRC_PATH=../ethos-u/core_software/cmsis \
-    -DUSE_CASE_BUILD=ad ..
-```
-
-> **Note:** If re-building with changed parameters values, it is highly advised to clean the build directory and re-run the CMake command.
+> **Note:** If re-building with changed parameters values, it is highly advised to clean the build directory and re-run
+>the CMake command.
 
 If the CMake command succeeded, build the application as follows:
 
@@ -239,12 +191,9 @@
 Next set ad_FILE_PATH to the location of this folder when building:
 
 ```commandline
-cmake \
+cmake .. \
     -Dad_FILE_PATH=/tmp/custom_files/ \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=ad ..
+    -DUSE_CASE_BUILD=ad
 ```
 
 The audio flies found in the `ad_FILE_PATH` folder will be picked up and automatically converted to C++ files during the CMake
@@ -268,12 +217,9 @@
 An example:
 
 ```commandline
-cmake \
+cmake .. \
     -Dad_MODEL_TFLITE_PATH=<path/to/custom_ad_model_after_vela.tflite> \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=ad ..
+    -DUSE_CASE_BUILD=ad
 ```
 
 > **Note:** Clean the build directory before re-running the CMake command.
diff --git a/docs/use_cases/asr.md b/docs/use_cases/asr.md
index d20dc5a..ec10fdb 100644
--- a/docs/use_cases/asr.md
+++ b/docs/use_cases/asr.md
@@ -1,19 +1,19 @@
 # Automatic Speech Recognition Code Sample
 
-- [Introduction](#introduction)
-  - [Prerequisites](#prerequisites)
-- [Building the code sample application from sources](#building-the-code-sample-application-from-sources)
-  - [Build options](#build-options)
-  - [Build process](#build-process)
-  - [Add custom input](#add-custom-input)
-  - [Add custom model](#add-custom-model)
-- [Setting-up and running Ethos-U55 Code Sample](#setting-up-and-running-ethos-u55-code-sample)
-  - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
-  - [Starting Fast Model simulation](#starting-fast-model-simulation)
-  - [Running Automatic Speech Recognition](#running-automatic-speech-recognition)
-- [Automatic Speech Recognition processing information](#automatic-speech-recognition-processing-information)
-  - [Preprocessing and feature extraction](#preprocessing-and-feature-extraction)
-  - [Postprocessing](#postprocessing)
+- [Automatic Speech Recognition Code Sample](#automatic-speech-recognition-code-sample)
+  - [Introduction](#introduction)
+    - [Preprocessing and feature extraction](#preprocessing-and-feature-extraction)
+    - [Postprocessing](#postprocessing)
+    - [Prerequisites](#prerequisites)
+  - [Building the code sample application from sources](#building-the-code-sample-application-from-sources)
+    - [Build options](#build-options)
+    - [Build process](#build-process)
+    - [Add custom input](#add-custom-input)
+    - [Add custom model](#add-custom-model)
+  - [Setting-up and running Ethos-U55 Code Sample](#setting-up-and-running-ethos-u55-code-sample)
+    - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
+    - [Starting Fast Model simulation](#starting-fast-model-simulation)
+    - [Running Automatic Speech Recognition](#running-automatic-speech-recognition)
 
 ## Introduction
 
@@ -148,72 +148,24 @@
 Ethos-U55 Fast Model when providing only the mandatory arguments for CMake configuration:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=./scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=asr ..
+cmake ../ -DUSE_CASE_BUILD=asr
 ```
 
-Toolchain option `CMAKE_TOOLCHAIN_FILE` points to the toolchain specific file to set the compiler and platform specific
-parameters.
-
 To configure a build that can be debugged using Arm-DS, we can just specify
-the build type as `Debug`:
+the build type as `Debug` and use the `Arm Compiler` toolchain file:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
     -DCMAKE_BUILD_TYPE=Debug \
-    -DUSE_CASE_BUILD=asr ..
+    -DUSE_CASE_BUILD=asr
 ```
 
-To configure a build that can be debugged using a tool that only supports
-DWARF format 3 (Modeldebugger for example), we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DCMAKE_BUILD_TYPE=Debug \
-    -DARMCLANG_DEBUG_DWARF_LEVEL=3 \
-    -DUSE_CASE_BUILD=asr ..
-```
-
-> **Note:** If building for different Ethos-U55 configurations, see
->[Configuring build for different Arm Ethos-U55 configurations](../sections/building.md#Configuring-build-for-different-Arm-Ethos-U55-configurations):
-
-If the TensorFlow source tree is not in its default expected location,
-set the path using `TENSORFLOW_SRC_PATH`.
-Similarly, if the Ethos-U55 driver is not in the default location,
-`ETHOS_U55_DRIVER_SRC_PATH` can be used to configure the location. For example:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=/my/custom/location/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=/my/custom/location/core_driver \
-    -DUSE_CASE_BUILD=asr ..
-```
-
-Also, `CMSIS_SRC_PATH` parameter can be used to override the CMSIS sources used for compilation used by TensorFlow by
-default. For example, to use the CMSIS sources fetched by the ethos-u helper script, we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=../ethos-u/core_software/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=../ethos-u/core_software/core_driver \
-    -DCMSIS_SRC_PATH=../ethos-u/core_software/cmsis \
-    -DUSE_CASE_BUILD=asr ..
-```
+Also see:
+- [Configuring with custom TPIP dependencies](../sections/building.md#Configuring-with-custom-TPIP-dependencies)
+- [Using Arm Compiler](../sections/building.md#using-arm-compiler)
+- [Configuring the build for simple_platform](../sections/building.md#Configuring-the-build-for-simple_platform)
+- [Working with model debugger from Arm FastModel Tools](../sections/building.md#Working-with-model-debugger-from-Arm-FastModel-Tools)
 
 > **Note:** If re-building with changed parameters values, it is highly advised to clean the build directory and re-run
 >the CMake command.
@@ -272,12 +224,9 @@
 Next set `asr_FILE_PATH` to the location of this folder when building:
 
 ```commandline
-cmake \
+cmake .. \
     -Dasr_FILE_PATH=/tmp/custom_wavs/ \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DUSE_CASE_BUILD=asr \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake ..
+    -DUSE_CASE_BUILD=asr
 ```
 
 The audio clips found in the `asr_FILE_PATH` folder will be picked up and automatically converted to C++ files during the
@@ -317,12 +266,10 @@
 An example:
 
 ```commandline
-cmake \
+cmake .. \
     -Dasr_MODEL_TFLITE_PATH=<path/to/custom_model_after_vela.tflite> \
     -Dasr_LABELS_TXT_FILE=<path/to/labels_custom_model.txt> \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake ..
+    -DUSE_CASE_BUILD=asr
 ```
 
 > **Note:** Clean the build directory before re-running the CMake command.
diff --git a/docs/use_cases/img_class.md b/docs/use_cases/img_class.md
index 0102409..68a5285 100644
--- a/docs/use_cases/img_class.md
+++ b/docs/use_cases/img_class.md
@@ -1,16 +1,17 @@
 # Image Classification Code Sample
 
-- [Introduction](#introduction)
-  - [Prerequisites](#prerequisites)
-- [Building the code sample application from sources](#building-the-code-sample-application-from-sources)
-  - [Build options](#build-options)
-  - [Build process](#build-process)
-  - [Add custom input](#add-custom-input)
-  - [Add custom model](#add-custom-model)
-- [Setting-up and running Ethos-U55 code sample](#setting-up-and-running-ethos-u55-code-sample)
-  - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
-  - [Starting Fast Model simulation](#starting-fast-model-simulation)
-  - [Running Image Classification](#running-image-classification)
+- [Image Classification Code Sample](#image-classification-code-sample)
+  - [Introduction](#introduction)
+    - [Prerequisites](#prerequisites)
+  - [Building the code sample application from sources](#building-the-code-sample-application-from-sources)
+    - [Build options](#build-options)
+    - [Build process](#build-process)
+    - [Add custom input](#add-custom-input)
+    - [Add custom model](#add-custom-model)
+  - [Setting-up and running Ethos-U55 code sample](#setting-up-and-running-ethos-u55-code-sample)
+    - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
+    - [Starting Fast Model simulation](#starting-fast-model-simulation)
+    - [Running Image Classification](#running-image-classification)
 
 ## Introduction
 
@@ -76,72 +77,24 @@
 Model when providing only the mandatory arguments for CMake configuration:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=./scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=img_class ..
+cmake ../ -DUSE_CASE_BUILD=img_class
 ```
 
-Toolchain option `CMAKE_TOOLCHAIN_FILE` points to the toolchain specific file to set the compiler and platform specific
-parameters.
-
 To configure a build that can be debugged using Arm-DS, we can just specify
-the build type as `Debug`:
+the build type as `Debug` and use the `Arm Compiler` toolchain file:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
     -DCMAKE_BUILD_TYPE=Debug \
-    -DUSE_CASE_BUILD=img_class ..
+    -DUSE_CASE_BUILD=img_class
 ```
 
-To configure a build that can be debugged using a tool that only supports
-DWARF format 3 (Modeldebugger for example), we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DCMAKE_BUILD_TYPE=Debug \
-    -DARMCLANG_DEBUG_DWARF_LEVEL=3 \
-    -DUSE_CASE_BUILD=img_class ..
-```
-
-> **Note:** If building for different Ethos-U55 configurations, see
->[Configuring build for different Arm Ethos-U55 configurations](../sections/building.md#Configuring-build-for-different-Arm-Ethos-U55-configurations):
-
-If the TensorFlow source tree is not in its default expected location,
-set the path using `TENSORFLOW_SRC_PATH`.
-Similarly, if the Ethos-U55 driver is not in the default location,
-`ETHOS_U55_DRIVER_SRC_PATH` can be used to configure the location. For example:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=/my/custom/location/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=/my/custom/location/core_driver \
-    -DUSE_CASE_BUILD=img_class ..
-```
-
-Also, `CMSIS_SRC_PATH` parameter can be used to override the CMSIS sources used for compilation used by TensorFlow by
-default. For example, to use the CMSIS sources fetched by the ethos-u helper script, we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=../ethos-u/core_software/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=../ethos-u/core_software/core_driver \
-    -DCMSIS_SRC_PATH=../ethos-u/core_software/cmsis \
-    -DUSE_CASE_BUILD=img_class ..
-```
+Also see:
+- [Configuring with custom TPIP dependencies](../sections/building.md#Configuring-with-custom-TPIP-dependencies)
+- [Using Arm Compiler](../sections/building.md#using-arm-compiler)
+- [Configuring the build for simple_platform](../sections/building.md#Configuring-the-build-for-simple_platform)
+- [Working with model debugger from Arm FastModel Tools](../sections/building.md#Working-with-model-debugger-from-Arm-FastModel-Tools)
 
 > **Note:** If re-building with changed parameters values, it is highly advised to clean the build directory and re-run
 >the CMake command.
@@ -200,12 +153,9 @@
 Next set `img_class_FILE_PATH` to the location of this folder when building:
 
 ```commandline
-cmake \
+cmake .. \
     -Dimg_class_FILE_PATH=/tmp/custom_images/ \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=img_class ..
+    -DUSE_CASE_BUILD=img_class
 ```
 
 The images found in the `img_class_FILE_PATH` folder will be picked up and automatically converted to C++ files during
@@ -249,13 +199,10 @@
 An example:
 
 ```commandline
-cmake \
+cmake .. \
     -Dimg_class_MODEL_TFLITE_PATH=<path/to/custom_model_after_vela.tflite> \
     -Dimg_class_LABELS_TXT_FILE=<path/to/labels_custom_model.txt> \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=img_class ..
+    -DUSE_CASE_BUILD=img_class
 ```
 
 > **Note:** Clean the build directory before re-running the CMake command.
diff --git a/docs/use_cases/inference_runner.md b/docs/use_cases/inference_runner.md
index ad47e7e..ebc4677 100644
--- a/docs/use_cases/inference_runner.md
+++ b/docs/use_cases/inference_runner.md
@@ -1,16 +1,16 @@
 # Inference Runner Code Sample
 
-- [Introduction](#introduction)
-  - [Prerequisites](#prerequisites)
-- [Building the Code Samples application from sources](#building-the-code-samples-application-from-sources)
-  - [Build options](#build-options)
-  - [Build process](#build-process)
-  - [Add custom model](#add-custom-model)
-- [Setting-up and running Ethos-U55 code sample](#setting-up-and-running-ethos-u55-code-sample)
-  - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
-  - [Starting Fast Model simulation](#starting-fast-model-simulation)
-  - [Running Inference Runner](#running-inference-runner)
-- [Inference Runner processing information](#inference-runner-processing-information)
+- [Inference Runner Code Sample](#inference-runner-code-sample)
+  - [Introduction](#introduction)
+    - [Prerequisites](#prerequisites)
+  - [Building the Code Samples application from sources](#building-the-code-samples-application-from-sources)
+    - [Build options](#build-options)
+    - [Build process](#build-process)
+    - [Add custom model](#add-custom-model)
+  - [Setting-up and running Ethos-U55 code sample](#setting-up-and-running-ethos-u55-code-sample)
+    - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
+    - [Starting Fast Model simulation](#starting-fast-model-simulation)
+    - [Running Inference Runner](#running-inference-runner)
 
 ## Introduction
 
@@ -68,72 +68,23 @@
 Model when providing only the mandatory arguments for CMake configuration:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=./scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=inference_runner ..
+cmake ../ -DUSE_CASE_BUILD=inference_runner
 ```
-
-Toolchain option `CMAKE_TOOLCHAIN_FILE` points to the toolchain specific file to set the compiler and platform specific
-parameters.
-
 To configure a build that can be debugged using Arm-DS, we can just specify
-the build type as `Debug`:
+the build type as `Debug` and use the `Arm Compiler` toolchain file:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
     -DCMAKE_BUILD_TYPE=Debug \
-    -DUSE_CASE_BUILD=inference_runner ..
+    -DUSE_CASE_BUILD=inference_runner
 ```
 
-To configure a build that can be debugged using a tool that only supports
-DWARF format 3 (Modeldebugger for example), we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DCMAKE_BUILD_TYPE=Debug \
-    -DARMCLANG_DEBUG_DWARF_LEVEL=3 \
-    -DUSE_CASE_BUILD=inference_runner ..
-```
-
-> **Note:** If building for different Ethos-U55 configurations, see
->[Configuring build for different Arm Ethos-U55 configurations](../sections/building.md#Configuring-build-for-different-Arm-Ethos-U55-configurations):
-
-If the TensorFlow source tree is not in its default expected location,
-set the path using `TENSORFLOW_SRC_PATH`.
-Similarly, if the Ethos-U55 driver is not in the default location,
-`ETHOS_U55_DRIVER_SRC_PATH` can be used to configure the location. For example:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=/my/custom/location/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=/my/custom/location/core_driver \
-    -DUSE_CASE_BUILD=inference_runner ..
-```
-
-Also, `CMSIS_SRC_PATH` parameter can be used to override the CMSIS sources used for compilation used by TensorFlow by
-default. For example, to use the CMSIS sources fetched by the ethos-u helper script, we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=../ethos-u/core_software/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=../ethos-u/core_software/core_driver \
-    -DCMSIS_SRC_PATH=../ethos-u/core_software/cmsis \
-    -DUSE_CASE_BUILD=inference_runner ..
-```
+Also see:
+- [Configuring with custom TPIP dependencies](../sections/building.md#Configuring-with-custom-TPIP-dependencies)
+- [Using Arm Compiler](../sections/building.md#using-arm-compiler)
+- [Configuring the build for simple_platform](../sections/building.md#Configuring-the-build-for-simple_platform)
+- [Working with model debugger from Arm FastModel Tools](../sections/building.md#Working-with-model-debugger-from-Arm-FastModel-Tools)
 
 > **Note:** If re-building with changed parameters values, it is highly advised to clean the build directory and re-run
 >the CMake command.
@@ -188,11 +139,9 @@
 An example:
 
 ```commandline
-cmake \
+cmake .. \
   -Dinference_runner_MODEL_TFLITE_PATH=<path/to/custom_model_after_vela.tflite> \
-  -DTARGET_PLATFORM=mps3 \
-  -DTARGET_SUBSYSTEM=sse-300 \
-  -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake ..
+  -DUSE_CASE_BUILD=inference_runner
 ```
 
 > **Note:** Clean the build directory before re-running the CMake command.
diff --git a/docs/use_cases/kws.md b/docs/use_cases/kws.md
index baf813a..8811efb 100644
--- a/docs/use_cases/kws.md
+++ b/docs/use_cases/kws.md
@@ -1,19 +1,19 @@
 # Keyword Spotting Code Sample
 
-- [Introduction](#introduction)
-  - [Prerequisites](#prerequisites)
-- [Building the code sample application from sources](#building-the-code-sample-application-from-sources)
-  - [Build options](#build-options)
-  - [Build process](#build-process)
-  - [Add custom input](#add-custom-input)
-  - [Add custom model](#add-custom-model)
-- [Setting-up and running Ethos-U55 code sample](#setting-up-and-running-ethos-u55-code-sample)
-  - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
-  - [Starting Fast Model simulation](#starting-fast-model-simulation)
-  - [Running Keyword Spotting](#running-keyword-spotting)
-- [Keyword Spotting processing information](#keyword-spotting-processing-information)
-  - [Preprocessing and feature extraction](#preprocessing-and-feature-extraction)
-  - [Postprocessing](#postprocessing)
+- [Keyword Spotting Code Sample](#keyword-spotting-code-sample)
+  - [Introduction](#introduction)
+    - [Preprocessing and feature extraction](#preprocessing-and-feature-extraction)
+    - [Postprocessing](#postprocessing)
+    - [Prerequisites](#prerequisites)
+  - [Building the code sample application from sources](#building-the-code-sample-application-from-sources)
+    - [Build options](#build-options)
+    - [Build process](#build-process)
+    - [Add custom input](#add-custom-input)
+    - [Add custom model](#add-custom-model)
+  - [Setting-up and running Ethos-U55 code sample](#setting-up-and-running-ethos-u55-code-sample)
+    - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
+    - [Starting Fast Model simulation](#starting-fast-model-simulation)
+    - [Running Keyword Spotting](#running-keyword-spotting)
 
 ## Introduction
 
@@ -117,70 +117,24 @@
 On Linux, execute the following command to build Keyword Spotting application to run on the Ethos-U55 Fast Model when providing only the mandatory arguments for CMake configuration:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=./scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=kws ..
+cmake ../ -DUSE_CASE_BUILD=kws
 ```
 
-Toolchain option `CMAKE_TOOLCHAIN_FILE` points to the toolchain specific file to set the compiler and platform specific
-parameters.
-
 To configure a build that can be debugged using Arm-DS, we can just specify
-the build type as `Debug`:
+the build type as `Debug` and use the `Arm Compiler` toolchain file:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
     -DCMAKE_BUILD_TYPE=Debug \
-    -DUSE_CASE_BUILD=kws ..
+    -DUSE_CASE_BUILD=kws
 ```
 
-To configure a build that can be debugged using a tool that only supports
-DWARF format 3 (Modeldebugger for example), we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DCMAKE_BUILD_TYPE=Debug \
-    -DARMCLANG_DEBUG_DWARF_LEVEL=3 \
-    -DUSE_CASE_BUILD=kws ..
-```
-
-> **Note:** If building for different Ethos-U55 configurations, see [Configuring build for different Arm Ethos-U55 configurations](../sections/building.md#Configuring-build-for-different-Arm-Ethos-U55-configurations):
-
-If the TensorFlow source tree is not in its default expected location,
-set the path using `TENSORFLOW_SRC_PATH`.
-Similarly, if the Ethos-U55 driver is not in the default location,
-`ETHOS_U55_DRIVER_SRC_PATH` can be used to configure the location. For example:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=/my/custom/location/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=/my/custom/location/core_driver \
-    -DUSE_CASE_BUILD=kws ..
-```
-
-Also, `CMSIS_SRC_PATH` parameter can be used to override the CMSIS sources used for compilation used by TensorFlow by default. For example, to use the CMSIS sources fetched by the ethos-u helper script, we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=../ethos-u/core_software/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=../ethos-u/core_software/core_driver \
-    -DCMSIS_SRC_PATH=../ethos-u/core_software/cmsis \
-    -DUSE_CASE_BUILD=kws ..
-```
+Also see:
+- [Configuring with custom TPIP dependencies](../sections/building.md#Configuring-with-custom-TPIP-dependencies)
+- [Using Arm Compiler](../sections/building.md#using-arm-compiler)
+- [Configuring the build for simple_platform](../sections/building.md#Configuring-the-build-for-simple_platform)
+- [Working with model debugger from Arm FastModel Tools](../sections/building.md#Working-with-model-debugger-from-Arm-FastModel-Tools)
 
 > **Note:** If re-building with changed parameters values, it is highly advised to clean the build directory and re-run the CMake command.
 
@@ -237,12 +191,9 @@
 Next set `kws_FILE_PATH` to the location of this folder when building:
 
 ```commandline
-cmake \
+cmake .. \
     -Dkws_FILE_PATH=/tmp/custom_wavs/ \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DUSE_CASE_BUILD=kws \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake..
+    -DUSE_CASE_BUILD=kws
 ```
 
 The audio clips found in the `kws_FILE_PATH` folder will be picked up and automatically converted to C++ files during the
@@ -281,13 +232,10 @@
 An example:
 
 ```commandline
-cmake \
+cmake .. \
     -Dkws_MODEL_TFLITE_PATH=<path/to/custom_model_after_vela.tflite> \
     -Dkws_LABELS_TXT_FILE=<path/to/labels_custom_model.txt> \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DUSE_CASE_BUILD=kws \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake ..
+    -DUSE_CASE_BUILD=kws
 ```
 
 > **Note:** Clean the build directory before re-running the CMake command.
diff --git a/docs/use_cases/kws_asr.md b/docs/use_cases/kws_asr.md
index a347b16..b63ee3a 100644
--- a/docs/use_cases/kws_asr.md
+++ b/docs/use_cases/kws_asr.md
@@ -1,21 +1,21 @@
 # Keyword Spotting and Automatic Speech Recognition Code Sample
 
-- [Introduction](#introduction)
-  - [Prerequisites](#prerequisites)
-- [Building the code sample application from sources](#building-the-code-sample-application-from-sources)
-  - [Build options](#build-options)
-  - [Build process](#build-process)
-  - [Add custom input](#add-custom-input)
-  - [Add custom model](#add-custom-model)
-- [Setting-up and running Ethos-U55 Code Samples](#setting-up-and-running-ethos-u55-code-samples)
-  - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
-  - [Starting Fast Model simulation](#starting-fast-model-simulation)
-  - [Running Keyword Spotting and Automatic Speech Recognition](#running-keyword-spotting-and-automatic-speech-recognition)
-- [Keyword Spotting and Automatic Speech Recognition processing information](#keyword-spotting-and-automatic-speech-recognition-processing-information)
-  - [Preprocessing and feature extraction](#preprocessing-and-feature-extraction)
-    - [Keyword Spotting Preprocessing](#keyword-spotting-preprocessing)
-    - [Automatic Speech Recognition Preprocessing](#automatic-speech-recognition-preprocessing)
-  - [Postprocessing](#postprocessing)
+- [Keyword Spotting and Automatic Speech Recognition Code Sample](#keyword-spotting-and-automatic-speech-recognition-code-sample)
+  - [Introduction](#introduction)
+    - [Preprocessing and feature extraction](#preprocessing-and-feature-extraction)
+      - [Keyword Spotting Preprocessing](#keyword-spotting-preprocessing)
+      - [Automatic Speech Recognition Preprocessing](#automatic-speech-recognition-preprocessing)
+    - [Postprocessing](#postprocessing)
+    - [Prerequisites](#prerequisites)
+  - [Building the code sample application from sources](#building-the-code-sample-application-from-sources)
+    - [Build options](#build-options)
+    - [Build process](#build-process)
+    - [Add custom input](#add-custom-input)
+    - [Add custom model](#add-custom-model)
+  - [Setting-up and running Ethos-U55 Code Samples](#setting-up-and-running-ethos-u55-code-samples)
+    - [Setting up the Ethos-U55 Fast Model](#setting-up-the-ethos-u55-fast-model)
+    - [Starting Fast Model simulation](#starting-fast-model-simulation)
+    - [Running Keyword Spotting and Automatic Speech Recognition](#running-keyword-spotting-and-automatic-speech-recognition)
 
 ## Introduction
 
@@ -188,70 +188,24 @@
 On Linux, execute the following command to build the application to run on the Ethos-U55 Fast Model when providing only the mandatory arguments for CMake configuration:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=./scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=kws_asr ..
+cmake ../ -DUSE_CASE_BUILD=kws_asr
 ```
 
-Toolchain option `CMAKE_TOOLCHAIN_FILE` points to the toolchain specific file to set the compiler and platform specific
-parameters.
-
 To configure a build that can be debugged using Arm-DS, we can just specify
-the build type as `Debug`:
+the build type as `Debug` and use the `Arm Compiler` toolchain file:
 
 ```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
+cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/toolchains/bare-metal-armclang.cmake \
     -DCMAKE_BUILD_TYPE=Debug \
-    -DUSE_CASE_BUILD=kws_asr ..
+    -DUSE_CASE_BUILD=kws_asr
 ```
 
-To configure a build that can be debugged using a tool that only supports
-DWARF format 3 (Modeldebugger for example), we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DCMAKE_BUILD_TYPE=Debug \
-    -DARMCLANG_DEBUG_DWARF_LEVEL=3 \
-    -DUSE_CASE_BUILD=kws_asr ..
-```
-
-> **Note:** If building for different Ethos-U55 configurations, see [Configuring build for different Arm Ethos-U55 configurations](../sections/building.md#Configuring-build-for-different-Arm-Ethos-U55-configurations):
-
-If the TensorFlow source tree is not in its default expected location,
-set the path using `TENSORFLOW_SRC_PATH`.
-Similarly, if the Ethos-U55 driver is not in the default location,
-`ETHOS_U55_DRIVER_SRC_PATH` can be used to configure the location. For example:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=/my/custom/location/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=/my/custom/location/core_driver \
-    -DUSE_CASE_BUILD=kws_asr ..
-```
-
-Also, `CMSIS_SRC_PATH` parameter can be used to override the CMSIS sources used for compilation used by TensorFlow by default. For example, to use the CMSIS sources fetched by the ethos-u helper script, we can use:
-
-```commandline
-cmake \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DTENSORFLOW_SRC_PATH=../ethos-u/core_software/tensorflow \
-    -DETHOS_U55_DRIVER_SRC_PATH=../ethos-u/core_software/core_driver \
-    -DCMSIS_SRC_PATH=../ethos-u/core_software/cmsis \
-    -DUSE_CASE_BUILD=kws_asr ..
-```
+Also see:
+- [Configuring with custom TPIP dependencies](../sections/building.md#Configuring-with-custom-TPIP-dependencies)
+- [Using Arm Compiler](../sections/building.md#using-arm-compiler)
+- [Configuring the build for simple_platform](../sections/building.md#Configuring-the-build-for-simple_platform)
+- [Working with model debugger from Arm FastModel Tools](../sections/building.md#Working-with-model-debugger-from-Arm-FastModel-Tools)
 
 > **Note:** If re-building with changed parameters values, it is highly advised to clean the build directory and re-run the CMake command.
 
@@ -309,12 +263,9 @@
 Next set `kws_asr_FILE_PATH` to the location of this folder when building:
 
 ```commandline
-cmake \
+cmake .. \
     -Dkws_asr_FILE_PATH=/tmp/custom_files/ \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=kws_asr- ..
+    -DUSE_CASE_BUILD=kws_asr
 ```
 
 The files found in the `kws_asr_FILE_PATH` folder will be picked up and automatically converted to C++ files during the
@@ -348,13 +299,10 @@
 An example:
 
 ```commandline
-cmake \
+cmake .. \
     -Dkws_asr_MODEL_TFLITE_PATH_ASR=<path/to/custom_asr_model_after_vela.tflite> \
     -Dkws_asr_LABELS_TXT_FILE_ASR=<path/to/labels_custom_model.txt> \
-    -DTARGET_PLATFORM=mps3 \
-    -DTARGET_SUBSYSTEM=sse-300 \
-    -DCMAKE_TOOLCHAIN_FILE=scripts/cmake/bare-metal-toolchain.cmake \
-    -DUSE_CASE_BUILD=kws_asr ..
+    -DUSE_CASE_BUILD=kws_asr
 ```
 
 > **Note:** Clean the build directory before re-running the CMake command.
diff --git a/release_notes.txt b/release_notes.txt
index abf605b..1b2ced7 100644
--- a/release_notes.txt
+++ b/release_notes.txt
@@ -2,14 +2,16 @@
     * Added script to download and optimize default models.
     * Added script to run default build flow.
     * Added a model for Anomaly Detection use case.
+    * Added support for build with Arm GNU Embedded Toolchain (10.2.1).
+    * Deprecated support for target subsystem SSE-200.
 
 Changes in 21.03
-    * simple platform support added
-    * model conditioning examples added
-    * documentation updated
-    * build changed to use sources of the dependency libraries
-    * tests for native platform added
-    * anomaly detection use case added
+    * simple platform support added.
+    * model conditioning examples added.
+    * documentation updated.
+    * build changed to use sources of the dependency libraries.
+    * tests for native platform added.
+    * anomaly detection use case added.
 
 Changes in 20.11
     * SSE-200 and SSE-300 system support was added.
diff --git a/scripts/cmake/bare-metal-sources.cmake b/scripts/cmake/bare-metal-sources.cmake
index 8b348e3..2bfe616 100644
--- a/scripts/cmake/bare-metal-sources.cmake
+++ b/scripts/cmake/bare-metal-sources.cmake
@@ -28,13 +28,15 @@
 if (NOT DEFINED MEM_PROFILES_SRC_DIR)
     set(MEM_PROFILES_SRC_DIR    ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake/subsystem-profiles)
 endif()
+
 set(MEM_PROFILE_TEMPLATE    ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake/templates/peripheral_memmap.h.template)
 set(IRQ_PROFILE_TEMPLATE    ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake/templates/peripheral_irqs.h.template)
 set(MEM_REGIONS_TEMPLATE    ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake/templates/mem_regions.h.template)
 set(TA_SETTINGS_TEMPLATE    ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake/templates/timing_adapter_settings.template)
+set(LINKER_SCRIPT_DIR       "${PLAT_HAL}/bsp/mem_layout")
 set(TENSORFLOW_LITE_MICRO_PLATFORM_LIB_NAME  "libtensorflow-microlite.a")
 set(TENSORFLOW_LITE_MICRO_FLAG               "-DTF_LITE_STATIC_MEMORY")
-set(ETHOS_U55_FLAG          "-DARM_NPU=1")
+set(ETHOS_U55_FLAG                           "-DARM_NPU=1")
 
 if (ETHOS_U55_ENABLED)
     set(OPTIONAL_FLAGS      "${OPTIONAL_FLAGS} ${ETHOS_U55_FLAG}")
@@ -54,23 +56,33 @@
     if (TARGET_SUBSYSTEM STREQUAL sse-300)
         message(STATUS          "target subsystem is ${TARGET_SUBSYSTEM}")
         set(BSP_PACKAGE_DIR     "${PLAT_HAL}/bsp/bsp-packs/mps3")
-        set(SCAT_FILE           "${PLAT_HAL}/bsp/mem_layout/mps3-${TARGET_SUBSYSTEM}.sct")
+        set(LINKER_SCRIPT_NAME  "${TARGET_PLATFORM}-${TARGET_SUBSYSTEM}")
 
         # Include the mem profile definitions specific to our target subsystem
         include(${MEM_PROFILES_SRC_DIR}/corstone-${TARGET_SUBSYSTEM}.cmake)
         set(OPTIONAL_FLAGS      "${OPTIONAL_FLAGS} ${MPS3_PLATFORM_FLAG}")
+
+        # For deployment on the MPS3 FPGA platform, we need to produce
+        # two bin files - one that is loaded into the ITCM, and another
+        # that is loaded into the DDR region.
+        set(MPS3_SECTION_PATTERNS   "*.at_itcm" "*.at_ddr")
+        set(MPS3_OUTPUT_BIN_NAMES   "itcm.bin"  "ddr.bin")
+        set(MPS3_FPGA_CONFIG        "${CMAKE_CURRENT_SOURCE_DIR}/scripts/${TARGET_PLATFORM}/${TARGET_SUBSYSTEM}/images.txt")
     else ()
         message(FATAL_ERROR "Non compatible target subsystem: ${TARGET_SUBSYSTEM}")
     endif ()
 elseif (TARGET_PLATFORM STREQUAL simple_platform)
     set(BSP_PACKAGE_DIR     "${PLAT_HAL}/bsp/bsp-packs/${TARGET_PLATFORM}")
-    set(SCAT_FILE           "${PLAT_HAL}/bsp/mem_layout/${TARGET_PLATFORM}.sct")
+    set(LINKER_SCRIPT_NAME  "${TARGET_PLATFORM}")
     include(${MEM_PROFILES_SRC_DIR}/${TARGET_PLATFORM}.cmake)
     set(OPTIONAL_FLAGS      "${OPTIONAL_FLAGS}")
 else ()
     message(FATAL_ERROR "Non compatible target platform ${TARGET_PLATFORM}")
 endif ()
 
+# Add link options for the linker script to be used:
+add_linker_script(${LINKER_SCRIPT_DIR} ${LINKER_SCRIPT_NAME})
+
 if (ETHOS_U55_ENABLED)
     USER_OPTION(TA_CONFIG_FILE "Path to the timing adapter configuration file"
             "${CMAKE_SCRIPTS_DIR}/ta_config.cmake"
@@ -91,7 +103,6 @@
 configure_file("${MEM_REGIONS_TEMPLATE}" "${SOURCE_GEN_DIR}/mem_regions.h")
 configure_file("${TA_SETTINGS_TEMPLATE}" "${SOURCE_GEN_DIR}/timing_adapter_settings.h")
 
-message(STATUS "Scatter file: ${SCAT_FILE}")
 message(STATUS "Using BSP package from: ${BSP_PACKAGE_DIR}")
 
 if (DEFINED VERIFY_TEST_OUTPUT)
@@ -114,27 +125,14 @@
     set(OPTIONAL_FLAGS "${OPTIONAL_FLAGS} -gdwarf-${ARMCLANG_DEBUG_DWARF_LEVEL}")
 endif()
 
-set(COMPILER_FLAGS              "${ALL_COMMON_FLAGS} ${TENSORFLOW_LITE_MICRO_FLAG} ${PROFILING_OPT} ${OPTIONAL_FLAGS}")
+set(COMPILER_FLAGS              "${TENSORFLOW_LITE_MICRO_FLAG} ${PROFILING_OPT} ${OPTIONAL_FLAGS}")
 # For some reason, cmake doesn't pass the c++ standard flag, adding it manually
 set(CMAKE_CXX_FLAGS             "${COMPILER_FLAGS} -std=c++11" CACHE INTERNAL "")
 set(CMAKE_C_FLAGS               "${COMPILER_FLAGS}" CACHE INTERNAL "")
-set(CMAKE_ASM_FLAGS             "${CPU_LD}")
 set(CMAKE_ASM_COMPILE_OBJECT    ${CMAKE_CXX_FLAGS})
 
-add_link_options(--strict --callgraph --load_addr_map_info --map)
-add_link_options(--symbols --xref --scatter=${SCAT_FILE})
-
-# Warnings to be ignored:
-# L6314W = No section matches pattern
-# L6439W = Multiply defined Global Symbol
-add_link_options(--diag_suppress=L6439W,L6314W)
-add_link_options(--info sizes,totals,unused,veneers --entry Reset_Handler)
-
-if (CMAKE_BUILD_TYPE STREQUAL Release)
-    add_link_options(--no_debug)
-endif ()
-
-set(CMAKE_EXE_LINKER_FLAGS "${CPU_LD}")
+# Tell linker that reset interrupt handler is our entry point
+add_link_options(--entry Reset_Handler)
 
 set(PLAT_BSP_INCLUDES
     ${PLAT_HAL}/bsp/cmsis-device/include
@@ -168,3 +166,7 @@
     "${PLAT_HAL}/bsp/bsp-core/*.c"
     "${BSP_PACKAGE_DIR}/*.c"
     )
+
+# Special retarget source to direct stdin, stdout and stderr streams to the
+# UART block.
+set(PLAT_RETARGET_SOURCE "${PLAT_HAL}/bsp/bsp-core/retarget.c")
diff --git a/scripts/cmake/bare-metal-toolchain.cmake b/scripts/cmake/bare-metal-toolchain.cmake
deleted file mode 100644
index 5d91b98..0000000
--- a/scripts/cmake/bare-metal-toolchain.cmake
+++ /dev/null
@@ -1,65 +0,0 @@
-#----------------------------------------------------------------------------
-#  Copyright (c) 2021 Arm Limited. All rights reserved.
-#  SPDX-License-Identifier: Apache-2.0
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#----------------------------------------------------------------------------
-# specify the cross compiler
-set(CMAKE_C_COMPILER                armclang)
-set(CMAKE_CXX_COMPILER              armclang)
-set(CMAKE_C_LINKER_PREFERENCE       armlink)
-set(CMAKE_ASM_LINKER_PREFERENCE     armlink)
-set(CMAKE_ASM_COMPILER              armasm)
-set(CMAKE_ASM_COMPILER_AR           armar)
-
-set(CMAKE_CROSSCOMPILING            true)
-set(CMAKE_SYSTEM_NAME               Generic)
-
-set(MIN_ARM_CLANG_VERSION           6.14)
-
-if (NOT DEFINED CMAKE_SYSTEM_PROCESSOR)
-    set(CMAKE_SYSTEM_PROCESSOR      cortex-m55)
-endif()
-
-# Skip compiler test execution
-set(CMAKE_C_COMPILER_WORKS          1)
-set(CMAKE_CXX_COMPILER_WORKS        1)
-
-set(PLATFORM_HAL                    1)
-
-set(WARNING_OPTS                    "-Wall -Wextra -Wvla")
-set(SPECIAL_OPTS                    "-fno-rtti -funsigned-char -fno-function-sections -fno-exceptions")
-set(PLATFORM_FLAGS                  "-mthumb --target=arm-arm-non-eabi -mlittle-endian -DPLATFORM_HAL=${PLATFORM_HAL}")
-
-set(CMAKE_C_FLAGS_DEBUG             "-DDEBUG -O0")
-set(CMAKE_C_FLAGS_RELEASE           "-DNDEBUG -O3")
-
-set(CMAKE_CXX_FLAGS_DEBUG           "-DDEBUG -O0")
-set(CMAKE_CXX_FLAGS_RELEASE         "-DNDEBUG -O3")
-
-if (CMAKE_SYSTEM_PROCESSOR STREQUAL cortex-m55)
-    # Flags for cortex-m55
-    set(CPU_CORTEX_M55              1)
-    set(CPU_CC                      "-mcpu=cortex-m55 -mfloat-abi=hard -MD -DCPU_CORTEX_M55=1 -DARM_MATH_DSP -DARM_MATH_LOOPUNROLL -D__FPU_USED=1")
-    set(CPU_LD                      "--cpu=8.1-M.Main.dsp")
-elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL cortex-m33)
-    # Flags for cortex-m33 to go here
-endif()
-
-set(ALL_COMMON_FLAGS                "${CPU_CC} ${WARNING_OPTS} ${SPECIAL_OPTS} ${PLATFORM_FLAGS}")
-
-function(enforce_compiler_version)
-    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${MIN_ARM_CLANG_VERSION})
-        message( FATAL_ERROR "Arm compiler version must be ${MIN_ARM_CLANG_VERSION} or greater to support ${CMAKE_SYSTEM_PROCESSOR} architecture." )
-    endif()
-endfunction()
diff --git a/scripts/cmake/cmsis-dsp.cmake b/scripts/cmake/cmsis-dsp.cmake
index cb0243b..bb26b69 100644
--- a/scripts/cmake/cmsis-dsp.cmake
+++ b/scripts/cmake/cmsis-dsp.cmake
@@ -37,7 +37,16 @@
 
 file(GLOB_RECURSE
     CMSIS_DSP_SRC
-    "${CMSIS_DSP_SRC_DIR}/arm_*.c")
+
+    "${CMSIS_DSP_SRC_DIR}/BasicMathFunctions/arm_*.c"
+    "${CMSIS_DSP_SRC_DIR}/FastMathFunctions/arm_*.c"
+    "${CMSIS_DSP_SRC_DIR}/CommonTables/arm_*.c"
+    "${CMSIS_DSP_SRC_DIR}/TransformFunctions/arm_*.c"
+    "${CMSIS_DSP_SRC_DIR}/StatisticsFunctions/arm_*.c"
+
+    # Issue with q15 and q31 functions with Arm GNU toolchain, we only
+    # need f32 functions.
+    "${CMSIS_DSP_SRC_DIR}/ComplexMathFunctions/arm_*f32.c")
 
 # 4. Create static library
 set(CMSIS_DSP_TARGET        cmsis-dsp)
@@ -50,6 +59,20 @@
 target_include_directories(${CMSIS_DSP_TARGET} PRIVATE
                            ${CMSIS_DSP_PRI_INC_DIR})
 
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    target_compile_options(${CMSIS_DSP_TARGET} PUBLIC -flax-vector-conversions)
+
+    # There is a known issue with -O0 optimisation option that affects
+    # FFT functions from CMSIS-DSP when compiling with Arm GNU embedded
+    # toolchain version 10.2.1
+    if (CMAKE_BUILD_TYPE STREQUAL Debug)
+        message(WARNING "There are known issues with CMSIS-DSP builds using "
+                        "MVE extension without optimisation. Forcing -O3 "
+                        "optimisation level")
+        target_compile_options(${CMSIS_DSP_TARGET} PUBLIC -O3)
+    endif()
+endif ()
+
 # 5. Add any custom/conditional flags for compilation or linkage
 if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL cortex-m55)
     target_compile_definitions(${CMSIS_DSP_TARGET} PUBLIC
diff --git a/scripts/cmake/native-sources.cmake b/scripts/cmake/native-sources.cmake
index 743e075..1b1431a 100644
--- a/scripts/cmake/native-sources.cmake
+++ b/scripts/cmake/native-sources.cmake
@@ -34,13 +34,11 @@
 set(TENSORFLOW_LITE_MICRO_FLAGS "-DTF_LITE_STATIC_MEMORY -DACTIVATION_BUF_SRAM_SZ=0")
 
 set(CMAKE_C_FLAGS
-        "${WARNING_FLAGS} ${SPECIAL_OPTS} ${PLATFORM_FLAGS}\
-        ${PROFILING_OPT} ${TF_FLAG} ${LOG_FLAG} ${TENSORFLOW_LITE_MICRO_FLAGS}"
+        "${PROFILING_OPT} ${LOG_FLAG} ${TENSORFLOW_LITE_MICRO_FLAGS}"
         CACHE INTERNAL "")
+
 set(CMAKE_CXX_FLAGS
-        "${WARNING_FLAGS} ${SPECIAL_OPTS} ${SPECIAL_OPTS_CXX}\
-        ${PLATFORM_FLAGS} ${PROFILING_OPT} ${TF_FLAG} ${LOG_FLAG}\
-        ${TENSORFLOW_LITE_MICRO_FLAGS}"
+        "${PROFILING_OPT} ${LOG_FLAG} ${TENSORFLOW_LITE_MICRO_FLAGS}"
         CACHE INTERNAL "")
 
 # Include directories:
diff --git a/scripts/cmake/toolchains/bare-metal-armclang.cmake b/scripts/cmake/toolchains/bare-metal-armclang.cmake
new file mode 100644
index 0000000..0a86eb6
--- /dev/null
+++ b/scripts/cmake/toolchains/bare-metal-armclang.cmake
@@ -0,0 +1,134 @@
+#----------------------------------------------------------------------------
+#  Copyright (c) 2021 Arm Limited. All rights reserved.
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#----------------------------------------------------------------------------
+# specify the cross compiler
+set(CMAKE_C_COMPILER                armclang)
+set(CMAKE_CXX_COMPILER              armclang)
+set(CMAKE_C_LINKER_PREFERENCE       armlink)
+set(CMAKE_ASM_LINKER_PREFERENCE     armlink)
+set(CMAKE_ASM_COMPILER              armasm)
+set(CMAKE_ASM_COMPILER_AR           armar)
+
+set(CMAKE_CROSSCOMPILING            true)
+set(CMAKE_SYSTEM_NAME               Generic)
+
+set(MIN_ARM_CLANG_VERSION           6.14)
+
+# Skip compiler test execution
+set(CMAKE_C_COMPILER_WORKS          1)
+set(CMAKE_CXX_COMPILER_WORKS        1)
+set(PLATFORM_HAL                    1)
+
+if (NOT DEFINED CMAKE_SYSTEM_PROCESSOR)
+    set(CMAKE_SYSTEM_PROCESSOR      cortex-m55)
+endif()
+
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL cortex-m55)
+    # Flags for cortex-m55
+    set(CPU_COMPILE_DEF             CPU_CORTEX_M55)
+    set(CPU_NAME                    ${CMAKE_SYSTEM_PROCESSOR})
+    set(FLOAT_ABI                   hard)
+    set(ARM_MATH_DSP                1)
+    set(ARM_MATH_LOOPUNROLL         1)
+    set(CPU_LINK_OPT                "--cpu=8.1-M.Main.dsp")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL cortex-m33)
+    # Flags for cortex-m33 to go here
+endif()
+
+set(${CPU_COMPILE_DEF}              1)
+
+# Warning options
+add_compile_options(
+    -Wall
+    -Wextra
+    -Wvla)
+
+# General purpose compile options:
+add_compile_options(
+    -funsigned-char
+    -fno-function-sections
+    "$<$<COMPILE_LANGUAGE:CXX>:-fno-unwind-tables;-fno-rtti;-fno-exceptions>")
+
+# Arch compile options (arm-arm-none-eabi is armclang's AArch32 bare-metal target triple):
+add_compile_options(
+    -mthumb
+    -mcpu=${CPU_NAME}
+    -mfloat-abi=${FLOAT_ABI}
+    --target=arm-arm-none-eabi
+    -mlittle-endian
+    -MD)
+
+# Compile definitions:
+add_compile_definitions(
+    PLATFORM_HAL=${PLATFORM_HAL}
+    ${CPU_COMPILE_DEF}=1
+    $<$<BOOL:${ARM_MATH_DSP}>:ARM_MATH_DSP>
+    $<$<BOOL:${ARM_MATH_LOOPUNROLL}>:ARM_MATH_LOOPUNROLL>)
+
+# Link options:
+add_link_options(${CPU_LINK_OPT})
+set(CMAKE_ASM_FLAGS "${CPU_LINK_OPT}")
+
+# Warnings to be ignored:
+# L6314W = No section matches pattern
+# L6439W = Multiply defined Global Symbol
+add_link_options(
+    --diag_suppress=L6439W,L6314W
+    --info sizes,totals,unused,veneers
+    --strict
+    --callgraph
+    --load_addr_map_info
+    --xref
+    "$<$<CONFIG:RELEASE>:--no_debug>")
+
+# Function to add a map file output for the linker to dump diagnostic information to.
+function(add_target_map_file TARGET_NAME MAP_FILE_PATH)
+    target_link_options(${TARGET_NAME} PUBLIC
+        --map --symbols --list=${MAP_FILE_PATH})
+endfunction()
+
+# Function to add linker option to use the chosen linker script (scatter file).
+function(add_linker_script SCRIPT_DIR SCRIPT_NAME)
+    set(LINKER_SCRIPT_PATH ${SCRIPT_DIR}/${SCRIPT_NAME}.sct
+        CACHE STRING "Linker script path")
+    if (NOT EXISTS ${LINKER_SCRIPT_PATH})
+        message(FATAL_ERROR "Scatter file not found: ${LINKER_SCRIPT_PATH}")
+    endif()
+    message(STATUS "Using linker script: ${LINKER_SCRIPT_PATH}")
+    add_link_options(--scatter=${LINKER_SCRIPT_PATH})
+endfunction()
+
+# Adds a POST_BUILD step to TARGET_NAME that runs `fromelf --bin` on AXF_PATH,
+# with its output directed to OUTPUT_DIR/.
+function(add_bin_generation_command)
+    # SECTION_PATTERNS and OUTPUT_BIN_NAMES are parsed but not used by this function.
+    set(multiValueArgs SECTION_PATTERNS OUTPUT_BIN_NAMES)
+    set(oneValueArgs TARGET_NAME OUTPUT_DIR AXF_PATH)
+    cmake_parse_arguments(PARSED "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    add_custom_command(TARGET ${PARSED_TARGET_NAME}
+        POST_BUILD
+        COMMAND fromelf --bin --output=${PARSED_OUTPUT_DIR}/
+        ${PARSED_AXF_PATH})
+
+endfunction()
+
+# Function to assert the compiler version
+function(enforce_compiler_version)
+    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${MIN_ARM_CLANG_VERSION})
+        message( FATAL_ERROR "Arm compiler version must be ${MIN_ARM_CLANG_VERSION} or greater to support ${CMAKE_SYSTEM_PROCESSOR} architecture." )
+    endif()
+endfunction()
diff --git a/scripts/cmake/toolchains/bare-metal-gcc.cmake b/scripts/cmake/toolchains/bare-metal-gcc.cmake
new file mode 100644
index 0000000..2ffc1bb
--- /dev/null
+++ b/scripts/cmake/toolchains/bare-metal-gcc.cmake
@@ -0,0 +1,145 @@
+#----------------------------------------------------------------------------
+#  Copyright (c) 2021 Arm Limited. All rights reserved.
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#----------------------------------------------------------------------------
+# specify the cross compiler
+set(TRIPLET                         arm-none-eabi)
+
+set(CMAKE_C_COMPILER                ${TRIPLET}-gcc)
+set(CMAKE_CXX_COMPILER              ${TRIPLET}-g++)
+
+set(CMAKE_CROSSCOMPILING            true)
+set(CMAKE_SYSTEM_NAME               Generic)
+
+set(MIN_GCC_VERSION                 10.2.1)
+
+# Skip compiler test execution
+set(CMAKE_C_COMPILER_WORKS          1)
+set(CMAKE_CXX_COMPILER_WORKS        1)
+set(PLATFORM_HAL                    1)
+
+if (NOT DEFINED CMAKE_SYSTEM_PROCESSOR)
+    set(CMAKE_SYSTEM_PROCESSOR      cortex-m55)
+endif()
+
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL cortex-m55)
+    # Flags for cortex-m55
+    set(CPU_COMPILE_DEF             CPU_CORTEX_M55)
+    set(CPU_NAME                    ${CMAKE_SYSTEM_PROCESSOR})
+    set(FLOAT_ABI                   hard)
+    set(ARM_MATH_DSP                1)
+    set(ARM_MATH_LOOPUNROLL         1)
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL cortex-m33)
+    # Flags for cortex-m33 to go here
+endif()
+
+set(${CPU_COMPILE_DEF}              1)
+
+# Warning options
+add_compile_options(
+    -Wall
+    -Wextra
+    -Wvla
+    -Wno-psabi)
+
+# General purpose compile options:
+add_compile_options(
+    -funsigned-char
+    -fno-function-sections
+    "$<$<COMPILE_LANGUAGE:CXX>:-fno-unwind-tables;-fno-rtti;-fno-exceptions>")
+
+# Arch compile options:
+add_compile_options(
+    -mthumb
+    -mcpu=${CPU_NAME}
+    -mfloat-abi=${FLOAT_ABI}
+    -mlittle-endian
+    -MD)
+
+# Compile definitions:
+add_compile_definitions(
+    PLATFORM_HAL=${PLATFORM_HAL}
+    ${CPU_COMPILE_DEF}=1
+    $<$<BOOL:${ARM_MATH_DSP}>:ARM_MATH_DSP>
+    $<$<BOOL:${ARM_MATH_LOOPUNROLL}>:ARM_MATH_LOOPUNROLL>)
+
+# Link options:
+add_link_options(
+    -mthumb
+    -mcpu=${CPU_NAME}
+    -mfloat-abi=${FLOAT_ABI}
+    -mlittle-endian
+    --specs=nosys.specs
+    --stats
+    "$<$<CONFIG:RELEASE>:--no-debug>")
+
+# Function to add a map file output for the linker to dump diagnostic information to.
+function(add_target_map_file TARGET_NAME MAP_FILE_PATH)
+    target_link_options(${TARGET_NAME} PUBLIC
+        -Xlinker -Map=${MAP_FILE_PATH})
+endfunction()
+
+# Function to add linker option to use the chosen linker script.
+function(add_linker_script SCRIPT_DIR SCRIPT_NAME)
+    set(LINKER_SCRIPT_PATH ${SCRIPT_DIR}/${SCRIPT_NAME}.ld
+        CACHE STRING "Linker script path")
+    if (NOT EXISTS ${LINKER_SCRIPT_PATH})
+        message(FATAL_ERROR "Linker script not found: ${LINKER_SCRIPT_PATH}")
+    endif()
+    message(STATUS "Using linker script: ${LINKER_SCRIPT_PATH}")
+    add_link_options("SHELL:-T ${LINKER_SCRIPT_PATH}")
+endfunction()
+
+# Function to add POST_BUILD steps that use objcopy to extract each of the
+# SECTION_PATTERNS from the ELF at AXF_PATH into OUTPUT_DIR/<OUTPUT_BIN_NAMES[i]>.
+function(add_bin_generation_command)
+
+    set(multiValueArgs SECTION_PATTERNS OUTPUT_BIN_NAMES)
+    set(oneValueArgs TARGET_NAME OUTPUT_DIR AXF_PATH)
+    cmake_parse_arguments(PARSED "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    list(LENGTH PARSED_SECTION_PATTERNS N_SECTION_PATTERNS)
+    list(LENGTH PARSED_OUTPUT_BIN_NAMES N_OUTPUT_BIN_NAMES)
+
+    # Lengths are integers, so compare numerically; empty lists are rejected
+    # here because they would hand foreach() an invalid 0..-1 range below.
+    if (N_SECTION_PATTERNS EQUAL 0 OR
+            NOT N_SECTION_PATTERNS EQUAL N_OUTPUT_BIN_NAMES)
+        message(FATAL_ERROR "Section patterns and the output binary names "
+                "should be non-empty and of the same length")
+    endif()
+
+    message(STATUS "${TRIPLET}-objcopy requested to generate "
+                   "${N_OUTPUT_BIN_NAMES} bin files.")
+
+    math(EXPR MAX_IDX "${N_SECTION_PATTERNS} - 1")
+    foreach(IDX RANGE ${MAX_IDX})
+        list(GET PARSED_OUTPUT_BIN_NAMES ${IDX} OUTPUT_BIN_NAME)
+        list(GET PARSED_SECTION_PATTERNS ${IDX} SECTION_PATTERN)
+        # VERBATIM stops wildcard patterns such as "*.at_itcm" reaching the shell unescaped.
+        add_custom_command(TARGET ${PARSED_TARGET_NAME} POST_BUILD
+            COMMAND ${TRIPLET}-objcopy -O binary
+                --only-section ${SECTION_PATTERN} ${PARSED_AXF_PATH}
+                ${PARSED_OUTPUT_DIR}/${OUTPUT_BIN_NAME}
+            VERBATIM)
+    endforeach()
+endfunction()
+
+# Function to assert the compiler version
+function(enforce_compiler_version)
+    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${MIN_GCC_VERSION})
+        message( FATAL_ERROR "arm-none-eabi-gcc version must be ${MIN_GCC_VERSION} or greater to support ${CMAKE_SYSTEM_PROCESSOR} architecture." )
+    endif()
+endfunction()
diff --git a/scripts/cmake/native-toolchain.cmake b/scripts/cmake/toolchains/native-gcc.cmake
similarity index 60%
rename from scripts/cmake/native-toolchain.cmake
rename to scripts/cmake/toolchains/native-gcc.cmake
index 2e28cd4..4b5a62b 100644
--- a/scripts/cmake/native-toolchain.cmake
+++ b/scripts/cmake/toolchains/native-gcc.cmake
@@ -19,22 +19,34 @@
 set(CMAKE_C_LINKER_PREFERENCE   gcc)
 set(CMAKE_CXX_LINKER_PREFERENCE gcc)
 
-set(CMAKE_C_FLAGS_DEBUG         "-DDEBUG -O0 -g")
-set(CMAKE_C_FLAGS_RELEASE       "-DNDEBUG -O3")
-
-set(CMAKE_CXX_FLAGS_DEBUG       "-DDEBUG -O0 -g")
-set(CMAKE_CXX_FLAGS_RELEASE     "-DNDEBUG -O3")
-
 # Platform specific directory:
 set(PLATFORM_HAL                3)
-set(WARNING_FLAGS               "-Wsign-compare -Wshadow         \
-                                 -Wextra -Wall -Wunused-function \
-                                 -Wmissing-field-initializers    \
-                                 -Wswitch -Wvla -Wunused-parameter")
-set(SPECIAL_OPTS                "-fPIC -pthread")
-set(PLATFORM_FLAGS              "-DPLATFORM_HAL=${PLATFORM_HAL}")
-set(SPECIAL_OPTS_CXX            "-fno-threadsafe-statics")
-set(CMAKE_EXE_LINKER_FLAGS      "-lm -lc -lstdc++ --verbose")
+
+# Warning compile options:
+add_compile_options(
+    -Wsign-compare
+    -Wshadow
+    -Wextra
+    -Wall
+    -Wunused-function
+    -Wmissing-field-initializers
+    -Wswitch
+    -Wvla
+    -Wunused-parameter)
+
+# General purpose compile options (including the PLATFORM_HAL definition):
+add_compile_options(
+    -fPIC
+    -pthread
+    -DPLATFORM_HAL=${PLATFORM_HAL}
+    "$<$<COMPILE_LANGUAGE:CXX>:-fno-threadsafe-statics>")
+
+# Linker options
+add_link_options(
+    -lm
+    -lc
+    -lstdc++
+    --verbose)
 
 function(enforce_compiler_version)
 endfunction()
diff --git a/scripts/mps3/sse-300/images.txt b/scripts/mps3/sse-300/images.txt
new file mode 100644
index 0000000..b00c8b7
--- /dev/null
+++ b/scripts/mps3/sse-300/images.txt
@@ -0,0 +1,23 @@
+TITLE: Arm MPS3 FPGA prototyping board Images Configuration File
+
+; MCC mapping for Corstone-300 MPS3 bitfile package AN547
+; +-------------+---------------+-------------------------------+
+; | FPGA addr   | MCC addr      |  Region                       |
+; +-------------+---------------+-------------------------------+
+; | 0x00000000  | 0x00000000    | ITCM (NS)                     |
+; | 0x01000000  | 0x02000000    | BRAM or FPGA's data SRAM (NS) |
+; | 0x60000000  | 0x08000000    | DDR (NS)                      |
+; | 0x70000000  | 0x0c000000    | DDR (S)                       |
+; +-------------+---------------+-------------------------------+
+
+[IMAGES]
+
+TOTALIMAGES: 2 ;Number of Images (Max: 32)
+
+IMAGE0ADDRESS: 0x00000000 ; MCC@0x00000000 <=> FPGA@0x00000000
+IMAGE0UPDATE: AUTO
+IMAGE0FILE: \SOFTWARE\itcm.bin
+
+IMAGE1ADDRESS: 0x0c000000 ; MCC@0x0c000000 <=> FPGA@0x70000000
+IMAGE1UPDATE: AUTO
+IMAGE1FILE: \SOFTWARE\ddr.bin
diff --git a/scripts/py/gen_fpga_mem_map.py b/scripts/py/gen_fpga_mem_map.py
deleted file mode 100644
index 5703a8d..0000000
--- a/scripts/py/gen_fpga_mem_map.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#  Copyright (c) 2021 Arm Limited. All rights reserved.
-#  SPDX-License-Identifier: Apache-2.0
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-import os
-from argparse import ArgumentParser
-
-"""
-This file is used as part of post build steps to generate 'images.txt' file
-which can be copied over onto the MPS3 board's SD card. The purpose is to
-limit having to manually edit the file based on different load regions that
-the build scatter file might dictate.
-"""
-
-def is_commented(line):
-    if (line.startswith(";")):
-        return True
-    else:
-        return False
-
-
-def is_load_rom(line):
-    load_region_specifiers = ['LOAD_ROM', 'LD_ROM', 'LOAD_REGION']
-
-    for load_specifier in load_region_specifiers:
-        if line.startswith(load_specifier):
-            return True
-
-    return False
-
-
-class TargetSubsystem:
-
-    def __init__(self, target_subsystem_name: str):
-        """
-        Constructor for target class.
-        Arguments:
-            target_subsystem_name: name of the target subsystem
-        """
-        # Dict with mem map and binary names we expect
-        self.subsystems = {
-            "sse-300": {
-                "mmap_mcc" : {
-                    # FPGA addr |  MCC addr  |
-                    "0x00000000": "0x00000000", # ITCM (NS)
-                    "0x01000000": "0x02000000", # BRAM or FPGA's data SRAM (NS)
-                    "0x60000000": "0x08000000", # DDR (NS)
-                    "0x70000000": "0x0c000000"  # DDR (S)
-                },
-                "bin_names": {
-                    0: "itcm.bin",
-                    1: "dram.bin"
-                }
-            }
-        }
-
-        self.name = target_subsystem_name
-
-
-    def is_supported(self, target_subsystem: str) -> bool:
-        """
-        Checks if the target subsystem exists within systems
-        supported by this script
-        """
-        if target_subsystem in self.subsystems.keys():
-            return True
-
-        print(f"Platforms supported: {self.subsystems.keys()}")
-        return False
-
-
-    def mps3_mappings(self) -> dict:
-        """
-        Returns the FPGA <--> MCC address translations
-        as a dict
-        """
-        if self.is_supported(self.name):
-            return self.subsystems[self.name]['mmap_mcc']
-        return {}
-
-
-    def mps3_bin_names(self) -> dict:
-        """
-        Returns expected binary names for the executable built
-        for Cortex-M55 or Cortex-M55+Ethos-U55 targets in the
-        form of a dict with index and name
-        """
-        if self.is_supported(self.name):
-            return self.subsystems[self.name]['bin_names']
-
-        return {}
-
-
-def main(args):
-    """
-    Generates the output txt file with MCC to FPGA address mapping used
-    that is used by the MCC on FPGA to load executable regions into
-    correct regions in memory.
-    """
-    # List out arguments used:
-    scatter_file_path = args.scatter_file_path
-    target_subsystem_name = args.target_subsystem
-    output_file_path = args.output_file_path
-
-    target = TargetSubsystem(target_subsystem_name=target_subsystem_name)
-
-    if target.is_supported(target_subsystem_name) != True:
-        print(f'Target {target_subsystem_name} not supported.')
-        return
-
-    with open(scatter_file_path,'r') as scatter_file:
-        lines_read = scatter_file.readlines()
-        str_list = []
-
-        bin_names = None
-        mem_map = None
-
-        mem_map = target.mps3_mappings()
-        bin_names = target.mps3_bin_names()
-
-        str_list.append("TITLE: Arm MPS3 FPGA prototyping board Images Configuration File\n")
-        str_list.append("[IMAGES]\n\n")
-
-        cnt = 0
-        for line in lines_read:
-            if is_commented(line) or is_load_rom(line) != True:
-                continue
-
-            addr = line.split()[1]
-
-            if mem_map.get(addr, None) == None:
-                raise RuntimeError(
-                    'Translation for this address unavailable')
-            if cnt > len(bin_names):
-                raise RuntimeError(
-                    f"bin names len exceeded: {cnt}")
-
-            str_list.append("IMAGE" + str(cnt) + "ADDRESS: " +
-                mem_map[addr] + " ; MCC@" + mem_map[addr] +
-                " <=> FPGA@"  + addr + "\n")
-            str_list.append("IMAGE" + str(cnt) + "UPDATE: AUTO\n")
-            str_list.append("IMAGE" + str(cnt) + "FILE: \SOFTWARE\\" +
-                bin_names[cnt] + "\n\n")
-            cnt += 1
-
-        if cnt > 0 and cnt < 33:
-            str_list.insert(2,
-                "TOTALIMAGES: {} ;Number of Images (Max: 32)\n\n".format(
-                    cnt))
-        else:
-            raise RuntimeError('Invalid image count')
-
-        if os.path.exists(output_file_path):
-            os.remove(output_file_path)
-        print(''.join(str_list), file=open(output_file_path, "a"))
-
-
-if __name__ == "__main__":
-    parser = ArgumentParser()
-    parser.add_argument("--scatter_file_path", type=str, required=True,
-                        help="Path to the scatter file")
-    parser.add_argument("--target_subsystem", type=str, required=True,
-                        help="Target subsystem in use")
-    parser.add_argument("--output_file_path", type=str, required=True,
-                        help="Output file path")
-    args = parser.parse_args()
-    main(args)
diff --git a/source/application/hal/include/hal.h b/source/application/hal/include/hal.h
index 26ba1e3..a192ea7 100644
--- a/source/application/hal/include/hal.h
+++ b/source/application/hal/include/hal.h
@@ -33,10 +33,12 @@
 #include "data_psn.h"                   /* Data presentation abstraction */
 #include "timer.h"                      /* Timer/profiler API */
 
+#include <inttypes.h>
+
 /* Structure to define a platform context to be used by the application */
 typedef struct hal_platform_context {
     int inited;                         /**< initialised */
-    char plat_name[16];                 /**< name of this platform */
+    char plat_name[64];                 /**< name of this platform */
     data_acq_module * data_acq;         /**< data acquisition module pointer */
     data_psn_module * data_psn;         /**< data presentation module pointer */
     platform_timer *  timer;            /**< timer */
diff --git a/source/application/hal/platforms/bare-metal/bsp/bsp-core/retarget.c b/source/application/hal/platforms/bare-metal/bsp/bsp-core/retarget.c
index cf31a53..29c2023 100644
--- a/source/application/hal/platforms/bare-metal/bsp/bsp-core/retarget.c
+++ b/source/application/hal/platforms/bare-metal/bsp/bsp-core/retarget.c
@@ -14,177 +14,188 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
 
 #include "uart_stdout.h"
 #include "bsp_core_log.h"
 
-#if defined (MPS3_PLATFORM)
-#include "smm_mps3.h"
-#endif  /* MPS3_PLATFORM */
-
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
+
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100)
+/* Arm compiler re-targeting */
+
 #include <rt_misc.h>
 #include <rt_sys.h>
 
 
 /* Standard IO device handles. */
-#define STDIN   0x8001
-#define STDOUT  0x8002
-#define STDERR  0x8003
+#define STDIN  0x8001
+#define STDOUT 0x8002
+#define STDERR 0x8003
+
+#define RETARGET(fun) _sys##fun
+
+#else
+/* GNU compiler re-targeting */
+
+/*
+ * This type is used by the retargeted I/O functions to denote an open
+ * file.
+ */
+typedef int FILEHANDLE;
+
+/*
+ * Open a file. May return -1 if the file failed to open.
+ */
+extern FILEHANDLE _open(const char * /*name*/, int /*openmode*/);
+
+/* Standard IO device handles. */
+#define STDIN  0x00
+#define STDOUT 0x01
+#define STDERR 0x02
+
+#define RETARGET(fun) fun
+
+#endif
 
 /* Standard IO device name defines. */
-const char __stdin_name[]   = "STDIN";
-const char __stdout_name[]  = "STDOUT";
-const char __stderr_name[]  = "STDERR";
+const char __stdin_name[] __attribute__((aligned(4)))  = "STDIN";
+const char __stdout_name[] __attribute__((aligned(4))) = "STDOUT";
+const char __stderr_name[] __attribute__((aligned(4))) = "STDERR";
 
-int fputc(int ch, FILE *f)
-{
-    UNUSED(f);
-    return (UartPutc(ch));
+void _ttywrch(int ch) {
+    (void)fputc(ch, stdout);
 }
 
-int fgetc(FILE *f)
-{
-    UNUSED(f);
-    return (UartPutc(UartGetc()));
-}
-
-int ferror(FILE *f)
-{
-    UNUSED(f);
-    /* Your implementation of ferror */
-    return EOF;
-}
-
-void _ttywrch(int ch)
-{
-    UartPutc(ch);
-}
-
-FILEHANDLE _sys_open(const char *name, int openmode)
+FILEHANDLE RETARGET(_open)(const char *name, int openmode)
 {
     UNUSED(openmode);
 
-    /* Register standard Input Output devices. */
-    if (strcmp(name, "STDIN") == 0)
-    {
+    if (strcmp(name, __stdin_name) == 0) {
         return (STDIN);
     }
-    if (strcmp(name, "STDOUT") == 0)
-    {
+
+    if (strcmp(name, __stdout_name) == 0) {
         return (STDOUT);
     }
-    if (strcmp(name, "STDERR") == 0)
-    {
+
+    if (strcmp(name, __stderr_name) == 0) {
         return (STDERR);
     }
-    return (-1);
+
+    return -1;
 }
 
-int _sys_close(FILEHANDLE fh)
-{
-    if (fh > 0x8000)
-    {
-        return (0);
-    }
-    return (-1);
-}
-
-int _sys_write(FILEHANDLE fh, const unsigned char *buf, unsigned int len, int mode)
+int RETARGET(_write)(FILEHANDLE fh, const unsigned char *buf, unsigned int len, int mode)
 {
     UNUSED(mode);
-    if (fh == STDOUT || fh == STDERR )
-    {
-        /* Standard Output device. */
-        for (; len; len--)
-        {
-            UartPutc(*buf++);
-        }
-        return (0);
-    }
 
-    if (fh > 0x8000)
-    {
-        return (-1);
+    switch (fh) {
+    case STDOUT:
+    case STDERR: {
+        int c;
+
+        while (len-- > 0) {
+            c = fputc(*buf++, stdout);
+            if (c == EOF) {
+                return EOF;
+            }
+        }
+
+        return 0;
     }
-    return (-1);
+    default:
+        return EOF;
+    }
 }
 
-int _sys_read(FILEHANDLE fh, unsigned char *buf, unsigned int len, int mode)
+int RETARGET(_read)(FILEHANDLE fh, unsigned char *buf, unsigned int len, int mode)
 {
     UNUSED(mode);
-    if (fh == STDIN)
-    {
-        /* Standard Input device. */
-        for (; len; len--)
-        {
-            *buf++ = UartGetc();
+
+    switch (fh) {
+    case STDIN: {
+        int c;
+
+        while (len-- > 0) {
+            c = fgetc(stdin);
+            if (c == EOF) {
+                return EOF;
+            }
+
+            *buf++ = (unsigned char)c;
         }
-        return (0);
-    }
 
-    if (fh > 0x8000)
-    {
-        return (-1);
+        return 0;
     }
-    return (-1);
+    default:
+        return EOF;
+    }
 }
 
-int _sys_istty(FILEHANDLE fh)
+int RETARGET(_istty)(FILEHANDLE fh)
 {
-    if (fh > 0x8000)
-    {
-        return (1);
+    switch (fh) {
+    case STDIN:
+    case STDOUT:
+    case STDERR:
+        return 1;
+    default:
+        return 0;
     }
-    return (0);
 }
 
-int _sys_seek(FILEHANDLE fh, long pos)
+int RETARGET(_close)(FILEHANDLE fh)
 {
+    if (RETARGET(_istty(fh))) {
+        return 0;
+    }
+
+    return -1;
+}
+
+int RETARGET(_seek)(FILEHANDLE fh, long pos)
+{
+    UNUSED(fh);
     UNUSED(pos);
-    if (fh > 0x8000)
-    {
-        return (-1);
-    }
-    return (-1);
+
+    return -1;
 }
 
-int _sys_ensure(FILEHANDLE fh)
+int RETARGET(_ensure)(FILEHANDLE fh)
 {
-    if (fh > 0x8000)
-    {
-        return (-1);
-    }
-    return (-1);
+    UNUSED(fh);
+
+    return -1;
 }
 
-long _sys_flen(FILEHANDLE fh)
+long RETARGET(_flen)(FILEHANDLE fh)
 {
-    if (fh > 0x8000)
-    {
-        return (0);
+    if (RETARGET(_istty)(fh)) {
+        return 0;
     }
-    return (-1);
+
+    return -1;
 }
 
-int _sys_tmpnam(char *name, int sig, unsigned maxlen)
+int RETARGET(_tmpnam)(char *name, int sig, unsigned int maxlen)
 {
     UNUSED(name);
     UNUSED(sig);
     UNUSED(maxlen);
-    return (1);
+
+    return 1;
 }
 
-char *_sys_command_string(char *cmd, int len)
+char *RETARGET(_command_string)(char *cmd, int len)
 {
     UNUSED(len);
-    return (cmd);
+
+    return cmd;
 }
 
-void _sys_exit(int return_code)
+void RETARGET(_exit)(int return_code)
 {
     UartEndSimulation(return_code);
 }
@@ -192,44 +203,66 @@
 int system(const char *cmd)
 {
     UNUSED(cmd);
-    return (0);
+
+    return 0;
 }
 
 time_t time(time_t *timer)
 {
     time_t current;
 
-#if defined (MPS3_PLATFORM)
-    current = MPS3_FPGAIO->COUNTER;
-#else   /* MPS3_PLATFORM */
-    current  = 0;   /* No RTC implementation available. */
-#endif  /* MPS3_PLATFORM */
+    current = 0; // To Do !! No RTC implemented
 
     if (timer != NULL) {
         *timer = current;
     }
 
-    return (current);
+    return current;
 }
 
-#else   /* #if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) */
+void _clock_init(void) {}
 
-/******************************************************************************/
-/* Retarget functions for GNU Tools for ARM Embedded Processors               */
-/******************************************************************************/
-#include <stdio.h>
-#include <sys/stat.h>
-
-extern unsigned char UartPutc(unsigned char my_ch);
-
-__attribute__((used)) int _write(int fd, char *ptr, int len)
+clock_t clock(void)
 {
-    size_t i;
-    for (i = 0; i < len; i++)
-    {
-        UartPutc(ptr[i]); /* call character output function. */
-    }
-    return len;
+    return (clock_t)-1;
 }
 
-#endif /* #if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) */
+int remove(const char *arg) {
+    UNUSED(arg);
+
+    return 0;
+}
+
+int rename(const char *oldn, const char *newn)
+{
+    UNUSED(oldn);
+    UNUSED(newn);
+
+    return 0;
+}
+
+int fputc(int ch, FILE *f)
+{
+    UNUSED(f);
+
+    return UartPutc(ch);
+}
+
+int fgetc(FILE *f)
+{
+    UNUSED(f);
+
+    return UartPutc(UartGetc());
+}
+
+#ifndef ferror
+
+/* arm-none-eabi-gcc with newlib uses a define for ferror */
+int ferror(FILE *f)
+{
+    UNUSED(f);
+
+    return EOF;
+}
+
+#endif /* #ifndef ferror */
diff --git a/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/device_mps3.c b/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/device_mps3.c
index f4f2e6b..7040cf3 100644
--- a/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/device_mps3.c
+++ b/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/device_mps3.c
@@ -19,6 +19,8 @@
 #include "bsp_core_log.h"
 #include "smm_mps3.h"
 
+#include <inttypes.h>
+
 uint32_t GetMPS3CoreClock(void)
 {
     const uint32_t default_clock = 32000000;
@@ -28,7 +30,7 @@
     }
 
     if (!warned_once) {
-        warn("MPS3_SCC->CFG_ACLK reads 0. Assuming default clock of %u\n",
+        warn("MPS3_SCC->CFG_ACLK reads 0. Assuming default clock of %" PRIu32 "\n",
             default_clock);
         warned_once = 1;
     }
diff --git a/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/timer_mps3.c b/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/timer_mps3.c
index 0a3a8b1..a72103c 100644
--- a/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/timer_mps3.c
+++ b/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/timer_mps3.c
@@ -19,6 +19,8 @@
 #include "bsp_core_log.h"
 #include "device_mps3.h"
 
+#include <inttypes.h>
+
 void timer_reset(void)
 {
     MPS3_FPGAIO->CLK1HZ   = 0;
@@ -39,11 +41,11 @@
         .counter_fpga       = MPS3_FPGAIO->COUNTER,
         .counter_systick    = Get_SysTick_Cycle_Count()
     };
-    debug("Timestamp:\
-        \n\tCounter 1 Hz:   %u\
-        \n\tCounter 100 Hz: %u\
-        \n\tCounter FPGA:   %u\
-        \n\tCounter CPU:    %llu\n",
+    debug("Timestamp:"
+        "\n\tCounter 1 Hz:   %" PRIu32
+        "\n\tCounter 100 Hz: %" PRIu32
+        "\n\tCounter FPGA:   %" PRIu32
+        "\n\tCounter CPU:    %" PRIu64 "\n",
         t.counter_1Hz, t.counter_100Hz, t.counter_fpga, t.counter_systick);
     return t;
 }
diff --git a/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/uart_stdout.c b/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/uart_stdout.c
index 1bf8291..ed12c8b 100644
--- a/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/uart_stdout.c
+++ b/source/application/hal/platforms/bare-metal/bsp/bsp-packs/mps3/uart_stdout.c
@@ -107,10 +107,11 @@
                 return false;
 
             case CR:                            /* CR - done, stop editing line.  */
-                *lp = c;
+                UartPutc (*lp = c);             /* Echo and store character.      */
                 lp++;                           /* Increment line pointer         */
                 cnt++;                          /* and count.                     */
                 c = LF;
+                break;
             default:
                 UartPutc (*lp = c);             /* Echo and store character.      */
                 fflush (stdout);
@@ -124,7 +125,7 @@
     return true;
 }
 
-void UartEndSimulation(int code)
+__attribute__((noreturn)) void UartEndSimulation(int code)
 {
     UartPutc((char) 0x4);   /* End of simulation */
     UartPutc((char) code);  /* End of simulation */
diff --git a/source/application/hal/platforms/bare-metal/bsp/cmsis-device/cmsis.c b/source/application/hal/platforms/bare-metal/bsp/cmsis-device/cmsis.c
index c9cf53d..b7f318c 100644
--- a/source/application/hal/platforms/bare-metal/bsp/cmsis-device/cmsis.c
+++ b/source/application/hal/platforms/bare-metal/bsp/cmsis-device/cmsis.c
@@ -24,9 +24,6 @@
 #define __XTAL            (25000000)      /* Oscillator frequency             */
 #define __SYSTEM_CLOCK    (__XTAL)
 
-#define STR(x) #x
-#define RESET_REG(n) __ASM volatile("MOV " STR(r##n) ", #0" : : : STR(r##n))
-
 #if defined(CPU_CORTEX_M55)
 #define CCR_DL   (1 << 19)
 #else
@@ -69,25 +66,6 @@
                  (3U << 11U*2U) );
 #endif
 
-    /* Initialise registers r0-r12 and LR(=r14)
-     * They must have a valid value before being potentially pushed to stack by
-     * C calling convention or by context saving in exception handling
-     */
-    RESET_REG(0);
-    RESET_REG(1);
-    RESET_REG(2);
-    RESET_REG(3);
-    RESET_REG(4);
-    RESET_REG(5);
-    RESET_REG(6);
-    RESET_REG(7);
-    RESET_REG(8);
-    RESET_REG(9);
-    RESET_REG(10);
-    RESET_REG(11);
-    RESET_REG(12);
-    RESET_REG(14);
-
 #if defined (__VTOR_PRESENT) && (__VTOR_PRESENT == 1U)
   SCB->VTOR = (uint32_t) &__Vectors;
 #endif
diff --git a/source/application/hal/platforms/bare-metal/bsp/cmsis-device/irqs.c b/source/application/hal/platforms/bare-metal/bsp/cmsis-device/irqs.c
index c6f54b1..7c9f4b8 100644
--- a/source/application/hal/platforms/bare-metal/bsp/cmsis-device/irqs.c
+++ b/source/application/hal/platforms/bare-metal/bsp/cmsis-device/irqs.c
@@ -23,24 +23,36 @@
 #include "cmsis.h"
 
 #include <stdio.h>
+#include <inttypes.h>
 
 static uint64_t cpu_cycle_count = 0;
 
 /**
+ * External references
+ */
+extern uint32_t __INITIAL_SP;
+extern uint32_t __STACK_LIMIT;
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+    extern uint32_t __STACK_SEAL;
+#endif
+
+extern __NO_RETURN void __PROGRAM_START(void);
+
+/**
  * @brief   Dump core registers on stdout
  */
 static void LogCoreCPURegisters(void)
 {
-    printf("CTRL    : 0x%08x\n", __get_CONTROL());
-    printf("IPSR    : 0x%08x\n", __get_IPSR());
-    printf("APSR    : 0x%08x\n", __get_APSR());
-    printf("xPSR    : 0x%08x\n", __get_xPSR());
-    printf("PSP     : 0x%08x\n", __get_PSP());
-    printf("MSP     : 0x%08x\n", __get_MSP());
-    printf("PRIMASK : 0x%08x\n", __get_PRIMASK());
-    printf("BASEPRI : 0x%08x\n", __get_BASEPRI());
-    printf("FAULTMSK: 0x%08x\n", __get_FAULTMASK());
-    printf("PC      : 0x%08x\n", __current_pc());
+    printf("CTRL    : 0x%08" PRIx32 "\n", __get_CONTROL());
+    printf("IPSR    : 0x%08" PRIx32 "\n", __get_IPSR());
+    printf("APSR    : 0x%08" PRIx32 "\n", __get_APSR());
+    printf("xPSR    : 0x%08" PRIx32 "\n", __get_xPSR());
+    printf("PSP     : 0x%08" PRIx32 "\n", __get_PSP());
+    printf("MSP     : 0x%08" PRIx32 "\n", __get_MSP());
+    printf("PRIMASK : 0x%08" PRIx32 "\n", __get_PRIMASK());
+    printf("BASEPRI : 0x%08" PRIx32 "\n", __get_BASEPRI());
+    printf("FAULTMSK: 0x%08" PRIx32 "\n", __get_FAULTMASK());
 }
 
 /**
@@ -158,6 +170,9 @@
     cpu_cycle_count += SysTick->LOAD + 1;
 }
 
+/**
+ * Gets the current SysTick derived counter value
+ */
 uint64_t Get_SysTick_Cycle_Count(void)
 {
     uint32_t systick_val;
@@ -169,47 +184,27 @@
     return cpu_cycle_count + (SysTick->LOAD - systick_val);
 }
 
-
-/**
- * These symbols are provided by the ARM lib - needs the stack and heap
- * regions in the scatter file.
- */
-extern void Image$$ARM_LIB_STACK$$ZI$$Base();
-extern void Image$$ARM_LIB_STACK$$ZI$$Limit();
-extern void Image$$ARM_LIB_HEAP$$ZI$$Base();
-extern void Image$$ARM_LIB_HEAP$$ZI$$Limit();
-extern __attribute__((noreturn)) void __main();
-
-__attribute__((naked, used)) void __user_setup_stackheap()
-{
-    __ASM volatile("LDR  r0, =Image$$ARM_LIB_HEAP$$ZI$$Base");
-    __ASM volatile("LDR  r1, =Image$$ARM_LIB_STACK$$ZI$$Limit");
-    __ASM volatile("LDR  r2, =Image$$ARM_LIB_HEAP$$ZI$$Limit");
-    __ASM volatile("LDR  r3, =Image$$ARM_LIB_STACK$$ZI$$Base");
-    __ASM volatile("bx   lr");
-}
-
 /**
  * Interrupt vector table.
  */
-irq_vec_type __Vectors[] __attribute__((section("RESET"), used)) = {
-    &Image$$ARM_LIB_STACK$$ZI$$Limit,  /* 0 Initial SP */
-    &Reset_Handler      , /* 1 Initial PC, set to entry point */
+irq_vec_type __VECTOR_TABLE[] __VECTOR_TABLE_ATTRIBUTE = {
+    (irq_vec_type)(&__INITIAL_SP),  /*     Initial Stack Pointer */
+    Reset_Handler      , /* 1 Initial PC, set to entry point */
 
-    &NMI_Handler        , /* 2 (-14) NMI Handler            */
-    &HardFault_Handler  , /* 3 (-13) Hard Fault Handler     */
-    &MemManage_Handler  , /* 4 (-12) MPU Fault Handler      */
-    &BusFault_Handler   , /* 5 (-11) Bus Fault Handler      */
-    &UsageFault_Handler , /* 6 (-10) Usage Fault Handler    */
-    &SecureFault_Handler, /* 7 ( -9) Secure Fault Handler   */
+    NMI_Handler        , /* 2 (-14) NMI Handler            */
+    HardFault_Handler  , /* 3 (-13) Hard Fault Handler     */
+    MemManage_Handler  , /* 4 (-12) MPU Fault Handler      */
+    BusFault_Handler   , /* 5 (-11) Bus Fault Handler      */
+    UsageFault_Handler , /* 6 (-10) Usage Fault Handler    */
+    SecureFault_Handler, /* 7 ( -9) Secure Fault Handler   */
     0                   , /* 8 ( -8) Reserved               */
     0                   , /* 9 ( -7) Reserved               */
     0                   , /* 10 ( -6) Reserved              */
-    &SVC_Handler        , /* 11 ( -5) SVCall Handler        */
-    &DebugMon_Handler   , /* 12 ( -4) Debug Monitor Handler */
+    SVC_Handler        , /* 11 ( -5) SVCall Handler        */
+    DebugMon_Handler   , /* 12 ( -4) Debug Monitor Handler */
     0                   , /* 13 ( -3) Reserved              */
-    &PendSV_Handler     , /* 14 ( -2) PendSV Handler        */
-    &SysTick_Handler    , /* 15 ( -1) SysTick Handler       */
+    PendSV_Handler     , /* 14 ( -2) PendSV Handler        */
+    SysTick_Handler    , /* 15 ( -1) SysTick Handler       */
 
     /* External sources to be populated by user. */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*   0 -  16 */
@@ -222,6 +217,9 @@
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 112 -  128 */
 };
 
+/**
+ * SysTick initialisation
+ */
 int Init_SysTick(void)
 {
     const uint32_t ticks_10ms = GetSystemCoreClock()/100 + 1;
@@ -252,8 +250,8 @@
     /* Configure the system tick. */
     Init_SysTick();
 
-    /* libcxx supplied entry point. */
-    __main();
+    /* cmsis supplied entry point. */
+    __PROGRAM_START();
 }
 
 #ifdef __cplusplus
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
new file mode 100644
index 0000000..8bb99cd
--- /dev/null
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+__STACK_SIZE = 0x00060000;
+__HEAP_SIZE  = 0x000f0000;
+
+/* System memory brief */
+MEMORY
+{
+  ITCM  (rx)  : ORIGIN = 0x00000000, LENGTH = 0x00080000
+  DTCM  (rwx) : ORIGIN = 0x20000000, LENGTH = 0x00080000
+  BRAM  (rwx) : ORIGIN = 0x11000000, LENGTH = 0x00200000
+  SRAM  (rwx) : ORIGIN = 0x31000000, LENGTH = 0x00400000
+  DDR   (rwx) : ORIGIN = 0x70000000, LENGTH = 0x02000000
+}
+
+/* Linker script to place sections and symbol values. Should be used together
+ * with other linker script that defines memory regions ITCM and RAM.
+ * It references following symbols, which must be defined in code:
+ *   Reset_Handler : Entry of reset handler
+ *
+ * It defines following symbols, which code can use without definition:
+ *   __exidx_start
+ *   __exidx_end
+ *   __copy_table_start__
+ *   __copy_table_end__
+ *   __zero_table_start__
+ *   __zero_table_end__
+ *   __etext
+ *   __data_start__
+ *   __preinit_array_start
+ *   __preinit_array_end
+ *   __init_array_start
+ *   __init_array_end
+ *   __fini_array_start
+ *   __fini_array_end
+ *   __data_end__
+ *   __bss_start__
+ *   __bss_end__
+ *   __end__
+ *   end
+ *   __HeapLimit
+ *   __StackLimit
+ *   __StackTop
+ *   __stack
+ */
+ENTRY(Reset_Handler)
+
+SECTIONS
+{
+  .text.at_itcm :
+  {
+    KEEP(*(.vectors))
+    *(.text*)
+
+    KEEP(*(.init))
+    KEEP(*(.fini))
+
+    /* .ctors */
+    *crtbegin.o(.ctors)
+    *crtbegin?.o(.ctors)
+    *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+    *(SORT(.ctors.*))
+    *(.ctors)
+
+    /* .dtors */
+    *crtbegin.o(.dtors)
+    *crtbegin?.o(.dtors)
+    *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+    *(SORT(.dtors.*))
+    *(.dtors)
+
+    KEEP(*(.eh_frame*))
+  } > ITCM
+
+  .ARM.extab.at_itcm :
+  {
+    *(.ARM.extab* .gnu.linkonce.armextab.*)
+  } > ITCM
+
+  __exidx_start = .;
+  .ARM.exidx.at_itcm :
+  {
+    *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+  } > ITCM
+  __exidx_end = .;
+
+  .zero.table.at_itcm :
+  {
+    . = ALIGN(4);
+    __zero_table_start__ = .;
+
+    LONG (__bss_start__)
+    LONG ((__bss_end__ - __bss_start__)/4) /* Size is in 32-bit words */
+
+    __zero_table_end__ = .;
+  } > ITCM
+
+  .copy.table.at_itcm :
+  {
+    . = ALIGN(4);
+    __copy_table_start__ = .;
+
+    /* Section to be copied - part 1: any data to be placed in BRAM */
+    LONG (__etext)
+    LONG (__data_start__)
+    LONG ((__data_end__ - __data_start__)/4) /* Size is in 32-bit words */
+
+    /* Section to be copied - part 2: RO data for DTCM */
+    LONG (__etext2)
+    LONG (__ro_data_start__)
+    LONG ((__ro_data_end__ - __ro_data_start__)/4) /* Size is in 32-bit words */
+
+    __copy_table_end__ = .;
+  } > ITCM
+
+  __itcm_total = ALIGN(4);
+
+  ASSERT( __itcm_total < (ORIGIN(ITCM) + LENGTH(ITCM)), "ITCM overflow")
+
+  .sram :
+  {
+    . = ALIGN(16);
+    *(.bss.NoInit.activation_buf)
+    . = ALIGN(16);
+  } > SRAM AT > SRAM
+
+  .bss :
+  {
+    . = ALIGN(4);
+    __bss_start__ = .;
+    *(.bss)
+    *(.bss.*)
+    *(COMMON)
+    . = ALIGN(4);
+    __bss_end__ = .;
+  } > DTCM AT > DTCM
+
+  .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) :
+  {
+    . = ALIGN(8);
+    __StackLimit = .;
+    . = . + __STACK_SIZE;
+    . = ALIGN(8);
+    __StackTop = .;
+  } > DTCM
+  PROVIDE(__stack = __StackTop);
+  ASSERT(
+    (__STACK_SIZE + __bss_end__ - __bss_start__) <= LENGTH(DTCM),
+    "DTCM overflow")
+
+  .ddr.at_ddr :
+  {
+    /* __attribute__((aligned(16))) is not handled by the CMSIS startup code.
+     * Force the alignment here as a workaround */
+    . = ALIGN(16);
+    *(ifm)
+    . = ALIGN(16);
+    *(nn_model)
+    . = ALIGN (16);
+    *(labels)
+    . = ALIGN (16);
+    *(activation_buf)
+    . = ALIGN (16);
+  } > DDR AT > DDR
+
+  /**
+   * Location counter can end up 2byte aligned with narrow Thumb code but
+   * __etext is assumed by startup code to be the LMA of a section in DTCM
+   * which must be 4byte aligned
+   */
+  __etext = ALIGN (4);
+
+  .bram.at_ddr :  AT (__etext)
+  {
+    __data_start__ = .;
+    *(vtable)
+    *(.data)
+    *(.data.*)
+    . = ALIGN(4);
+    PROVIDE_HIDDEN (__preinit_array_start = .);
+    KEEP(*(.preinit_array))
+    PROVIDE_HIDDEN (__preinit_array_end = .);
+    . = ALIGN(4);
+    PROVIDE_HIDDEN (__init_array_start = .);
+    KEEP(*(SORT(.init_array.*)))
+    KEEP(*(.init_array))
+    PROVIDE_HIDDEN (__init_array_end = .);
+    . = ALIGN(4);
+    PROVIDE_HIDDEN (__fini_array_start = .);
+    KEEP(*(SORT(.fini_array.*)))
+    KEEP(*(.fini_array))
+    PROVIDE_HIDDEN (__fini_array_end = .);
+    KEEP(*(.jcr*))
+    . = ALIGN(4);
+
+    __data_end__ = .;
+  } > BRAM
+
+  __etext2 = __etext + (__data_end__ - __data_start__);
+
+  .data.at_ddr : AT (__etext2)
+  {
+    . = ALIGN(4);
+    __ro_data_start__ = .;
+
+    *(.rodata*)
+    . = ALIGN(4);
+    * (npu_driver_version)
+    . = ALIGN(4);
+    * (npu_driver_arch_version)
+    . = ALIGN(4);
+
+    __ro_data_end__ = .;
+  } > BRAM
+
+  .heap (COPY) :
+  {
+    . = ALIGN(8);
+    __end__ = .;
+    PROVIDE(end = .);
+    . = . + __HEAP_SIZE;
+    . = ALIGN(8);
+    __HeapLimit = .;
+  } > BRAM
+
+  ASSERT (
+      (__ro_data_end__ - __ro_data_start__)
+    + (__data_end__  - __data_start__)
+    + __HEAP_SIZE <= LENGTH(BRAM),
+    "BRAM overflow")
+}
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
index 327d511..55ed5d7 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
@@ -78,12 +78,12 @@
 LOAD_REGION_1       0x70000000                  0x02000000
 {
     ;-----------------------------------------------------
-    ; 32 MiB of DRAM space for neural network model,
+    ; 32 MiB of DDR space for neural network model,
     ; input vectors and labels. If the activation buffer
     ; size required by the network is bigger than the
     ; SRAM size available, it is accommodated here.
     ;-----------------------------------------------------
-    dram.bin        0x70000000 ALIGN 16         0x02000000
+    ddr.bin        0x70000000 ALIGN 16         0x02000000
     {
         ; nn model's baked in input matrices
         *.o (ifm)
@@ -110,9 +110,9 @@
     }
 
     ;-----------------------------------------------------
-    ; Remaining part of the 2MiB BRAM used as heap space.
-    ; 0x00200000 - 0x00040000 = 0x001C0000 (1.75 MiB)
+    ; 960 KiB of remaining part of the 2MiB BRAM used as
+    ; heap space. 0x000F0000 of 0x001C0000 available.
     ;-----------------------------------------------------
-    ARM_LIB_HEAP    0x11040000 EMPTY ALIGN 8    0x001C0000
+    ARM_LIB_HEAP    0x11040000 EMPTY ALIGN 8    0x000F0000
     {}
 }
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld
new file mode 100644
index 0000000..8bb99cd
--- /dev/null
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.ld
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+__STACK_SIZE = 0x00060000;
+__HEAP_SIZE  = 0x000f0000;
+
+/* System memory brief */
+MEMORY
+{
+  ITCM  (rx)  : ORIGIN = 0x00000000, LENGTH = 0x00080000
+  DTCM  (rwx) : ORIGIN = 0x20000000, LENGTH = 0x00080000
+  BRAM  (rwx) : ORIGIN = 0x11000000, LENGTH = 0x00200000
+  SRAM  (rwx) : ORIGIN = 0x31000000, LENGTH = 0x00400000
+  DDR   (rwx) : ORIGIN = 0x70000000, LENGTH = 0x02000000
+}
+
+/* Linker script to place sections and symbol values. Should be used together
+ * with other linker script that defines memory regions ITCM and RAM.
+ * It references following symbols, which must be defined in code:
+ *   Reset_Handler : Entry of reset handler
+ *
+ * It defines following symbols, which code can use without definition:
+ *   __exidx_start
+ *   __exidx_end
+ *   __copy_table_start__
+ *   __copy_table_end__
+ *   __zero_table_start__
+ *   __zero_table_end__
+ *   __etext
+ *   __data_start__
+ *   __preinit_array_start
+ *   __preinit_array_end
+ *   __init_array_start
+ *   __init_array_end
+ *   __fini_array_start
+ *   __fini_array_end
+ *   __data_end__
+ *   __bss_start__
+ *   __bss_end__
+ *   __end__
+ *   end
+ *   __HeapLimit
+ *   __StackLimit
+ *   __StackTop
+ *   __stack
+ */
+ENTRY(Reset_Handler)
+
+SECTIONS
+{
+  .text.at_itcm :
+  {
+    KEEP(*(.vectors))
+    *(.text*)
+
+    KEEP(*(.init))
+    KEEP(*(.fini))
+
+    /* .ctors */
+    *crtbegin.o(.ctors)
+    *crtbegin?.o(.ctors)
+    *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+    *(SORT(.ctors.*))
+    *(.ctors)
+
+    /* .dtors */
+    *crtbegin.o(.dtors)
+    *crtbegin?.o(.dtors)
+    *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+    *(SORT(.dtors.*))
+    *(.dtors)
+
+    KEEP(*(.eh_frame*))
+  } > ITCM
+
+  .ARM.extab.at_itcm :
+  {
+    *(.ARM.extab* .gnu.linkonce.armextab.*)
+  } > ITCM
+
+  __exidx_start = .;
+  .ARM.exidx.at_itcm :
+  {
+    *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+  } > ITCM
+  __exidx_end = .;
+
+  .zero.table.at_itcm :
+  {
+    . = ALIGN(4);
+    __zero_table_start__ = .;
+
+    LONG (__bss_start__)
+    LONG ((__bss_end__ - __bss_start__)/4) /* Size is in 32-bit words */
+
+    __zero_table_end__ = .;
+  } > ITCM
+
+  .copy.table.at_itcm :
+  {
+    . = ALIGN(4);
+    __copy_table_start__ = .;
+
+    /* Section to be copied - part 1: any data to be placed in BRAM */
+    LONG (__etext)
+    LONG (__data_start__)
+    LONG ((__data_end__ - __data_start__)/4) /* Size is in 32-bit words */
+
+    /* Section to be copied - part 2: RO data for DTCM */
+    LONG (__etext2)
+    LONG (__ro_data_start__)
+    LONG ((__ro_data_end__ - __ro_data_start__)/4) /* Size is in 32-bit words */
+
+    __copy_table_end__ = .;
+  } > ITCM
+
+  __itcm_total = ALIGN(4);
+
+  ASSERT( __itcm_total < (ORIGIN(ITCM) + LENGTH(ITCM)), "ITCM overflow")
+
+  .sram :
+  {
+    . = ALIGN(16);
+    *(.bss.NoInit.activation_buf)
+    . = ALIGN(16);
+  } > SRAM AT > SRAM
+
+  .bss :
+  {
+    . = ALIGN(4);
+    __bss_start__ = .;
+    *(.bss)
+    *(.bss.*)
+    *(COMMON)
+    . = ALIGN(4);
+    __bss_end__ = .;
+  } > DTCM AT > DTCM
+
+  .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) :
+  {
+    . = ALIGN(8);
+    __StackLimit = .;
+    . = . + __STACK_SIZE;
+    . = ALIGN(8);
+    __StackTop = .;
+  } > DTCM
+  PROVIDE(__stack = __StackTop);
+  ASSERT(
+    (__STACK_SIZE + __bss_end__ - __bss_start__) <= LENGTH(DTCM),
+    "DTCM overflow")
+
+  .ddr.at_ddr :
+  {
+    /* __attribute__((aligned(16))) is not handled by the CMSIS startup code.
+     * Force the alignment here as a workaround */
+    . = ALIGN(16);
+    *(ifm)
+    . = ALIGN(16);
+    *(nn_model)
+    . = ALIGN (16);
+    *(labels)
+    . = ALIGN (16);
+    *(activation_buf)
+    . = ALIGN (16);
+  } > DDR AT > DDR
+
+  /**
+   * Location counter can end up 2byte aligned with narrow Thumb code but
+   * __etext is assumed by startup code to be the LMA of a section in DTCM
+   * which must be 4byte aligned
+   */
+  __etext = ALIGN (4);
+
+  .bram.at_ddr :  AT (__etext)
+  {
+    __data_start__ = .;
+    *(vtable)
+    *(.data)
+    *(.data.*)
+    . = ALIGN(4);
+    PROVIDE_HIDDEN (__preinit_array_start = .);
+    KEEP(*(.preinit_array))
+    PROVIDE_HIDDEN (__preinit_array_end = .);
+    . = ALIGN(4);
+    PROVIDE_HIDDEN (__init_array_start = .);
+    KEEP(*(SORT(.init_array.*)))
+    KEEP(*(.init_array))
+    PROVIDE_HIDDEN (__init_array_end = .);
+    . = ALIGN(4);
+    PROVIDE_HIDDEN (__fini_array_start = .);
+    KEEP(*(SORT(.fini_array.*)))
+    KEEP(*(.fini_array))
+    PROVIDE_HIDDEN (__fini_array_end = .);
+    KEEP(*(.jcr*))
+    . = ALIGN(4);
+
+    __data_end__ = .;
+  } > BRAM
+
+  __etext2 = __etext + (__data_end__ - __data_start__);
+
+  .data.at_ddr : AT (__etext2)
+  {
+    . = ALIGN(4);
+    __ro_data_start__ = .;
+
+    *(.rodata*)
+    . = ALIGN(4);
+    * (npu_driver_version)
+    . = ALIGN(4);
+    * (npu_driver_arch_version)
+    . = ALIGN(4);
+
+    __ro_data_end__ = .;
+  } > BRAM
+
+  .heap (COPY) :
+  {
+    . = ALIGN(8);
+    __end__ = .;
+    PROVIDE(end = .);
+    . = . + __HEAP_SIZE;
+    . = ALIGN(8);
+    __HeapLimit = .;
+  } > BRAM
+
+  ASSERT (
+      (__ro_data_end__ - __ro_data_start__)
+    + (__data_end__  - __data_start__)
+    + __HEAP_SIZE <= LENGTH(BRAM),
+    "BRAM overflow")
+}
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
index a1ffb49..deb4214 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/simple_platform.sct
@@ -36,35 +36,36 @@
     }
 
     ;-----------------------------------------------------
-    ; BRAM or FPGA data SRAM region worth 2MiB
-    ;-----------------------------------------------------
-    bram.bin        0x11000000  UNINIT ALIGN 16 0x00200000
-    {
-        ; activation buffers a.k.a tensor arena
-        *.o (.bss.NoInit.activation_buf)
-    }
-
-    ;-----------------------------------------------------
-    ; 128kiB of 512kiB bank is used for any other RW or ZI
+    ; 128kiB of 512kiB DTCM is used for any other RW or ZI
     ; data. Note: this region is internal to the Cortex-M
-    ; CPU
+    ; CPU.
     ;-----------------------------------------------------
     dtcm.bin        0x20000000                  0x00020000
     {
+        ; Any R/W and/or zero initialised data
         .ANY(+RW +ZI)
     }
 
     ;-----------------------------------------------------
-    ; 128kiB of stack space within the DTCM region
+    ; 384kiB of stack space within the DTCM region. See
+    ; `dtcm.bin` for the first section. Note: by virtue of
+    ; being part of DTCM, this region is only accessible
+    ; from Cortex-M55.
     ;-----------------------------------------------------
-    ARM_LIB_STACK   0x20020000 EMPTY ALIGN 8    0x00020000
+    ARM_LIB_STACK   0x20020000 EMPTY ALIGN 8    0x00060000
     {}
 
     ;-----------------------------------------------------
-    ; 256kiB of heap space within the DTCM region
+    ; SSE-300's internal SRAM of 4MiB - reserved for
+    ; activation buffers.
+    ; This region should have 3 cycle read latency from
+    ; both Cortex-M55 and Ethos-U55
     ;-----------------------------------------------------
-    ARM_LIB_HEAP    0x20040000 EMPTY ALIGN 8    0x00040000
-    {}
+    isram.bin       0x31000000  UNINIT ALIGN 16 0x00400000
+    {
+        ; activation buffers a.k.a tensor arena
+        *.o (.bss.NoInit.activation_buf)
+    }
 }
 
 ;---------------------------------------------------------
@@ -73,9 +74,12 @@
 LOAD_REGION_1       0x70000000                  0x02000000
 {
     ;-----------------------------------------------------
-    ; 32 MiB of DRAM space for nn model and input vectors
+    ; 32 MiB of DDR space for neural network model,
+    ; input vectors and labels. If the activation buffer
+    ; size required by the network is bigger than the
+    ; SRAM size available, it is accommodated here.
     ;-----------------------------------------------------
-    dram.bin        0x70000000 ALIGN 16         0x02000000
+    ddr.bin        0x70000000 ALIGN 16         0x02000000
     {
         ; nn model's baked in input matrices
         *.o (ifm)
@@ -83,20 +87,28 @@
         ; nn model
         *.o (nn_model)
 
+        ; labels
+        *.o (labels)
+
         ; if the activation buffer (tensor arena) doesn't
         ; fit in the SRAM region, we accommodate it here
         *.o (activation_buf)
     }
 
     ;-----------------------------------------------------
-    ; SSE-300's internal SRAM of 2MiB - reserved for
-    ; activation buffers.
-    ; This region should have 3 cycle read latency from
-    ; both Cortex-M55 and Ethos-U55
+    ; First 256kiB of BRAM (FPGA SRAM) used for RO data.
+    ; Note: Total BRAM size available is 2MiB.
     ;-----------------------------------------------------
-    isram.bin       0x31000000                  0x00080000
+    bram.bin        0x11000000          ALIGN 8 0x00040000
     {
         ; RO data (incl. unwinding tables for debugging)
         .ANY (+RO-DATA)
     }
+
+    ;-----------------------------------------------------
+    ; 960 KiB of remaining part of the 2MiB BRAM used as
+    ; heap space. 0x000F0000 of 0x001C0000 available.
+    ;-----------------------------------------------------
+    ARM_LIB_HEAP    0x11040000 EMPTY ALIGN 8    0x000F0000
+    {}
 }
diff --git a/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c b/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
index ef31a71..64f2376 100644
--- a/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
+++ b/source/application/hal/platforms/bare-metal/timer/baremetal_timer.c
@@ -19,6 +19,7 @@
 
 #include <assert.h>
 #include <string.h>
+#include <inttypes.h>
 
 #if defined (ARM_NPU)
 
@@ -40,7 +41,7 @@
 static uint64_t bm_get_npu_total_cycle_diff(time_counter *st,
                                             time_counter *end);
 
-/** 
+/**
  * @brief       Gets the difference in active NPU cycle counts.
  * @param[in]   st      Pointer to time_counter value at start time.
  * @param[in]   end     Pointer to time_counter value at end.
@@ -100,7 +101,7 @@
 #endif /* defined (ARM_NPU) */
 
 #if defined(MPS3_PLATFORM)
-/** 
+/**
  * @brief       Wrapper for getting milliseconds duration between time counters
  * @param[in]   st      Pointer to time_counter value at start time.
  * @param[in]   end     Pointer to time_counter value at end.
@@ -148,7 +149,7 @@
 static uint32_t bm_get_cpu_cycles_diff(time_counter *st, time_counter *end);
 
 /**
- * @brief       Initialiser for bare metal timer. 
+ * @brief       Initialiser for bare metal timer.
  * @param[in]   timer  Platform timer to initialize.
  **/
 void init_timer(platform_timer *timer)
@@ -307,7 +308,11 @@
     };
 
 #if defined (ARM_NPU)
-    debug("NPU total cc: %llu; NPU idle cc: %u; NPU axi0 read cc: %u;  NPU axi0 write cc: %u; NPU axi1 read cc: %u\n",
+    debug("NPU total cc: %" PRIu64
+        "; NPU idle cc: %" PRIu32
+        "; NPU axi0 read cc: %" PRIu32
+        "; NPU axi0 write cc: %" PRIu32
+        "; NPU axi1 read cc: %" PRIu32 "\n",
         t.npu_total_ccnt,
         t.npu_idle_ccnt,
         t.npu_axi0_read_ccnt,
diff --git a/source/application/hal/platforms/bare-metal/utils/system_init.c b/source/application/hal/platforms/bare-metal/utils/system_init.c
index 0a6a1b3..f95f214 100644
--- a/source/application/hal/platforms/bare-metal/utils/system_init.c
+++ b/source/application/hal/platforms/bare-metal/utils/system_init.c
@@ -19,11 +19,12 @@
 #include "uart_stdout.h"
 
 #include <string.h>
+#include <inttypes.h>
 
 #if defined(MPS3_PLATFORM)
-#define CREATE_MASK(msb, lsb)           (((1U << ((msb) - (lsb) + 1)) - 1) << (lsb))
-#define MASK_BITS(arg, msb, lsb)        ((arg) & CREATE_MASK(msb, lsb))
-#define EXTRACT_BITS(arg, msb, lsb)     (MASK_BITS(arg, msb, lsb) >> (lsb))
+#define CREATE_MASK(msb, lsb)           (int)(((1U << ((msb) - (lsb) + 1)) - 1) << (lsb))
+#define MASK_BITS(arg, msb, lsb)        (int)((arg) & CREATE_MASK(msb, lsb))
+#define EXTRACT_BITS(arg, msb, lsb)     ((int)((uint32_t)MASK_BITS(arg, msb, lsb) >> (lsb))) /* shift as unsigned so a set bit 31 is not sign-extended */
 #endif /* MPS3_PLATFORM */
 
 int system_init(void)
@@ -35,6 +36,7 @@
     uint32_t rev = 0;
     uint32_t aid = 0;
     uint32_t fpga_clk = 0;
+    const uint32_t ascii_A = (uint32_t)('A');
 
     /* Initialise the LEDs as the switches are */
     MPS3_FPGAIO->LED = MPS3_FPGAIO->SWITCHES & 0xFF;
@@ -43,7 +45,7 @@
     /* UART init - will enable valid use of printf (stdout
      * re-directed at this UART (UART0) */
     UartStdOutInit();
-    info("Processor internal clock: %u Hz\n", GetSystemCoreClock());
+    info("Processor internal clock: %" PRIu32 " Hz\n", GetSystemCoreClock());
 
 #if defined(MPS3_PLATFORM)
     /* Get revision information from various registers */
@@ -53,15 +55,15 @@
     apnote = EXTRACT_BITS(fpgaid, 15, 4);
     fpga_clk = GetMPS3CoreClock();
 
-    info("V2M-MPS3 revision %c\n\n", rev + 'A');
-    info("Application Note AN%x, Revision %c\n", apnote,
-        EXTRACT_BITS(aid, 23, 20) + 'A');
+    info("V2M-MPS3 revision %c\n\n", (char)(rev + ascii_A));
+    info("Application Note AN%" PRIx32 ", Revision %c\n", apnote,
+        (char)(EXTRACT_BITS(aid, 23, 20) + ascii_A));
     info("MPS3 build %d\n", EXTRACT_BITS(aid, 31, 24));
-    info("MPS3 core clock has been set to: %d Hz\n", fpga_clk);
+    info("MPS3 core clock has been set to: %" PRIu32 " Hz\n", fpga_clk);
 
     /* Display CPU ID */
     id = SCB->CPUID;
-    info("CPU ID: 0x%08x\n", id);
+    info("CPU ID: 0x%08" PRIx32 "\n", id);
 
     if(EXTRACT_BITS(id, 15, 8) == 0xD2) {
         if (EXTRACT_BITS(id, 7, 4) == 2) {
@@ -110,9 +112,5 @@
 
 void system_name(char* name, size_t size)
 {
-#if defined (MPS3_PLATFORM)
-    strncpy(name, "mps3-bare", size);
-#else /* MPS3_PLATFORM */
-    strncpy(name, "FVP", size);
-#endif /* MPS3_PLATFORM */
+    strncpy(name, DESIGN_NAME, size);
 }
\ No newline at end of file
diff --git a/source/application/hal/platforms/native/data_acquisition/data_acq.c b/source/application/hal/platforms/native/data_acquisition/data_acq.c
index 01f47fa..9b6815b 100644
--- a/source/application/hal/platforms/native/data_acquisition/data_acq.c
+++ b/source/application/hal/platforms/native/data_acquisition/data_acq.c
@@ -37,7 +37,9 @@
  **/
 static int get_user_input(char* user_input, int size)
 {
-    fgets(user_input, size, stdin);
+    if (NULL == fgets(user_input, size, stdin)) {
+        return 1;
+    }
     return 0;
 }
 
diff --git a/source/application/hal/platforms/native/data_presentation/log/log.c b/source/application/hal/platforms/native/data_presentation/log/log.c
index 6ce6684..8dffba9 100644
--- a/source/application/hal/platforms/native/data_presentation/log/log.c
+++ b/source/application/hal/platforms/native/data_presentation/log/log.c
@@ -20,6 +20,14 @@
 
 #include <stdint.h>
 
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+    #pragma clang diagnostic push
+    #pragma clang diagnostic ignored "-Wunused-parameter"
+#elif defined(__GNUC__)
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
 int log_psn_init(void)
 {
     return 0;
@@ -50,10 +58,10 @@
 
 
 int log_display_box_icon(const uint32_t pos_x, const uint32_t pos_y,
-                         const uint32_t width, const uint32_t height, 
+                         const uint32_t width, const uint32_t height,
                          const uint16_t color)
 {
-    debug("Showing rectangular, width: %d, height: %d, color: %d, x: %d, y: %d\n", 
+    debug("Showing rectangular, width: %d, height: %d, color: %d, x: %d, y: %d\n",
             width, height, color, pos_x, pos_y);
     return 0;
 }
@@ -69,3 +77,9 @@
     debug("Setting text color: %d\n", color);
     return 0;
 }
+
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+    #pragma clang diagnostic pop
+#elif defined(__GNUC__)
+    #pragma GCC diagnostic pop
+#endif
diff --git a/source/application/main/Classifier.cc b/source/application/main/Classifier.cc
index 9a47f3d..c5519fb 100644
--- a/source/application/main/Classifier.cc
+++ b/source/application/main/Classifier.cc
@@ -23,6 +23,7 @@
 #include <string>
 #include <set>
 #include <cstdint>
+#include <inttypes.h>
 
 namespace arm {
 namespace app {
@@ -125,7 +126,7 @@
 
         /* Sanity checks. */
         if (totalOutputSize < topNCount) {
-            printf_err("Output vector is smaller than %u\n", topNCount);
+            printf_err("Output vector is smaller than %" PRIu32 "\n", topNCount);
             return false;
         } else if (totalOutputSize != labels.size()) {
             printf_err("Output size doesn't match the labels' size\n");
diff --git a/source/application/main/Mfcc.cc b/source/application/main/Mfcc.cc
index 9ddcb5d..c8ad138 100644
--- a/source/application/main/Mfcc.cc
+++ b/source/application/main/Mfcc.cc
@@ -19,6 +19,7 @@
 #include "PlatformMath.hpp"
 
 #include <cfloat>
+#include <inttypes.h>
 
 namespace arm {
 namespace app {
@@ -49,16 +50,16 @@
         char strC[1024];
         snprintf(strC, sizeof(strC) - 1, "\n   \
             \n\t Sampling frequency:         %f\
-            \n\t Number of filter banks:     %u\
+            \n\t Number of filter banks:     %" PRIu32 "\
             \n\t Mel frequency limit (low):  %f\
             \n\t Mel frequency limit (high): %f\
-            \n\t Number of MFCC features:    %u\
-            \n\t Frame length:               %u\
-            \n\t Padded frame length:        %u\
+            \n\t Number of MFCC features:    %" PRIu32 "\
+            \n\t Frame length:               %" PRIu32 "\
+            \n\t Padded frame length:        %" PRIu32 "\
             \n\t Using HTK for Mel scale:    %s\n",
-                this->m_samplingFreq, this->m_numFbankBins, this->m_melLoFreq,
-                this->m_melHiFreq, this->m_numMfccFeatures, this->m_frameLen,
-                this->m_frameLenPadded, this->m_useHtkMethod ? "yes" : "no");
+            this->m_samplingFreq, this->m_numFbankBins, this->m_melLoFreq,
+            this->m_melHiFreq, this->m_numMfccFeatures, this->m_frameLen,
+            this->m_frameLenPadded, this->m_useHtkMethod ? "yes" : "no");
         return std::string{strC};
     }
 
diff --git a/source/application/main/Profiler.cc b/source/application/main/Profiler.cc
index 10a828a..5924414 100644
--- a/source/application/main/Profiler.cc
+++ b/source/application/main/Profiler.cc
@@ -208,7 +208,7 @@
     }
 
     void printStatisticsHeader(uint32_t samplesNum) {
-        info("Number of samples: %i\n", samplesNum);
+        info("Number of samples: %" PRIu32 "\n", samplesNum);
         info("%s\n", "Total / Avg./ Min / Max");
     }
 
@@ -224,7 +224,8 @@
 
             for (Statistics &stat: result.data) {
                 if (printFullStat) {
-                    info("%s %s: %llu / %.0f / %llu / %llu \n", stat.name.c_str(), stat.unit.c_str(),
+                    info("%s %s: %" PRIu64 " / %.0f / %" PRIu64 " / %" PRIu64 " \n",
+                         stat.name.c_str(), stat.unit.c_str(),
                          stat.total, stat.avrg, stat.min, stat.max);
                 } else {
                     info("%s %s: %.0f\n", stat.name.c_str(), stat.unit.c_str(), stat.avrg);
diff --git a/source/application/main/UseCaseCommonUtils.cc b/source/application/main/UseCaseCommonUtils.cc
index 3acf53f..b3653d9 100644
--- a/source/application/main/UseCaseCommonUtils.cc
+++ b/source/application/main/UseCaseCommonUtils.cc
@@ -18,6 +18,8 @@
 
 #include "InputFiles.hpp"
 
+#include <inttypes.h>
+
 namespace arm {
 namespace app {
 
@@ -103,7 +105,7 @@
                                                  currentFilename.size(),
                                                  dataPsnTxtStartX, yVal, 0);
 
-            info("\t%u => %s\n", i, currentFilename.c_str());
+            info("\t%" PRIu32 " => %s\n", i, currentFilename.c_str());
         }
 #endif /* NUMBER_OF_FILES > 0 */
 
diff --git a/source/application/tensorflow-lite-micro/Model.cc b/source/application/tensorflow-lite-micro/Model.cc
index abf97b6..4a7f0a4 100644
--- a/source/application/tensorflow-lite-micro/Model.cc
+++ b/source/application/tensorflow-lite-micro/Model.cc
@@ -19,6 +19,7 @@
 #include "hal.h"
 
 #include <cstdint>
+#include <inttypes.h>
 
 /* Initialise the model */
 arm::app::Model::~Model()
@@ -156,8 +157,8 @@
 
     debug("\ttensor is assigned to 0x%p\n", tensor);
     info("\ttensor type is %s\n", TfLiteTypeGetName(tensor->type));
-    info("\ttensor occupies %u bytes with dimensions\n",
-         (uint32_t)tensor->bytes);
+    info("\ttensor occupies %zu bytes with dimensions\n",
+         tensor->bytes);
     for (int i = 0 ; i < tensor->dims->size; ++i) {
         info ("\t\t%d: %3d\n", i, tensor->dims->data[i]);
     }
@@ -165,7 +166,7 @@
     TfLiteQuantization quant = tensor->quantization;
     if (kTfLiteAffineQuantization == quant.type) {
         auto* quantParams = (TfLiteAffineQuantization*)quant.params;
-        info("Quant dimension: %u\n", quantParams->quantized_dimension);
+        info("Quant dimension: %" PRIi32 "\n", quantParams->quantized_dimension);
         for (int i = 0; i < quantParams->scale->size; ++i) {
             info("Scale[%d] = %f\n", i, quantParams->scale->data[i]);
         }
@@ -195,11 +196,11 @@
     info("Activation buffer (a.k.a tensor arena) size used: %zu\n",
         this->_m_pInterpreter->arena_used_bytes());
 
-    const uint32_t nOperators = this->_m_pInterpreter->operators_size();
-    info("Number of operators: %u\n", nOperators);
+    const size_t nOperators = this->_m_pInterpreter->operators_size();
+    info("Number of operators: %zu\n", nOperators);
 
     /* For each operator, display registration information */
-    for (uint32_t i = 0 ; i < nOperators; ++i) {
+    for (size_t i = 0 ; i < nOperators; ++i) {
         const tflite::NodeAndRegistration nodeReg =
             this->_m_pInterpreter->node_and_registration(i);
         const TfLiteRegistration* reg = nodeReg.registration;
@@ -213,7 +214,7 @@
                             tflite::BuiltinOperator(reg->builtin_code)));
             }
         }
-        info("\tOperator %u: %s\n", i, opName.c_str());
+        info("\tOperator %zu: %s\n", i, opName.c_str());
     }
 }
 
diff --git a/source/use_case/ad/src/MelSpectrogram.cc b/source/use_case/ad/src/MelSpectrogram.cc
index 372ebd8..f1752e1 100644
--- a/source/use_case/ad/src/MelSpectrogram.cc
+++ b/source/use_case/ad/src/MelSpectrogram.cc
@@ -19,6 +19,7 @@
 #include "PlatformMath.hpp"
 
 #include <cfloat>
+#include <inttypes.h>
 
 namespace arm {
 namespace app {
@@ -46,16 +47,16 @@
     {
         char strC[1024];
         snprintf(strC, sizeof(strC) - 1, "\n   \
-    \n\t Sampling frequency:         %f\
-    \n\t Number of filter banks:     %u\
-    \n\t Mel frequency limit (low):  %f\
-    \n\t Mel frequency limit (high): %f\
-    \n\t Frame length:               %u\
-    \n\t Padded frame length:        %u\
-    \n\t Using HTK for Mel scale:    %s\n",
-                 this->m_samplingFreq, this->m_numFbankBins, this->m_melLoFreq,
-                 this->m_melHiFreq, this->m_frameLen,
-                 this->m_frameLenPadded, this->m_useHtkMethod ? "yes" : "no");
+            \n\t Sampling frequency:         %f\
+            \n\t Number of filter banks:     %" PRIu32 "\
+            \n\t Mel frequency limit (low):  %f\
+            \n\t Mel frequency limit (high): %f\
+            \n\t Frame length:               %" PRIu32 "\
+            \n\t Padded frame length:        %" PRIu32 "\
+            \n\t Using HTK for Mel scale:    %s\n",
+            this->m_samplingFreq, this->m_numFbankBins, this->m_melLoFreq,
+            this->m_melHiFreq, this->m_frameLen,
+            this->m_frameLenPadded, this->m_useHtkMethod ? "yes" : "no");
         return std::string{strC};
     }
 
diff --git a/source/use_case/ad/src/UseCaseHandler.cc b/source/use_case/ad/src/UseCaseHandler.cc
index e99821f..233b0f4 100644
--- a/source/use_case/ad/src/UseCaseHandler.cc
+++ b/source/use_case/ad/src/UseCaseHandler.cc
@@ -167,7 +167,7 @@
             platform.data_psn->present_data_text(
                     str_inf.c_str(), str_inf.size(),
                     dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
-            info("Running inference on audio clip %u => %s\n", currentIndex, get_filename(currentIndex));
+            info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex, get_filename(currentIndex));
 
             /* Start sliding through audio clip. */
             while (audioDataSlider.HasNext()) {
@@ -246,7 +246,7 @@
     static bool SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx)
     {
         if (idx >= NUMBER_OF_FILES) {
-            printf_err("Invalid idx %u (expected less than %u)\n",
+            printf_err("Invalid idx %" PRIu32 " (expected less than %u)\n",
                        idx, NUMBER_OF_FILES);
             return false;
         }
diff --git a/source/use_case/asr/src/AsrClassifier.cc b/source/use_case/asr/src/AsrClassifier.cc
index df26a7f..c18bd88 100644
--- a/source/use_case/asr/src/AsrClassifier.cc
+++ b/source/use_case/asr/src/AsrClassifier.cc
@@ -91,7 +91,7 @@
             printf_err("Output tensor expected to be %dD\n", minTensorDims);
             return false;
         } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
-            printf_err("Output vectors are smaller than %u\n", topNCount);
+            printf_err("Output vectors are smaller than %" PRIu32 "\n", topNCount);
             return false;
         } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
             printf("Output size doesn't match the labels' size\n");
diff --git a/source/use_case/asr/src/MainLoop.cc b/source/use_case/asr/src/MainLoop.cc
index c5a26a4..9950541 100644
--- a/source/use_case/asr/src/MainLoop.cc
+++ b/source/use_case/asr/src/MainLoop.cc
@@ -208,7 +208,7 @@
     /* Check to make sure that the input tensor supports the above
      * context and inner lengths. */
     if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
-        printf_err("Input rows not compatible with ctx of %u\n",
+        printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
             inputCtxLen);
         return 0;
     }
diff --git a/source/use_case/asr/src/UseCaseHandler.cc b/source/use_case/asr/src/UseCaseHandler.cc
index 7199785..43b17dc 100644
--- a/source/use_case/asr/src/UseCaseHandler.cc
+++ b/source/use_case/asr/src/UseCaseHandler.cc
@@ -131,7 +131,8 @@
 
             /* Audio clip must have enough samples to produce 1 MFCC feature. */
             if (audioArrSize < mfccParamsWinLen) {
-                printf_err("Not enough audio samples, minimum needed is %u\n", mfccParamsWinLen);
+                printf_err("Not enough audio samples, minimum needed is %" PRIu32 "\n",
+                    mfccParamsWinLen);
                 return false;
             }
 
@@ -151,7 +152,7 @@
                                 str_inf.c_str(), str_inf.size(),
                                 dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
 
-            info("Running inference on audio clip %u => %s\n", currentIndex,
+            info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex,
                  get_filename(currentIndex));
 
             size_t inferenceWindowLen = audioParamsWinLen;
@@ -237,7 +238,7 @@
     static bool SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx)
     {
         if (idx >= NUMBER_OF_FILES) {
-            printf_err("Invalid idx %u (expected less than %u)\n",
+            printf_err("Invalid idx %" PRIu32 " (expected less than %u)\n",
                        idx, NUMBER_OF_FILES);
             return false;
         }
@@ -269,7 +270,7 @@
         for (const auto & result : results) {
             std::string infResultStr = audio::asr::DecodeOutput(result.m_resultVec);
 
-            info("For timestamp: %f (inference #: %u); label: %s\n",
+            info("For timestamp: %f (inference #: %" PRIu32 "); label: %s\n",
                  result.m_timeStamp, result.m_inferenceNumber,
                  infResultStr.c_str());
         }
diff --git a/source/use_case/asr/src/Wav2LetterPostprocess.cc b/source/use_case/asr/src/Wav2LetterPostprocess.cc
index 9157a6f..b1bcdc8 100644
--- a/source/use_case/asr/src/Wav2LetterPostprocess.cc
+++ b/source/use_case/asr/src/Wav2LetterPostprocess.cc
@@ -69,7 +69,7 @@
                                                   tensor->dims->data[arm::app::Wav2LetterModel::ms_outputRowsIdx],
                                                   lastIteration);
             default:
-                printf_err("Unsupported axis index: %u\n", axisIdx);
+                printf_err("Unsupported axis index: %" PRIu32 "\n", axisIdx);
         }
 
         return false;
@@ -83,7 +83,7 @@
         }
 
         if (static_cast<int>(axisIdx) >= tensor->dims->size) {
-            printf_err("Invalid axis index: %u; Max: %d\n",
+            printf_err("Invalid axis index: %" PRIu32 "; Max: %d\n",
                 axisIdx, tensor->dims->size);
             return false;
         }
diff --git a/source/use_case/img_class/src/UseCaseHandler.cc b/source/use_case/img_class/src/UseCaseHandler.cc
index 22e6ba0..fa77512 100644
--- a/source/use_case/img_class/src/UseCaseHandler.cc
+++ b/source/use_case/img_class/src/UseCaseHandler.cc
@@ -22,6 +22,8 @@
 #include "UseCaseCommonUtils.hpp"
 #include "hal.h"
 
+#include <inttypes.h>
+
 using ImgClassClassifier = arm::app::Classifier;
 
 namespace arm {
@@ -142,7 +144,7 @@
                                     dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
 
             /* Run inference over this image. */
-            info("Running inference on image %u => %s\n", ctx.Get<uint32_t>("imgIndex"),
+            info("Running inference on image %" PRIu32 " => %s\n", ctx.Get<uint32_t>("imgIndex"),
                 get_filename(ctx.Get<uint32_t>("imgIndex")));
 
             if (!RunInference(model, profiler)) {
@@ -185,13 +187,13 @@
                               inputTensor->bytes : IMAGE_DATA_SIZE;
         const uint8_t* imgSrc = get_img_array(imIdx);
         if (nullptr == imgSrc) {
-            printf_err("Failed to get image index %u (max: %u)\n", imIdx,
+            printf_err("Failed to get image index %" PRIu32 " (max: %u)\n", imIdx,
                        NUMBER_OF_FILES - 1);
             return false;
         }
 
         memcpy(inputTensor->data.data, imgSrc, copySz);
-        debug("Image %u loaded\n", imIdx);
+        debug("Image %" PRIu32 " loaded\n", imIdx);
         return true;
     }
 
@@ -210,7 +212,7 @@
     static bool SetAppCtxImageIdx(ApplicationContext& ctx, uint32_t idx)
     {
         if (idx >= NUMBER_OF_FILES) {
-            printf_err("Invalid idx %u (expected less than %u)\n",
+            printf_err("Invalid idx %" PRIu32 " (expected less than %u)\n",
                        idx, NUMBER_OF_FILES);
             return false;
         }
@@ -254,8 +256,9 @@
                                         dataPsnTxtStartX2, rowIdx2, 0);
             rowIdx2 += dataPsnTxtYIncr;
 
-            info("%u) %u (%f) -> %s\n", i, results[i].m_labelIdx,
-                 results[i].m_normalisedVal, results[i].m_label.c_str());
+            info("%" PRIu32 ") %" PRIu32 " (%f) -> %s\n", i,
+                results[i].m_labelIdx, results[i].m_normalisedVal,
+                results[i].m_label.c_str());
         }
 
         return true;
diff --git a/source/use_case/kws/src/UseCaseHandler.cc b/source/use_case/kws/src/UseCaseHandler.cc
index d61985d..eaf53c1 100644
--- a/source/use_case/kws/src/UseCaseHandler.cc
+++ b/source/use_case/kws/src/UseCaseHandler.cc
@@ -187,7 +187,7 @@
             platform.data_psn->present_data_text(
                                 str_inf.c_str(), str_inf.size(),
                                 dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
-            info("Running inference on audio clip %u => %s\n", currentIndex,
+            info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex,
                  get_filename(currentIndex));
 
             /* Start sliding through audio clip. */
@@ -270,7 +270,7 @@
     static bool SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx)
     {
         if (idx >= NUMBER_OF_FILES) {
-            printf_err("Invalid idx %u (expected less than %u)\n",
+            printf_err("Invalid idx %" PRIu32 " (expected less than %u)\n",
                        idx, NUMBER_OF_FILES);
             return false;
         }
@@ -313,13 +313,15 @@
             rowIdx1 += dataPsnTxtYIncr;
 
             if (results[i].m_resultVec.empty()) {
-                info("For timestamp: %f (inference #: %u); label: %s; threshold: %f\n",
+                info("For timestamp: %f (inference #: %" PRIu32
+                     "); label: %s; threshold: %f\n",
                      results[i].m_timeStamp, results[i].m_inferenceNumber,
                      topKeyword.c_str(),
                      results[i].m_threshold);
             } else {
                 for (uint32_t j = 0; j < results[i].m_resultVec.size(); ++j) {
-                    info("For timestamp: %f (inference #: %u); label: %s, score: %f; threshold: %f\n",
+                    info("For timestamp: %f (inference #: %" PRIu32
+                         "); label: %s, score: %f; threshold: %f\n",
                          results[i].m_timeStamp,
                          results[i].m_inferenceNumber,
                          results[i].m_resultVec[j].m_label.c_str(),
diff --git a/source/use_case/kws_asr/src/AsrClassifier.cc b/source/use_case/kws_asr/src/AsrClassifier.cc
index f1fa6f1..57d5058 100644
--- a/source/use_case/kws_asr/src/AsrClassifier.cc
+++ b/source/use_case/kws_asr/src/AsrClassifier.cc
@@ -91,7 +91,7 @@
             printf_err("Output tensor expected to be 3D (1, m, n)\n");
             return false;
         } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
-            printf_err("Output vectors are smaller than %u\n", topNCount);
+            printf_err("Output vectors are smaller than %" PRIu32 "\n", topNCount);
             return false;
         } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
             printf("Output size doesn't match the labels' size\n");
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
index 95e5a8f..631b7c1 100644
--- a/source/use_case/kws_asr/src/MainLoop.cc
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -212,7 +212,7 @@
 
     /* Check to make sure that the input tensor supports the above context and inner lengths. */
     if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
-        printf_err("Input rows not compatible with ctx of %u\n",
+        printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
                    inputCtxLen);
         return 0;
     }
diff --git a/source/use_case/kws_asr/src/UseCaseHandler.cc b/source/use_case/kws_asr/src/UseCaseHandler.cc
index 7025d6d..0560e88 100644
--- a/source/use_case/kws_asr/src/UseCaseHandler.cc
+++ b/source/use_case/kws_asr/src/UseCaseHandler.cc
@@ -159,7 +159,7 @@
          * this means an overlap of 0.5 seconds. */
         auto kwsAudioDataStride = kwsAudioDataWindowSize / 2;
 
-        info("KWS audio data window size %u\n", kwsAudioDataWindowSize);
+        info("KWS audio data window size %" PRIu32 "\n", kwsAudioDataWindowSize);
 
         /* Stride must be multiple of mfcc features window stride to re-use features. */
         if (0 != kwsAudioDataStride % kwsMfccWindowStride) {
@@ -208,7 +208,7 @@
                             str_inf.c_str(), str_inf.size(),
                             dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);
 
-        info("Running KWS inference on audio clip %u => %s\n",
+        info("Running KWS inference on audio clip %" PRIu32 " => %s\n",
              currentIndex, get_filename(currentIndex));
 
         /* Start sliding through audio clip. */
@@ -329,7 +329,8 @@
 
         /* Make sure the input tensor supports the above context and inner lengths. */
         if (asrInputRows <= 2 * asrInputCtxLen || asrInputRows <= asrInputInnerLen) {
-            printf_err("ASR input rows not compatible with ctx length %u\n", asrInputCtxLen);
+            printf_err("ASR input rows not compatible with ctx length %" PRIu32 "\n",
+                asrInputCtxLen);
             return false;
         }
 
@@ -354,7 +355,8 @@
         /* Audio clip must have enough samples to produce 1 MFCC feature. */
         std::vector<int16_t> audioBuffer = std::vector<int16_t>(audioArr, audioArr + audioArrSize);
         if (audioArrSize < asrMfccParamsWinLen) {
-            printf_err("Not enough audio samples, minimum needed is %u\n", asrMfccParamsWinLen);
+            printf_err("Not enough audio samples, minimum needed is %" PRIu32 "\n",
+                asrMfccParamsWinLen);
             return false;
         }
 
@@ -485,7 +487,7 @@
     static bool SetAppCtxClipIdx(ApplicationContext& ctx, uint32_t idx)
     {
         if (idx >= NUMBER_OF_FILES) {
-            printf_err("Invalid idx %u (expected less than %u)\n",
+            printf_err("Invalid idx %" PRIu32 " (expected less than %u)\n",
                 idx, NUMBER_OF_FILES);
             return false;
         }
@@ -525,11 +527,11 @@
                         dataPsnTxtStartX1, rowIdx1, 0);
             rowIdx1 += dataPsnTxtYIncr;
 
-            info("For timestamp: %f (inference #: %u); threshold: %f\n",
+            info("For timestamp: %f (inference #: %" PRIu32 "); threshold: %f\n",
                  results[i].m_timeStamp, results[i].m_inferenceNumber,
                  results[i].m_threshold);
             for (uint32_t j = 0; j < results[i].m_resultVec.size(); ++j) {
-                info("\t\tlabel @ %u: %s, score: %f\n", j,
+                info("\t\tlabel @ %" PRIu32 ": %s, score: %f\n", j,
                      results[i].m_resultVec[j].m_label.c_str(),
                      results[i].m_resultVec[j].m_normalisedVal);
             }
@@ -558,7 +560,7 @@
             /* Get the final result string using the decoder. */
             std::string infResultStr = audio::asr::DecodeOutput(result.m_resultVec);
 
-            info("Result for inf %u: %s\n", result.m_inferenceNumber,
+            info("Result for inf %" PRIu32 ": %s\n", result.m_inferenceNumber,
                  infResultStr.c_str());
         }
 
diff --git a/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc b/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
index ee3aba0..e3c0c20 100644
--- a/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
+++ b/source/use_case/kws_asr/src/Wav2LetterPostprocess.cc
@@ -63,7 +63,7 @@
                                                   tensor->dims->data[arm::app::Wav2LetterModel::ms_outputColsIdx],
                                                   lastIteration);
             default:
-                printf_err("Unsupported axis index: %u\n", axisIdx);
+                printf_err("Unsupported axis index: %" PRIu32 "\n", axisIdx);
         }
 
         return false;
@@ -77,7 +77,7 @@
         }
 
         if (static_cast<int>(axisIdx) >= tensor->dims->size) {
-            printf_err("Invalid axis index: %u; Max: %d\n",
+            printf_err("Invalid axis index: %" PRIu32 "; Max: %d\n",
                 axisIdx, tensor->dims->size);
             return false;
         }
diff --git a/source/use_case/kws_asr/usecase.cmake b/source/use_case/kws_asr/usecase.cmake
index d8ac897..3eb61e5 100644
--- a/source/use_case/kws_asr/usecase.cmake
+++ b/source/use_case/kws_asr/usecase.cmake
@@ -15,20 +15,6 @@
 #  limitations under the License.
 #----------------------------------------------------------------------------
 
-# If the path to a directory or source file has been defined,
-# get the type here (FILEPATH or PATH):
-#if (DEFINED ${use_case}_FILE_PATH)
-#    get_path_type(${${use_case}_FILE_PATH} PATH_TYPE)
-#
-#    # Set the default type if path is not a dir or file path (or undefined)
-#    if (NOT ${PATH_TYPE} STREQUAL PATH AND NOT ${PATH_TYPE} STREQUAL FILEPATH)
-#        message(FATAL_ERROR "Invalid ${use_case}_FILE_PATH. It should be a dir or file path.")
-#    endif()
-#else()
-#    # Default is a directory path
-#    set(PATH_TYPE PATH)
-#endif()
-
 USER_OPTION(${use_case}_FILE_PATH "Directory with WAV files, or path to a single WAV file, to use in the evaluation application."
     ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
     PATH_OR_FILE)