MLECO-2492 Add CPP OD example with TFLITE-ArmnnDelegate
Signed-off-by: Dvir Markovich <dvir.markovich@arm.com>
Change-Id: If412c15ba49abe8370a570260b0a8ed8de305b7c
diff --git a/samples/ObjectDetection/CMakeLists.txt b/samples/ObjectDetection/CMakeLists.txt
index dbcd55f..953c4ed 100644
--- a/samples/ObjectDetection/CMakeLists.txt
+++ b/samples/ObjectDetection/CMakeLists.txt
@@ -2,9 +2,12 @@
# SPDX-License-Identifier: MIT
cmake_minimum_required(VERSION 3.0.2)
+project (object_detection_example)
set(CMAKE_C_STANDARD 99)
set(CMAKE_CXX_STANDARD 14)
+#location of FindTfLite.cmake and FindTfLiteSrc.cmake
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/../../delegate/cmake/Modules/")
# Make the standard a requirement => prevent fallback to previous
# supported standard
@@ -15,14 +18,15 @@
set(CMAKE_C_EXTENSIONS OFF)
set(CMAKE_CXX_EXTENSIONS OFF)
-project (object_detection_example)
-
set(CMAKE_C_FLAGS_DEBUG "-DDEBUG -O0 -g -fPIC")
set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG -O3 -fPIC")
set(CMAKE_CXX_FLAGS_DEBUG "-DDEBUG -O0 -g -fPIC")
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -fPIC")
+SET(USE_ARMNN_DELEGATE False CACHE BOOL "Use delegate file")
+message("USE_ARMNN_DELEGATE=${USE_ARMNN_DELEGATE}")
+
include(ExternalProject)
# Build in release mode by default
@@ -40,9 +44,23 @@
include(../common/cmake/find_opencv.cmake)
include(../common/cmake/find_armnn.cmake)
+if( USE_ARMNN_DELEGATE )
+ ## Add TfLite dependency
+ find_package(TfLiteSrc REQUIRED MODULE)
+ find_package(TfLite REQUIRED MODULE)
+ ## Add Flatbuffers dependency
+ find_package(Flatbuffers REQUIRED MODULE)
+
+ add_definitions(-DUSE_TF_LITE_DELEGATE)
+endif()
include_directories(include)
-include_directories(../common/include/ArmnnUtils)
+## chose the correct instance of ArmnnNetworkExecutor.hpp
+if( USE_ARMNN_DELEGATE )
+ include_directories(include/delegate)
+else()
+ include_directories(../common/include/ArmnnUtils)
+endif()
include_directories(../common/include/Utils)
include_directories(../common/include/CVUtils)
@@ -50,7 +68,22 @@
file(GLOB CVUTILS_SOURCES "../common/src/CVUtils**/*.cpp")
file(GLOB UTILS_SOURCES "../common/src/Utils**/*.cpp")
list(REMOVE_ITEM SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/Main.cpp)
-file(GLOB TEST_SOURCES "test/*.cpp")
+if( USE_ARMNN_DELEGATE )
+ file(GLOB TEST_SOURCES "test/delegate/*.cpp" "test/*.cpp")
+
+ # Various tflite header files are not warning clean
+ # We can't change compilation flags on header files directly, so we need to add them to an interface library first
+ add_library(tflite_headers INTERFACE)
+ target_include_directories(tflite_headers INTERFACE $<BUILD_INTERFACE:${TfLite_INCLUDE_DIR}>
+ $<INSTALL_INTERFACE:include/tflite_headers>)
+
+ target_compile_options(tflite_headers INTERFACE -Wno-conversion
+ -Wno-sign-conversion
+ -Wno-unused-parameter
+ -Wno-unused-function)
+else()
+ file(GLOB TEST_SOURCES "test/*.cpp")
+endif()
file(GLOB APP_MAIN "src/Main.cpp")
if(BUILD_UNIT_TESTS)
@@ -62,6 +95,15 @@
add_executable("${APP_TARGET_NAME}" ${SOURCES} ${CVUTILS_SOURCES} ${UTILS_SOURCES} ${APP_MAIN})
+if( USE_ARMNN_DELEGATE )
+ set(CMAKE_CXX_FLAGS " -ldl -lrt -Wl,--copy-dt-needed-entries")
+ target_link_libraries("${APP_TARGET_NAME}" PUBLIC ${TfLite_LIB})
+
+ target_link_libraries("${APP_TARGET_NAME}" PUBLIC tflite_headers)
+ target_include_directories("${APP_TARGET_NAME}" PUBLIC ${Flatbuffers_INCLUDE_DIR})
+ target_link_libraries("${APP_TARGET_NAME}" PUBLIC ${Flatbuffers_LIB})
+endif()
+
if (NOT OPENCV_LIBS_FOUND)
message("Building OpenCV libs")
add_dependencies("${APP_TARGET_NAME}" "${OPENCV_LIB}")
diff --git a/samples/ObjectDetection/Readme.md b/samples/ObjectDetection/Readme.md
index 04ac011..194a3e9 100644
--- a/samples/ObjectDetection/Readme.md
+++ b/samples/ObjectDetection/Readme.md
@@ -1,7 +1,11 @@
# Object Detection Example
## Introduction
-This is a sample code showing object detection using Arm NN public C++ API. The compiled application can take
+This is a sample code showing object detection using Arm NN in two different modes:
+1. Utilizing public Arm NN C++ API.
+2. Utilizing Tensorflow lite delegate file mechanism together with Armnn delegate file.
+
+The compiled application can take
* a video file
@@ -13,8 +17,22 @@
## Dependencies
-This example utilises OpenCV functions to capture and output video data. Top level inference API is provided by Arm NN
-library.
+This example utilizes OpenCV functions to capture and output video data.
+1. Public Arm NN C++ API is provided by Arm NN library.
+2. For Delegate file mode, the following dependencies exist:
+2.1 Tensorflow version 2.5.0
+2.2 Flatbuffers version 1.12.0
+2.3 Arm NN delegate library
+
+## System
+
+This example was created on Ubuntu 20.04 with gcc and g++ version 9.
+If you encounter any compiler errors while building with a different compiler version, you can install version 9 with:
+```commandline
+sudo apt install gcc-9 g++-9
+```
+and add to every cmake command those compiler flags:
+-DCMAKE_C_COMPILER=gcc-9 -DCMAKE_CXX_COMPILER=g++-9
### Arm NN
@@ -22,7 +40,13 @@
please ensure that Arm NN libraries and header files are available on your build platform.
The application executable binary dynamically links with the following Arm NN libraries:
* libarmnn.so
+For Arm NN public C++ API mode:
* libarmnnTfLiteParser.so
+For Delegate file mode:
+* libarmnnDelegate.so
+
+Pre-compiled Arm NN libraries can be downloaded from https://github.com/ARM-software/armnn/releases/download/v21.11/ArmNN-linux-aarch64.tar.gz
+The "lib" and "include" directories should be taken together.
The build script searches for available Arm NN libraries in the following order:
1. Inside custom user directory specified by ARMNN_LIB_DIR cmake option.
@@ -37,8 +61,11 @@
### OpenCV
This application uses [OpenCV (Open Source Computer Vision Library)](https://opencv.org/) for video stream processing.
-Your host platform may have OpenCV available through linux package manager. If this is the case, please install it using
-standard way. If not, our build system has a script to download and cross-compile required OpenCV modules
+Your host platform may have OpenCV available through linux package manager. If this is the case, please install it using standard way.
+```commandline
+sudo apt install python3-opencv
+```
+If not, our build system has a script to download and cross-compile required OpenCV modules
as well as [FFMPEG](https://ffmpeg.org/) and [x264 encoder](https://www.videolan.org/developers/x264.html) libraries.
The latter will build limited OpenCV functionality and application will support only video file input and video file output
way of working. Displaying video frames in a window requires building OpenCV with GTK and OpenGL support.
@@ -68,6 +95,49 @@
Please see [find_opencv.cmake](./cmake/find_opencv.cmake) for implementation details.
+### Tensorflow Lite (Needed only in delegate file mode)
+
+This application uses [Tensorflow Lite](https://www.tensorflow.org/) version 2.5.0 for demonstrating use of 'armnnDelegate'.
+armnnDelegate is a library for accelerating certain TensorFlow Lite operators on Arm hardware by providing
+the TensorFlow Lite interpreter with an alternative implementation of the operators via its delegation mechanism.
+You may clone and build Tensorflow lite and provide the path to its root and output library directories through the cmake
+flags TENSORFLOW_ROOT and TFLITE_LIB_ROOT respectively.
+For implementation details see the scripts FindTfLite.cmake and FindTfLiteSrc.cmake
+
+The application links with the Tensorflow lite library libtensorflow-lite.a
+
+#### Download and build Tensorflow Lite version 2.5.0
+Example for Tensorflow Lite native compilation
+```commandline
+sudo apt install build-essential
+git clone https://github.com/tensorflow/tensorflow.git
+cd tensorflow/tensorflow
+git checkout tags/v2.5.0
+mkdir build && cd build
+cmake ../lite -DTFLITE_ENABLE_XNNPACK=OFF
+make
+```
+
+### Flatbuffers (needed only in delegate file mode)
+
+This application uses [Flatbuffers](https://google.github.io/flatbuffers/) version 1.12.0 for serialization.
+You may clone and build Flatbuffers and provide the path to its root directory through the cmake
+flag FLATBUFFERS_ROOT.
+Please see the FindFlatbuffers.cmake script for implementation details.
+
+The application links with the Flatbuffers library libflatbuffers.a
+
+#### Download and build flatbuffers version 1.12.0
+Example for flatbuffer native compilation
+```commandline
+wget -O flatbuffers-1.12.0.zip https://github.com/google/flatbuffers/archive/v1.12.0.zip
+unzip -d . flatbuffers-1.12.0.zip
+cd flatbuffers-1.12.0
+mkdir install && cd install
+cmake .. -DCMAKE_INSTALL_PREFIX:PATH=`pwd`
+make install
+```
+
## Building
There are two flows for building this application:
* native build on a host platform,
@@ -83,6 +153,12 @@
* BUILD_UNIT_TESTS - set to `1` to build tests. Additionally to the main application, `object_detection_example-tests`
unit tests executable will be created.
+* For the Delegate file mode:
+* USE_ARMNN_DELEGATE - set to True to build the application with Tflite and delegate file mode. Default is False.
+* TFLITE_LIB_ROOT - point to the custom location of Tflite lib
+* TENSORFLOW_ROOT - point to the custom location of Tensorflow root directory
+* FLATBUFFERS_ROOT - point to the custom location of Flatbuffers root directory
+
### Native Build
To build this application on a host platform, firstly ensure that required dependencies are installed:
For example, for raspberry PI:
@@ -90,7 +166,7 @@
sudo apt-get update
sudo apt-get -yq install pkg-config
sudo apt-get -yq install libgtk2.0-dev zlib1g-dev libjpeg-dev libpng-dev libxvidcore-dev libx264-dev
-sudo apt-get -yq install libavcodec-dev libavformat-dev libswscale-dev
+sudo apt-get -yq install libavcodec-dev libavformat-dev libswscale-dev ocl-icd-opencl-dev
```
To build demo application, create a build directory:
@@ -114,6 +190,15 @@
make
```
+If you have built with Delegate file mode and have custom Arm NN, Tflite, and Flatbuffers locations,
+use the USE_ARMNN_DELEGATE flag together with `TFLITE_LIB_ROOT`, `TENSORFLOW_ROOT`, `FLATBUFFERS_ROOT` and
+`ARMNN_LIB_DIR` options:
+```commandline
+cmake -DARMNN_LIB_DIR=/path/to/armnn/build/lib/ -DUSE_ARMNN_DELEGATE=True -DTFLITE_LIB_ROOT=/path/to/tensorflow/
+ -DTENSORFLOW_ROOT=/path/to/tensorflow/ -DFLATBUFFERS_ROOT=/path/to/flatbuffers/ ..
+make
+```
+
### Cross-compilation
This section will explain how to cross-compile the application and dependencies on a Linux x86 machine
@@ -170,8 +255,18 @@
libarmnn.so
libarmnn.so.29
libarmnn.so.29.0
+For Arm NN public C++ API mode:
libarmnnTfLiteParser.so
libarmnnTfLiteParser.so.24.4
+end
+For Delegate file mode:
+libarmnnDelegate.so
+libarmnnDelegate.so.25
+libarmnnDelegate.so.25.0
+libtensorflow-lite.a
+libflatbuffers.a
+end
+
libavcodec.so
libavcodec.so.58
libavcodec.so.58.54.100
@@ -230,6 +325,9 @@
* --preferred-backends: Takes the preferred backends in preference order, separated by comma.
For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc].
Defaults to CpuRef **[OPTIONAL]**
+* --profiling_enabled: Enabling this option will print important ML related milestones timing
+ information in micro-seconds. By default, this option is disabled.
+ Accepted options are true/false **[OPTIONAL]**
### Object Detection on a supplied video file
@@ -258,10 +356,15 @@
1. Initialisation
1. Reading from Video Source
2. Preparing Labels and Model Specific Functions
-2. Creating a Network
- 1. Creating Parser and Importing Graph
- 3. Optimizing Graph for Compute Device
- 4. Creating Input and Output Binding Information
+2. Creating a Network (two modes are available)
+ a. Armnn C++ API mode:
+ 1. Creating Parser and Importing Graph
+ 2. Optimizing Graph for Compute Device
+ 3. Creating Input and Output Binding Information
+ b. using Tflite and delegate file mode:
+ 1. Building a Model and creating Interpreter
+ 2. Creating Arm NN delegate file
+ 3. Registering the Arm NN delegate file to the Interpreter
3. Object detection pipeline
1. Pre-processing the Captured Frame
2. Making Input and Output Tensors
@@ -298,10 +401,14 @@
Depending on the model being used, `CreatePipeline` function returns specific implementation of the object detection
pipeline.
-### Creating a Network
-All operations with Arm NN and networks are encapsulated in [`ArmnnNetworkExecutor`](./include/ArmnnNetworkExecutor.hpp)
-class.
+### There are two ways for Creating the Network. The first is using the Arm NN C++ API, and the second is using
+### Tflite with Arm NN delegate file
+
+#### Creating a Network using the Arm NN C++ API
+
+All operations with Arm NN and networks are encapsulated in
+[`ArmnnNetworkExecutor`](./common/include/ArmnnUtils/ArmnnNetworkExecutor.hpp) class.
##### Creating Parser and Importing Graph
The first step with Arm NN SDK is to import a graph from file by using the appropriate parser.
@@ -374,9 +481,67 @@
Similarly, we can get the output binding information for an output layer by using the parser to retrieve output
tensor names and calling `GetNetworkOutputBindingInfo()`.
+#### Creating a Network using Tflite and Arm NN delegate file
+
+All operations with Tflite and networks are encapsulated in [`ArmnnNetworkExecutor`](./include/delegate/ArmnnNetworkExecutor.hpp)
+class.
+
+##### Building a Model and creating Interpreter
+The first step with Tflite is to build a model from a file by using the FlatBuffer model class.
+With that model we create the Tflite Interpreter.
+```c++
+#include <tensorflow/lite/interpreter.h>
+
+m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
+tflite::ops::builtin::BuiltinOpResolver resolver;
+tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter);
+```
+After the Interpreter is created, we allocate tensors using the AllocateTensors function of the Interpreter:
+```c++
+m_interpreter->AllocateTensors();
+```
+
+##### Creating Arm NN Delegate file
+Arm NN Delegate file is created using the ArmnnDelegate constructor.
+The constructor accepts a DelegateOptions object that is created from the
+list of the preferred backends that we want to use, and the optimizerOptions object (optional).
+In this example we enable fast math and reduce all float32 operators to float16 optimizations.
+These optimizations can sometimes improve the performance but can also cause degradation,
+depending on the model and the backends involved, therefore one should try it out and
+decide whether to use it or not.
+
+
+```c++
+#include <armnn_delegate.hpp>
+#include <DelegateOptions.hpp>
+#include <DelegateUtils.hpp>
+
+/* enable fast math optimization */
+armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
+optimizerOptions.m_ModelOptions.push_back(modelOptionGpu);
+
+armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
+optimizerOptions.m_ModelOptions.push_back(modelOptionCpu);
+/* enable reduce float32 to float16 optimization */
+optimizerOptions.m_ReduceFp32ToFp16 = true;
+
+armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);
+/* create delegate object */
+std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+ theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+ armnnDelegate::TfLiteArmnnDelegateDelete);
+```
+##### Registering the Arm NN delegate file to the Interpreter
+Registering the Arm NN delegate file will provide the TensorFlow Lite interpreter with an alternative implementation
+of the operators that can be accelerated by the Arm hardware.
+For example:
+```c++
+ /* Register the delegate file */
+ m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
+```
### Object detection pipeline
-Generic object detection pipeline has 3 steps to perform data pre-processing, run inference and decode inference results
+Generic object detection pipeline has 3 steps, to perform data pre-processing, run inference and decode inference results
in the post-processing step.
See [`ObjDetectionPipeline`](include/ObjectDetectionPipeline.hpp) and implementations for [`MobileNetSSDv1`](include/ObjectDetectionPipeline.hpp)
@@ -406,6 +571,13 @@
objectDetectionPipeline->Inference(processed, results);
```
Inference step will call `ArmnnNetworkExecutor::Run` method that will prepare input tensors and execute inference.
+We have two separate implementations of the `ArmnnNetworkExecutor` class and its functions including `ArmnnNetworkExecutor::Run`
+The first implementation [`ArmnnNetworkExecutor`](./common/include/ArmnnUtils/ArmnnNetworkExecutor.hpp) is utilizing
+Arm NN C++ API,
+while the second implementation [`ArmnnNetworkExecutor`](./include/delegate/ArmnnNetworkExecutor.hpp) is utilizing
+Tensorflow lite and its Delegate file mechanism.
+
+##### Executing Inference utilizing the Arm NN C++ API
A compute device performs inference for the loaded network using the `EnqueueWorkload()` function of the runtime context.
For example:
```c++
@@ -416,8 +588,22 @@
runtime->EnqueueWorkload(0, inputTensors, outputTensors);
```
We allocate memory for output data once and map it to output tensor objects. After successful inference, we read data
-from the pre-allocated output data buffer. See [`ArmnnNetworkExecutor::ArmnnNetworkExecutor`](./src/ArmnnNetworkExecutor.cpp)
-and [`ArmnnNetworkExecutor::Run`](./src/ArmnnNetworkExecutor.cpp) for more details.
+from the pre-allocated output data buffer.
+See [`ArmnnNetworkExecutor::ArmnnNetworkExecutor`](./common/include/ArmnnUtils/ArmnnNetworkExecutor.hpp)
+and [`ArmnnNetworkExecutor::Run`](./common/include/ArmnnUtils/ArmnnNetworkExecutor.hpp) for more details.
+
+##### Executing Inference utilizing the Tensorflow lite and Arm NN delegate file
+Inside the `PrepareTensors(..)` function, the input frame is copied to the Tflite Interpreter input tensor,
+then the Tflite Interpreter performs inference for the loaded network using the `Invoke()` function.
+For example:
+```c++
+PrepareTensors(inputData, dataBytes);
+
+if (m_interpreter->Invoke() == kTfLiteOk)
+```
+After successful inference, we read data from the Tflite Interpreter output tensor and copy
+it to the outResults vector.
+See [`ArmnnNetworkExecutor::Run`](./include/delegate/ArmnnNetworkExecutor.hpp) for more details.
#### Postprocessing
@@ -430,7 +616,7 @@
See [`SSDResultDecoder`](./include/SSDResultDecoder.hpp) for more details.
For YOLO V3 Tiny models, we decode the output and perform non-maximum suppression to filter out any weak detections
-below a confidence threshold and any redudant bounding boxes above an intersection-over-union threshold.
+below a confidence threshold and any redundant bounding boxes above an intersection-over-union threshold.
See [`YoloResultDecoder`](./include/YoloResultDecoder.hpp) for more details.
It is encouraged to experiment with threshold values for confidence and intersection-over-union (IoU)
@@ -450,4 +636,4 @@
AddInferenceOutputToFrame(detects, *frame, labels);
});
```
-The processed frames are written to a file or displayed in a separate window.
\ No newline at end of file
+The processed frames are written to a file or displayed in a separate window.
diff --git a/samples/ObjectDetection/cmake/unit_tests.cmake b/samples/ObjectDetection/cmake/unit_tests.cmake
index dd3de70..6b2a9bb 100644
--- a/samples/ObjectDetection/cmake/unit_tests.cmake
+++ b/samples/ObjectDetection/cmake/unit_tests.cmake
@@ -6,8 +6,6 @@
add_definitions (-DTEST_RESOURCE_DIR="${TEST_RESOURCES_DIR}")
set(TEST_TARGET_NAME "${CMAKE_PROJECT_NAME}-tests")
-file(GLOB TEST_SOURCES "test/*")
-
include(../common/cmake/find_catch.cmake)
ExternalProject_Add(basketball-image
@@ -42,6 +40,14 @@
INSTALL_COMMAND ""
)
+ExternalProject_Add(yolo_v3
+ URL https://github.com/ARM-software/ML-zoo/raw/master/models/object_detection/yolo_v3_tiny/tflite_fp32/yolo_v3_tiny_darknet_fp32.tflite
+ DOWNLOAD_NO_EXTRACT 1
+ CONFIGURE_COMMAND ""
+ BUILD_COMMAND ${CMAKE_COMMAND} -E copy <DOWNLOAD_DIR>/yolo_v3_tiny_darknet_fp32.tflite ${CMAKE_CURRENT_SOURCE_DIR}/test/resources
+ INSTALL_COMMAND ""
+ )
+
add_executable("${TEST_TARGET_NAME}" ${SOURCES} ${TEST_SOURCES} ${CVUTILS_SOURCES} ${UTILS_SOURCES})
add_dependencies(
@@ -61,4 +67,11 @@
${ARMNN_INCLUDE_DIR}
${OPENCV_INCLUDE_DIR} ${DEPENDENCIES_DIR} ${TEST_RESOURCES_DIR} ${COMMON_INCLUDE_DIR})
-target_link_libraries("${TEST_TARGET_NAME}" PUBLIC ${ARMNN_LIBS} ${OPENCV_LIBS} ${FFMPEG_LIBS})
\ No newline at end of file
+target_link_libraries("${TEST_TARGET_NAME}" PUBLIC ${ARMNN_LIBS} ${OPENCV_LIBS} ${FFMPEG_LIBS})
+if( USE_ARMNN_DELEGATE )
+ set(CMAKE_CXX_FLAGS " -ldl -lrt -Wl,--copy-dt-needed-entries")
+ target_link_libraries("${TEST_TARGET_NAME}" PUBLIC ${TfLite_LIB})
+ target_link_libraries("${TEST_TARGET_NAME}" PUBLIC tflite_headers)
+ target_include_directories("${TEST_TARGET_NAME}" PUBLIC ${Flatbuffers_INCLUDE_DIR})
+ target_link_libraries("${TEST_TARGET_NAME}" PUBLIC ${Flatbuffers_LIB})
+endif()
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/delegate/ArmnnNetworkExecutor.hpp b/samples/ObjectDetection/include/delegate/ArmnnNetworkExecutor.hpp
new file mode 100644
index 0000000..c8875a2
--- /dev/null
+++ b/samples/ObjectDetection/include/delegate/ArmnnNetworkExecutor.hpp
@@ -0,0 +1,253 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Types.hpp"
+
+#include "armnn/ArmNN.hpp"
+#include <armnn/Logging.hpp>
+#include <armnn_delegate.hpp>
+#include <DelegateOptions.hpp>
+#include <DelegateUtils.hpp>
+#include <Profiling.hpp>
+#include <tensorflow/lite/builtin_ops.h>
+#include <tensorflow/lite/c/builtin_op_data.h>
+#include <tensorflow/lite/c/common.h>
+#include <tensorflow/lite/optional_debug_tools.h>
+#include <tensorflow/lite/kernels/builtin_op_kernels.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+
+#include <string>
+#include <vector>
+
+namespace common
+{
+/**
+* @brief Used to load in a network through Tflite Interpreter,
+* register Armnn Delegate file to it, and run inference
+* on it against a given backend.
+* currently it is assumed that the input data will be
+* cv:MAT (Frame), the assumption is implemented in
+* PrepareTensors method, it can be generalized later
+*
+*/
+template <typename Tout>
+class ArmnnNetworkExecutor
+{
+private:
+ std::unique_ptr<tflite::Interpreter> m_interpreter;
+ std::unique_ptr<tflite::FlatBufferModel> m_model;
+ Profiling m_profiling;
+
+ void PrepareTensors(const void* inputData, const size_t dataBytes);
+
+ template <typename Enumeration>
+ auto log_as_int(Enumeration value)
+ -> typename std::underlying_type<Enumeration>::type
+ {
+ return static_cast<typename std::underlying_type<Enumeration>::type>(value);
+ }
+
+public:
+ ArmnnNetworkExecutor() = delete;
+
+ /**
+ * @brief Initializes the network with the given input data.
+ *
+ *
+ * * @param[in] modelPath - Relative path to the model file
+ * * @param[in] backends - The list of preferred backends to run inference on
+ */
+ ArmnnNetworkExecutor(std::string& modelPath,
+ std::vector<armnn::BackendId>& backends,
+ bool isProfilingEnabled = false);
+
+ /**
+ * @brief Returns the aspect ratio of the associated model in the order of width, height.
+ */
+ Size GetImageAspectRatio();
+
+ /**
+ * @brief Returns the data type of the associated model.
+ */
+ armnn::DataType GetInputDataType() const;
+
+ float GetQuantizationScale();
+
+ int GetQuantizationOffset();
+
+ float GetOutputQuantizationScale(int tensorIndex);
+
+ int GetOutputQuantizationOffset(int tensorIndex);
+
+
+ /**
+ * @brief Runs inference on the provided input data, and stores the results
+ * in the provided InferenceResults object.
+ *
+ * @param[in] inputData - input frame data
+ * @param[in] dataBytes - input data size in bytes
+ * @param[out] outResults - Vector of DetectionResult objects used to store the output result.
+ */
+ bool Run(const void *inputData, const size_t dataBytes,
+ InferenceResults<Tout> &outResults);
+};
+
+template <typename Tout>
+ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
+ std::vector<armnn::BackendId>& preferredBackends,
+ bool isProfilingEnabled):
+ m_profiling(isProfilingEnabled)
+{
+ m_profiling.ProfilingStart();
+ armnn::OptimizerOptions optimizerOptions;
+ m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
+ if (m_model == nullptr)
+ {
+ const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the model"};
+ ARMNN_LOG(error) << errorMessage;
+ throw armnn::Exception(errorMessage);
+ }
+ m_profiling.ProfilingStopAndPrintUs("Loading the model took");
+
+ m_profiling.ProfilingStart();
+ tflite::ops::builtin::BuiltinOpResolver resolver;
+ tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter);
+ if (m_interpreter->AllocateTensors() != kTfLiteOk)
+ {
+ const std::string errorMessage{"ArmnnNetworkExecutor: Failed to alloc tensors"};
+ ARMNN_LOG(error) << errorMessage;
+ throw armnn::Exception(errorMessage);
+ }
+ m_profiling.ProfilingStopAndPrintUs("Create the tflite interpreter");
+
+ /* create delegate options */
+ m_profiling.ProfilingStart();
+
+ /* enable fast math optimization */
+ armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
+ optimizerOptions.m_ModelOptions.push_back(modelOptionGpu);
+
+ armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
+ optimizerOptions.m_ModelOptions.push_back(modelOptionCpu);
+ /* enable reduce float32 to float16 optimization */
+ optimizerOptions.m_ReduceFp32ToFp16 = true;
+
+ armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);
+
+ /* create delegate object */
+ std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+ theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+ armnnDelegate::TfLiteArmnnDelegateDelete);
+
+ /* Register the delegate file */
+ m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
+ m_profiling.ProfilingStopAndPrintUs("Create and load ArmNN Delegate");
+
+}
+
+template<typename Tout>
+void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void *inputData, const size_t dataBytes)
+{
+ size_t inputTensorSize = m_interpreter->input_tensor(0)->bytes;
+ auto * inputTensorPtr = m_interpreter->input_tensor(0)->data.raw;
+ assert(inputTensorSize >= dataBytes);
+ if (inputTensorPtr != nullptr)
+ {
+ memcpy(inputTensorPtr, inputData, inputTensorSize);
+ }
+ else
+ {
+ const std::string errorMessage{"ArmnnNetworkExecutor: input tensor is null"};
+ ARMNN_LOG(error) << errorMessage;
+ throw armnn::Exception(errorMessage);
+ }
+
+}
+
+template <typename Tout>
+bool ArmnnNetworkExecutor<Tout>::Run(const void *inputData, const size_t dataBytes,
+ InferenceResults<Tout>& outResults)
+{
+ bool ret = false;
+ m_profiling.ProfilingStart();
+ PrepareTensors(inputData, dataBytes);
+
+ if (m_interpreter->Invoke() == kTfLiteOk)
+ {
+
+
+ ret = true;
+ // Extract the output tensor data.
+ outResults.clear();
+ outResults.reserve(m_interpreter->outputs().size());
+ for (int index = 0; index < m_interpreter->outputs().size(); index++)
+ {
+ size_t size = m_interpreter->output_tensor(index)->bytes / sizeof(Tout);
+ const Tout *p_Output = m_interpreter->typed_output_tensor<Tout>(index);
+ if (p_Output != nullptr) {
+ InferenceResult<float> outRes(p_Output, p_Output + size);
+ outResults.emplace_back(outRes);
+ }
+ else
+ {
+ const std::string errorMessage{"ArmnnNetworkExecutor: p_Output tensor is null"};
+ ARMNN_LOG(error) << errorMessage;
+ ret = false;
+ }
+ }
+ }
+ else
+ {
+ const std::string errorMessage{"ArmnnNetworkExecutor: Invoke has failed"};
+ ARMNN_LOG(error) << errorMessage;
+ }
+ m_profiling.ProfilingStopAndPrintUs("Perform inference");
+ return ret;
+}
+
+template <typename Tout>
+Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
+{
+ assert(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->size == 4);
+ return Size(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[2],
+ m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[1]);
+}
+
+template <typename Tout>
+armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
+{
+ return GetDataType(*(m_interpreter->tensor(m_interpreter->inputs()[0])));
+}
+
+template <typename Tout>
+float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
+{
+ return m_interpreter->tensor(m_interpreter->inputs()[0])->params.scale;
+}
+
+template <typename Tout>
+int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
+{
+ return m_interpreter->tensor(m_interpreter->inputs()[0])->params.zero_point;
+}
+
+template <typename Tout>
+float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
+{
+ assert(m_interpreter->outputs().size() > tensorIndex);
+ return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.scale;
+}
+
+template <typename Tout>
+int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
+{
+ assert(m_interpreter->outputs().size() > tensorIndex);
+ return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.zero_point;
+}
+
+}// namespace common
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/Main.cpp b/samples/ObjectDetection/src/Main.cpp
index e057981..8bc2f0d 100644
--- a/samples/ObjectDetection/src/Main.cpp
+++ b/samples/ObjectDetection/src/Main.cpp
@@ -20,6 +20,7 @@
const std::string OUTPUT_VIDEO_FILE_PATH = "--output-video-file-path";
const std::string LABEL_PATH = "--label-path";
const std::string PREFERRED_BACKENDS = "--preferred-backends";
+const std::string PROFILING_ENABLED = "--profiling_enabled";
const std::string HELP = "--help";
/*
@@ -29,13 +30,16 @@
{VIDEO_FILE_PATH, "[REQUIRED] Path to the video file to run object detection on"},
{MODEL_FILE_PATH, "[REQUIRED] Path to the Object Detection model to use"},
{LABEL_PATH, "[REQUIRED] Path to the label set for the provided model file. "
- "Label file is should just be an ordered list, seperated by new line."},
+ "Label file should be an ordered list, separated by a new line."},
{MODEL_NAME, "[REQUIRED] The name of the model being used. Accepted options: YOLO_V3_TINY, SSD_MOBILE"},
{OUTPUT_VIDEO_FILE_PATH, "[OPTIONAL] Path to the output video file with detections added in. "
"If specified will save file to disk, else displays the output to screen"},
{PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma."
" For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]."
- " Defaults to CpuAcc,CpuRef"}
+ " Defaults to CpuAcc,CpuRef"},
+ {PROFILING_ENABLED, "[OPTIONAL] Enabling this option will print important ML related milestones timing"
+ "information in micro-seconds. By default, this option is disabled."
+ "Accepted options are true/false."}
};
/*
@@ -137,6 +141,10 @@
pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH);
pipelineOptions.m_ModelName = GetSpecifiedOption(options, MODEL_NAME);
+ if (CheckOptionSpecified(options, PROFILING_ENABLED))
+ {
+ pipelineOptions.m_ProfilingEnabled = GetSpecifiedOption(options, PROFILING_ENABLED) == "true";
+ }
if(CheckOptionSpecified(options, PREFERRED_BACKENDS))
{
pipelineOptions.m_backends = GetPreferredBackendList((GetSpecifiedOption(options, PREFERRED_BACKENDS)));
@@ -148,6 +156,8 @@
auto labels = AssignColourToLabel(GetSpecifiedOption(options, LABEL_PATH));
+ common::Profiling profiling(pipelineOptions.m_ProfilingEnabled);
+ profiling.ProfilingStart();
od::IPipelinePtr objectDetectionPipeline = od::CreatePipeline(pipelineOptions);
auto inputAndOutput = GetFrameSourceAndSink(options);
@@ -180,5 +190,6 @@
frame = reader->ReadFrame();
}
sink->Close();
+ profiling.ProfilingStopAndPrintUs("Overall compute time");
return 0;
}
diff --git a/samples/ObjectDetection/src/ObjectDetectionPipeline.cpp b/samples/ObjectDetection/src/ObjectDetectionPipeline.cpp
index 077caa4..2c4a76d 100644
--- a/samples/ObjectDetection/src/ObjectDetectionPipeline.cpp
+++ b/samples/ObjectDetection/src/ObjectDetectionPipeline.cpp
@@ -11,8 +11,8 @@
ObjDetectionPipeline::ObjDetectionPipeline(std::unique_ptr<common::ArmnnNetworkExecutor<float>> executor,
std::unique_ptr<IDetectionResultDecoder> decoder) :
- m_executor(std::move(executor)),
- m_decoder(std::move(decoder)){}
+ m_executor(std::move(executor)),
+ m_decoder(std::move(decoder)){}
void od::ObjDetectionPipeline::Inference(const cv::Mat& processed, common::InferenceResults<float>& result)
{
@@ -39,8 +39,8 @@
MobileNetSSDv1::MobileNetSSDv1(std::unique_ptr<common::ArmnnNetworkExecutor<float>> executor,
float objectThreshold) :
- ObjDetectionPipeline(std::move(executor),
- std::make_unique<SSDResultDecoder>(objectThreshold))
+ ObjDetectionPipeline(std::move(executor),
+ std::make_unique<SSDResultDecoder>(objectThreshold))
{}
void MobileNetSSDv1::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
@@ -52,13 +52,12 @@
processed.convertTo(processed, CV_32FC3, 1 / 127.5, -1);
}
}
-
YoloV3Tiny::YoloV3Tiny(std::unique_ptr<common::ArmnnNetworkExecutor<float>> executor,
float NMSThreshold, float ClsThreshold, float ObjectThreshold) :
- ObjDetectionPipeline(std::move(executor),
- std::move(std::make_unique<YoloResultDecoder>(NMSThreshold,
- ClsThreshold,
- ObjectThreshold)))
+ ObjDetectionPipeline(std::move(executor),
+ std::move(std::make_unique<YoloResultDecoder>(NMSThreshold,
+ ClsThreshold,
+ ObjectThreshold)))
{}
void YoloV3Tiny::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
@@ -72,11 +71,12 @@
IPipelinePtr CreatePipeline(common::PipelineOptions& config)
{
- auto executor = std::make_unique<common::ArmnnNetworkExecutor<float>>(config.m_ModelFilePath, config.m_backends);
-
+ auto executor = std::make_unique<common::ArmnnNetworkExecutor<float>>(config.m_ModelFilePath,
+ config.m_backends,
+ config.m_ProfilingEnabled);
if (config.m_ModelName == "SSD_MOBILE")
{
- float detectionThreshold = 0.6;
+ float detectionThreshold = 0.5;
return std::make_unique<od::MobileNetSSDv1>(std::move(executor),
detectionThreshold
@@ -99,4 +99,4 @@
}
}
-}// namespace od
\ No newline at end of file
+}// namespace od
diff --git a/samples/ObjectDetection/test/PipelineTest.cpp b/samples/ObjectDetection/test/PipelineTest.cpp
index 7af0900..48ac32c 100644
--- a/samples/ObjectDetection/test/PipelineTest.cpp
+++ b/samples/ObjectDetection/test/PipelineTest.cpp
@@ -35,7 +35,7 @@
common::PipelineOptions options;
options.m_ModelFilePath = GetResourceFilePath("ssd_mobilenet_v1.tflite");
options.m_ModelName = "SSD_MOBILE";
- options.m_backends = {"CpuRef"};
+ options.m_backends = {"CpuAcc", "CpuRef"};
od::IPipelinePtr objectDetectionPipeline = od::CreatePipeline(options);
diff --git a/samples/ObjectDetection/test/delegate/ArmnnDelegateNetworkExecutorTest.cpp b/samples/ObjectDetection/test/delegate/ArmnnDelegateNetworkExecutorTest.cpp
new file mode 100644
index 0000000..4700660
--- /dev/null
+++ b/samples/ObjectDetection/test/delegate/ArmnnDelegateNetworkExecutorTest.cpp
@@ -0,0 +1,129 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include <catch.hpp>
+#include <opencv2/opencv.hpp>
+#include "ArmnnNetworkExecutor.hpp"
+#include "Types.hpp"
+#include "ImageUtils.hpp"
+#include "SSDResultDecoder.hpp"
+#include "YoloResultDecoder.hpp"
+
+using namespace std;
+
+static string GetResourceFilePath(const string& filename)
+{
+ string testResources = TEST_RESOURCE_DIR;
+
+ if(testResources.back() != '/')
+ {
+ return testResources + "/" + filename;
+ }
+ else
+ {
+ return testResources + filename;
+ }
+}
+
+TEST_CASE("Test Delegate Execution SSD_MOBILE")
+{
+ string testResources = TEST_RESOURCE_DIR;
+ REQUIRE(testResources != "");
+
+ vector<armnn::BackendId> m_backends = {"CpuRef"};
+ string file_path = GetResourceFilePath("ssd_mobilenet_v1.tflite");
+ common::InferenceResults<float> results;
+ cv::Mat processed;
+ cv::Mat cache;
+ float detectionThreshold = 0.6;
+ common::Profiling profiling(true);
+
+ profiling.ProfilingStart();
+ auto executor = make_unique<common::ArmnnNetworkExecutor<float>>(file_path, m_backends, true);
+ int width = executor->GetImageAspectRatio().m_Width;
+ int height = executor->GetImageAspectRatio().m_Height;
+ od::SSDResultDecoder ssdResult(detectionThreshold);
+
+ /* check GetInputDataType */
+ CHECK(executor->GetInputDataType() == armnn::DataType::QAsymmU8);
+ /* check GetImageAspectRatio */
+ CHECK(width == 300);
+ CHECK(height == 300);
+
+ cv::Mat inputFrame = cv::imread(GetResourceFilePath("basketball1.png"), cv::IMREAD_COLOR);
+ cv::cvtColor(inputFrame, inputFrame, cv::COLOR_BGR2RGB);
+ ResizeWithPad(inputFrame, processed, cache, common::Size(width,height));
+ CHECK(executor->Run(processed.data, processed.total() * processed.elemSize(), results) == true);
+ od::DetectedObjects detections = ssdResult.Decode(results,
+ common::Size(inputFrame.size().width, inputFrame.size().height),
+ common::Size(width, height), {});
+
+ /* Make sure we've found 2 persons in the image */
+ CHECK(detections.size() == 2 );
+ CHECK(detections[0].GetLabel() == "0");
+ CHECK(detections[1].GetLabel() == "0");
+ /* check GetQuantizationScale */
+ CHECK(to_string(executor->GetQuantizationScale()) == string("0.007812"));
+ /* check GetQuantizationOffset */
+ CHECK(executor->GetQuantizationOffset() == 128);
+ /* check GetOutputQuantizationScale */
+ CHECK(executor->GetOutputQuantizationScale(0) == 0.0f);
+ /* check GetOutputQuantizationOffset */
+ CHECK(executor->GetOutputQuantizationOffset(0) == 0);
+ profiling.ProfilingStopAndPrintUs("Overall test");
+}
+
+TEST_CASE("Test Delegate Execution YOLO_V3")
+{
+ string testResources = TEST_RESOURCE_DIR;
+ REQUIRE(testResources != "");
+
+ vector<armnn::BackendId> m_backends = {"CpuRef"};
+ string file_path = GetResourceFilePath("yolo_v3_tiny_darknet_fp32.tflite");
+ common::InferenceResults<float> results;
+ cv::Mat processed;
+ cv::Mat cache;
+ float NMSThreshold = 0.3f;
+ float ClsThreshold = 0.3f;
+ float ObjectThreshold = 0.3f;
+
+
+ auto executor = make_unique<common::ArmnnNetworkExecutor<float>>(file_path, m_backends);
+ int width = executor->GetImageAspectRatio().m_Width;
+ int height = executor->GetImageAspectRatio().m_Height;
+ od::YoloResultDecoder yoloResult(NMSThreshold, ClsThreshold, ObjectThreshold);
+
+ /* check GetInputDataType */
+ CHECK(executor->GetInputDataType() == armnn::DataType::Float32);
+ /* check GetImageAspectRatio */
+ CHECK(width == 416);
+ CHECK(height == 416);
+
+ /* read the image */
+ cv::Mat inputFrame = cv::imread(GetResourceFilePath("basketball1.png"), cv::IMREAD_COLOR);
+ /* resize it according to the input tensor requirements */
+ ResizeWithPad(inputFrame, processed, cache, common::Size(width,height));
+ /* converting to 3 channel matrix of 32 bits floats */
+ processed.convertTo(processed, CV_32FC3);
+ /* run the inference */
+ CHECK(executor->Run(processed.data, processed.total() * processed.elemSize(), results) == true);
+ /* decode the results */
+ od::DetectedObjects detections = yoloResult.Decode(results,
+ common::Size(inputFrame.size().width, inputFrame.size().height),
+ common::Size(width, height), {});
+
+ /* Make sure we've found 2 persons in the image */
+ CHECK(detections.size() == 2 );
+ CHECK(detections[0].GetLabel() == "0");
+ CHECK(detections[1].GetLabel() == "0");
+ /* check GetQuantizationScale */
+ CHECK(to_string(executor->GetQuantizationScale()) == string("0.000000"));
+ /* check GetQuantizationOffset */
+ CHECK(executor->GetQuantizationOffset() == 0);
+ /* check GetOutputQuantizationScale */
+ CHECK(executor->GetOutputQuantizationScale(0) == 0.0f);
+ /* check GetOutputQuantizationOffset */
+ CHECK(executor->GetOutputQuantizationOffset(0) == 0);
+
+}
diff --git a/samples/common/cmake/find_armnn.cmake b/samples/common/cmake/find_armnn.cmake
index 289e912..35f87eb 100644
--- a/samples/common/cmake/find_armnn.cmake
+++ b/samples/common/cmake/find_armnn.cmake
@@ -2,8 +2,13 @@
# SPDX-License-Identifier: MIT
# Search for ArmNN built libraries in user-provided path first, then current repository, then system
-set(ARMNN_LIB_NAMES "libarmnn.so"
- "libarmnnTfLiteParser.so")
+if( USE_ARMNN_DELEGATE )
+ set(ARMNN_LIB_NAMES "libarmnn.so"
+ "libarmnnDelegate.so")
+else()
+ set(ARMNN_LIB_NAMES "libarmnn.so"
+ "libarmnnTfLiteParser.so")
+endif()
set(ARMNN_LIBS "")
@@ -26,7 +31,13 @@
list(APPEND ARMNN_LIBS ${ARMNN_${armnn_lib}})
get_filename_component(LIB_DIR ${ARMNN_${armnn_lib}} DIRECTORY)
get_filename_component(LIB_PARENT_DIR ${LIB_DIR} DIRECTORY)
- set(ARMNN_INCLUDE_DIR ${LIB_PARENT_DIR}/include)
+ if( USE_ARMNN_DELEGATE )
+ set(ARMNN_INCLUDE_DIR ${LIB_PARENT_DIR}/include
+ ${PARENT_DIR}/../delegate/include
+ ${PARENT_DIR}/../delegate/src)
+ else()
+ set(ARMNN_INCLUDE_DIR ${LIB_PARENT_DIR}/include)
+ endif()
endif()
endforeach()
diff --git a/samples/common/cmake/find_catch.cmake b/samples/common/cmake/find_catch.cmake
index 584b807..f55654e 100644
--- a/samples/common/cmake/find_catch.cmake
+++ b/samples/common/cmake/find_catch.cmake
@@ -8,9 +8,10 @@
file(MAKE_DIRECTORY ${TEST_TPIP_INCLUDE})
ExternalProject_Add(catch2-headers
- URL https://github.com/catchorg/Catch2/releases/download/v2.11.1/catch.hpp
+ URL https://github.com/catchorg/Catch2/releases/download/v2.13.5/catch.hpp
+ URL_HASH MD5=b43c586fe617aefdee3e480e9fa8f370
DOWNLOAD_NO_EXTRACT 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ${CMAKE_COMMAND} -E copy <DOWNLOAD_DIR>/catch.hpp ${TEST_TPIP_INCLUDE}
INSTALL_COMMAND ""
- )
\ No newline at end of file
+ )
diff --git a/samples/common/include/ArmnnUtils/ArmnnNetworkExecutor.hpp b/samples/common/include/ArmnnUtils/ArmnnNetworkExecutor.hpp
index 9f1ef54..80558d8 100644
--- a/samples/common/include/ArmnnUtils/ArmnnNetworkExecutor.hpp
+++ b/samples/common/include/ArmnnUtils/ArmnnNetworkExecutor.hpp
@@ -11,6 +11,7 @@
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnUtils/DataLayoutIndexed.hpp"
#include <armnn/Logging.hpp>
+#include "Profiling.hpp"
#include <string>
#include <vector>
@@ -21,7 +22,7 @@
* @brief Used to load in a network through ArmNN and run inference on it against a given backend.
*
*/
-template <class Tout>
+template <typename Tout>
class ArmnnNetworkExecutor
{
private:
@@ -31,7 +32,7 @@
armnn::InputTensors m_InputTensors;
armnn::OutputTensors m_OutputTensors;
std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;
-
+ Profiling m_profiling;
std::vector<std::string> m_outputLayerNamesList;
armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;
@@ -59,7 +60,8 @@
* * @param[in] backends - The list of preferred backends to run inference on
*/
ArmnnNetworkExecutor(std::string& modelPath,
- std::vector<armnn::BackendId>& backends);
+ std::vector<armnn::BackendId>& backends,
+ bool isProfilingEnabled = false);
/**
* @brief Returns the aspect ratio of the associated model in the order of width, height.
@@ -87,12 +89,15 @@
};
-template <class Tout>
+template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
- std::vector<armnn::BackendId>& preferredBackends)
- : m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
+ std::vector<armnn::BackendId>& preferredBackends,
+ bool isProfilingEnabled):
+ m_profiling(isProfilingEnabled),
+ m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
{
// Import the TensorFlow lite model.
+ m_profiling.ProfilingStart();
armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
@@ -151,16 +156,16 @@
));
}
}
-
+ m_profiling.ProfilingStopAndPrintUs("ArmnnNetworkExecutor time");
}
-template <class Tout>
+template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
return m_inputBindingInfo.second.GetDataType();
}
-template <class Tout>
+template <typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
@@ -168,9 +173,10 @@
m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
}
-template <class Tout>
+template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults)
{
+ m_profiling.ProfilingStart();
/* Prepare tensors if they are not ready */
ARMNN_LOG(debug) << "Preparing tensors...";
this->PrepareTensors(inputData, dataBytes);
@@ -190,37 +196,37 @@
outResults.reserve(m_outputLayerNamesList.size());
outResults = m_OutputBuffer;
-
+ m_profiling.ProfilingStopAndPrintUs("Total inference time");
return (armnn::Status::Success == ret);
}
-template <class Tout>
+template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
return this->m_inputBindingInfo.second.GetQuantizationScale();
}
-template <class Tout>
+template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
return this->m_inputBindingInfo.second.GetQuantizationOffset();
}
-template <class Tout>
+template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
assert(this->m_outputLayerNamesList.size() > tensorIndex);
return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationScale();
}
-template <class Tout>
+template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
assert(this->m_outputLayerNamesList.size() > tensorIndex);
return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationOffset();
}
-template <class Tout>
+template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
const auto shape = m_inputBindingInfo.second.GetShape();
diff --git a/samples/common/include/Utils/Profiling.hpp b/samples/common/include/Utils/Profiling.hpp
new file mode 100644
index 0000000..cca5632
--- /dev/null
+++ b/samples/common/include/Utils/Profiling.hpp
@@ -0,0 +1,90 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+#include <chrono>
+#include <iostream>
+#include <string>
+
+using namespace std::chrono;
+
+namespace common
+{
+/**
+* @brief Used for measuring performance of specific actions in the code.
+ * Profiling should be enabled with a parameter passed to the constructor and
+ * it's disabled by default.
+ * In order to measure timing, wrap the desired code section with
+ * ProfilingStart() and ProfilingStopAndPrintUs(title)
+*/
+class Profiling {
+private:
+
+ struct group_thousands : std::numpunct<char>
+ {
+ std::string do_grouping() const override { return "\3"; }
+ };
+
+ bool mProfilingEnabled{};
+ steady_clock::time_point mStart{};
+ steady_clock::time_point mStop{};
+public:
+ Profiling() : mProfilingEnabled(false) {};
+
+ /**
+ * @brief Initializes the profiling object.
+ *
+ * @param[in] isEnabled - Enables the profiling computation and prints.
+ */
+ explicit Profiling(bool isEnabled) : mProfilingEnabled(isEnabled) {};
+
+/**
+* @brief Starts the profiling measurement.
+*
+*/
+
+ void ProfilingStart()
+ {
+ if (mProfilingEnabled)
+ {
+ mStart = steady_clock::now();
+ }
+ }
+
+/**
+* @brief Stops the profiling measurement, without printing the results.
+*
+*/
+ auto ProfilingStop()
+ {
+ if (mProfilingEnabled)
+ {
+ mStop = steady_clock::now();
+ }
+ }
+
+/**
+* @brief Get the measurement result in micro-seconds.
+*
+*/
+ auto ProfilingGetUs()
+ {
+ return mProfilingEnabled ? duration_cast<microseconds>(mStop - mStart).count() : 0;
+ }
+
+/**
+* @brief Stop the profiling measurement and print the result in micro-seconds.
+*
+*/
+ void ProfilingStopAndPrintUs(const std::string &title)
+ {
+ ProfilingStop();
+ if (mProfilingEnabled) {
+ std::cout.imbue(std::locale(std::cout.getloc(), new group_thousands));
+ std::cout << "Profiling: " << title << ": " << ProfilingGetUs() << " uSeconds" << std::endl;
+ }
+ }
+};
+}// namespace common
\ No newline at end of file
diff --git a/samples/common/include/Utils/Types.hpp b/samples/common/include/Utils/Types.hpp
index 4d1f708..184e02a 100644
--- a/samples/common/include/Utils/Types.hpp
+++ b/samples/common/include/Utils/Types.hpp
@@ -44,6 +44,7 @@
std::string m_ModelName;
std::string m_ModelFilePath;
std::vector<armnn::BackendId> m_backends;
+ bool m_ProfilingEnabled = false;
};
template<typename T>