//
// Copyright © 2022, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include <armnn/Logging.hpp>
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>
#include <DelegateUtils.hpp>
#include <Profiling.hpp>
#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>

#include <string>
#include <vector>

namespace common
{
/**
* @brief Loads a network through the TfLite Interpreter, registers the
* Arm NN delegate with it, and runs inference against a given backend.
*
* Currently the input data is assumed to be a cv::Mat frame; this
* assumption is implemented in the PrepareTensors method and can be
* generalized later. See the usage sketch after the class declaration.
*/
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
    std::unique_ptr<tflite::Interpreter> m_interpreter;
    std::unique_ptr<tflite::FlatBufferModel> m_model;
    Profiling m_profiling;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
        -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
     * @brief Initializes the network from the given model file and registers
     * the Arm NN delegate for the preferred backends.
     *
     * @param[in] modelPath - Relative path to the model file
     * @param[in] backends - The list of preferred backends to run inference on
     * @param[in] isProfilingEnabled - Whether to print per-stage profiling timings
     */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends,
                         bool isProfilingEnabled = false);

    /**
     * @brief Returns the width and height of the associated model's input, in that order.
     */
    Size GetImageAspectRatio();

    /**
     * @brief Returns the data type of the associated model's input tensor.
     */
    armnn::DataType GetInputDataType() const;

    /** @brief Returns the quantization scale of the input tensor. */
    float GetQuantizationScale();

    /** @brief Returns the quantization offset (zero point) of the input tensor. */
    int GetQuantizationOffset();

    /** @brief Returns the quantization scale of the given output tensor. */
    float GetOutputQuantizationScale(int tensorIndex);

    /** @brief Returns the quantization offset (zero point) of the given output tensor. */
    int GetOutputQuantizationOffset(int tensorIndex);

    /**
     * @brief Runs inference on the provided input data and stores the results
     * in the provided InferenceResults object.
     *
     * @param[in] inputData - Input frame data
     * @param[in] dataBytes - Input data size in bytes
     * @param[out] outResults - Vector of InferenceResult objects used to store the output results
     *
     * @return true if inference succeeded, false otherwise
     */
    bool Run(const void* inputData, const size_t dataBytes,
             InferenceResults<Tout>& outResults);
};
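
/*
 * Illustrative usage sketch (not part of this header's API): the model path,
 * backend list and cv::Mat frame below are assumptions chosen for the example.
 *
 *   std::string modelPath = "model.tflite";
 *   std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};
 *   ArmnnNetworkExecutor<float> executor(modelPath, backends);
 *
 *   cv::Mat frame = ...; // frame already resized to executor.GetImageAspectRatio()
 *   InferenceResults<float> results;
 *   executor.Run(frame.data, frame.total() * frame.elemSize(), results);
 */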

template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends,
                                                 bool isProfilingEnabled):
    m_profiling(isProfilingEnabled)
{
    m_profiling.ProfilingStart();
    armnn::OptimizerOptionsOpaque optimizerOptions;
    m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
    if (m_model == nullptr)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the model"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Loading the model took");

    m_profiling.ProfilingStart();
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter);
    if (m_interpreter == nullptr || m_interpreter->AllocateTensors() != kTfLiteOk)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to create the interpreter or allocate tensors"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Create the tflite interpreter");

    /* Create delegate options */
    m_profiling.ProfilingStart();

    /* Enable fast math optimization */
    armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.AddModelOption(modelOptionGpu);

    armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.AddModelOption(modelOptionCpu);
    /* Enable reduce float32 to float16 optimization */
    optimizerOptions.SetReduceFp32ToFp16(true);

    armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);

    /* Create the delegate object */
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                             armnnDelegate::TfLiteArmnnDelegateDelete);

    /* Register the Arm NN delegate with the interpreter */
    m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
    m_profiling.ProfilingStopAndPrintUs("Create and load ArmNN Delegate");
}

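/**
 * @brief Copies the provided input data into the interpreter's first input tensor.
 */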
template <typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    size_t inputTensorSize = m_interpreter->input_tensor(0)->bytes;
    auto* inputTensorPtr = m_interpreter->input_tensor(0)->data.raw;
    assert(inputTensorSize >= dataBytes);
    if (inputData == nullptr)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: input data pointer is null"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    if (inputTensorPtr != nullptr)
    {
        /* Copy only the bytes provided by the caller; the assert above guarantees they fit. */
        memcpy(inputTensorPtr, inputData, dataBytes);
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: input tensor pointer is null"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
}

template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes,
                                     InferenceResults<Tout>& outResults)
{
    bool ret = false;
    m_profiling.ProfilingStart();
    PrepareTensors(inputData, dataBytes);

    if (m_interpreter->Invoke() == kTfLiteOk)
    {
        ret = true;
        // Extract the output tensor data.
        outResults.clear();
        outResults.reserve(m_interpreter->outputs().size());
        for (int index = 0; index < m_interpreter->outputs().size(); index++)
        {
            size_t size = m_interpreter->output_tensor(index)->bytes / sizeof(Tout);
            const Tout* p_Output = m_interpreter->typed_output_tensor<Tout>(index);
            if (p_Output != nullptr)
            {
                InferenceResult<Tout> outRes(p_Output, p_Output + size);
                outResults.emplace_back(outRes);
            }
            else
            {
                const std::string errorMessage{"ArmnnNetworkExecutor: output tensor pointer is null"};
                ARMNN_LOG(error) << errorMessage;
                ret = false;
            }
        }
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Invoke has failed"};
        ARMNN_LOG(error) << errorMessage;
    }
    m_profiling.ProfilingStopAndPrintUs("Perform inference");
    return ret;
}

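/* The model input is assumed to be in NHWC layout, so dims->data[1] is the
 * height and dims->data[2] is the width of the expected input image. */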
template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    assert(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->size == 4);
    return Size(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[2],
                m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[1]);
}

template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return GetDataType(*(m_interpreter->tensor(m_interpreter->inputs()[0])));
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.zero_point;
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > tensorIndex);
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > tensorIndex);
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.zero_point;
}

} // namespace common