//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include <armnn/Logging.hpp>
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>
#include <DelegateUtils.hpp>
#include <Profiling.hpp>
#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>

#include <cassert>
#include <cstring>
#include <string>
#include <vector>

namespace common
{
/**
* @brief Loads a network through the TfLite Interpreter,
* registers the Arm NN delegate with it, and runs inference
* against a given backend.
* Currently it is assumed that the input data is a
* cv::Mat (frame); this assumption is implemented in the
* PrepareTensors method and can be generalized later.
* A usage sketch follows the class declaration below.
*
*/
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
    std::unique_ptr<tflite::Interpreter> m_interpreter;
    std::unique_ptr<tflite::FlatBufferModel> m_model;
    Profiling m_profiling;

    /**
    * @brief Copies the caller-provided input buffer into the interpreter's first input tensor.
    */
    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
        -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Creates the executor: loads the TfLite model from the given path,
    * builds the interpreter, and registers the Arm NN delegate for the
    * preferred backends.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends - The list of preferred backends to run inference on
    * @param[in] isProfilingEnabled - Enables profiling output when true
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends,
                         bool isProfilingEnabled = false);

    /**
    * @brief Returns the dimensions (width, height) of the associated model's input image.
    */
    Size GetImageAspectRatio();

    /**
    * @brief Returns the data type of the associated model's input tensor.
    */
    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    float GetOutputQuantizationScale(int tensorIndex);

    int GetOutputQuantizationOffset(int tensorIndex);

    /**
    * @brief Runs inference on the provided input data and stores the results
    * in the provided InferenceResults object.
    *
    * @param[in] inputData - input frame data
    * @param[in] dataBytes - input data size in bytes
    * @param[out] outResults - Container of InferenceResult objects used to store the output tensors
    */
    bool Run(const void* inputData, const size_t dataBytes,
             InferenceResults<Tout>& outResults);
};
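
/*
 * Usage sketch (illustrative only, not part of the API): a hedged example of how the
 * executor is typically driven, assuming a hypothetical "model.tflite" file, an OpenCV
 * cv::Mat frame already resized to the model's input dimensions, and float output
 * tensors. The InferenceResults container comes from Types.hpp.
 *
 *     std::string modelPath = "model.tflite";
 *     std::vector<armnn::BackendId> backends = {"GpuAcc", "CpuAcc", "CpuRef"};
 *     common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
 *
 *     common::InferenceResults<float> results;
 *     executor.Run(frame.data, frame.total() * frame.elemSize(), results);
 */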

template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends,
                                                 bool isProfilingEnabled):
                                                 m_profiling(isProfilingEnabled)
{
    m_profiling.ProfilingStart();
    armnn::OptimizerOptions optimizerOptions;
    m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
    if (m_model == nullptr)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the model"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Loading the model took");

    m_profiling.ProfilingStart();
    /* Build the TfLite interpreter from the model using the built-in op resolver */
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter);
    if (m_interpreter->AllocateTensors() != kTfLiteOk)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to alloc tensors"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Create the tflite interpreter");

    /* Create delegate options */
    m_profiling.ProfilingStart();

    /* Enable the fast math optimization on the GPU and CPU backends */
    armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.m_ModelOptions.push_back(modelOptionGpu);

    armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.m_ModelOptions.push_back(modelOptionCpu);

    /* Enable the reduce-float32-to-float16 optimization */
    optimizerOptions.m_ReduceFp32ToFp16 = true;

    armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);

    /* Create the delegate object */
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                             armnnDelegate::TfLiteArmnnDelegateDelete);

    /* Register the delegate with the interpreter */
    if (m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)) != kTfLiteOk)
    {
        ARMNN_LOG(warning) << "ArmnnNetworkExecutor: Failed to apply the Arm NN delegate; "
                              "running with the default TfLite kernels";
    }
    m_profiling.ProfilingStopAndPrintUs("Create and load ArmNN Delegate");
}

template <typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    size_t inputTensorSize = m_interpreter->input_tensor(0)->bytes;
    auto* inputTensorPtr = m_interpreter->input_tensor(0)->data.raw;
    assert(inputTensorSize >= dataBytes);
    if (inputTensorPtr != nullptr)
    {
        /* Copy only the bytes supplied by the caller; the tensor may be larger */
        memcpy(inputTensorPtr, inputData, dataBytes);
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: input tensor is null"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
}

template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes,
                                     InferenceResults<Tout>& outResults)
{
    bool ret = false;
    m_profiling.ProfilingStart();
    PrepareTensors(inputData, dataBytes);

    if (m_interpreter->Invoke() == kTfLiteOk)
    {
        ret = true;
        // Extract the output tensor data.
        outResults.clear();
        outResults.reserve(m_interpreter->outputs().size());
        for (int index = 0; index < m_interpreter->outputs().size(); index++)
        {
            size_t size = m_interpreter->output_tensor(index)->bytes / sizeof(Tout);
            const Tout* p_Output = m_interpreter->typed_output_tensor<Tout>(index);
            if (p_Output != nullptr)
            {
                InferenceResult<Tout> outRes(p_Output, p_Output + size);
                outResults.emplace_back(outRes);
            }
            else
            {
                const std::string errorMessage{"ArmnnNetworkExecutor: p_Output tensor is null"};
                ARMNN_LOG(error) << errorMessage;
                ret = false;
            }
        }
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Invoke has failed"};
        ARMNN_LOG(error) << errorMessage;
    }
    m_profiling.ProfilingStopAndPrintUs("Perform inference");
    return ret;
}
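
/*
 * Result-consumption sketch (illustrative only): Run() fills outResults with one
 * InferenceResult per output tensor, each holding that tensor's values as Tout.
 * Assuming InferenceResults behaves as a container of such results (as Types.hpp
 * defines it for these samples), the outputs can be walked like this:
 *
 *     common::InferenceResults<float> results;
 *     if (executor.Run(inputData, dataBytes, results))
 *     {
 *         for (const auto& outputTensor : results)
 *         {
 *             for (float value : outputTensor)
 *             {
 *                 // post-process value here
 *             }
 *         }
 *     }
 */
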
template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    assert(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->size == 4);
    return Size(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[2],
                m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[1]);
}

template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return GetDataType(*(m_interpreter->tensor(m_interpreter->inputs()[0])));
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.zero_point;
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > static_cast<size_t>(tensorIndex));
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > static_cast<size_t>(tensorIndex));
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.zero_point;
}
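
/*
 * Dequantization sketch (illustrative only): callers typically combine the scale and
 * zero-point reported by the getters above to map a quantized output value q back to
 * a real value, following the usual TfLite affine scheme real = scale * (q - zero_point):
 *
 *     float scale  = executor.GetOutputQuantizationScale(0);
 *     int   offset = executor.GetOutputQuantizationOffset(0);
 *     float real   = scale * (static_cast<float>(q) - static_cast<float>(offset));
 */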

} // namespace common