//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnUtils/DataLayoutIndexed.hpp"
#include <armnn/Logging.hpp>

#include <cassert>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>

namespace common
{
/**
* @brief Loads a network through ArmNN and runs inference on it against a given backend.
*
* A usage sketch follows the class declaration below.
*/
template <class Tout>
class ArmnnNetworkExecutor
{
private:
    armnn::IRuntimePtr m_Runtime;
    armnn::NetworkId m_NetId{};
    mutable InferenceResults<Tout> m_OutputBuffer;
    armnn::InputTensors m_InputTensors;
    armnn::OutputTensors m_OutputTensors;
    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;

    std::vector<std::string> m_outputLayerNamesList;

    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
     * @brief Loads the model from file, parses it with the TfLite parser and optimizes it for the
     *        given backends.
     *
     * Note that the order of the output layer names in m_outputLayerNamesList determines the order
     * of the feature vectors returned by the Run method.
     *
     * @param[in] modelPath - Relative path to the model file
     * @param[in] backends - The list of preferred backends to run inference on
     */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends);

    /**
     * @brief Returns the aspect ratio of the associated model in the order of width, height.
     */
    Size GetImageAspectRatio();

    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    float GetOutputQuantizationScale(int tensorIndex);

    int GetOutputQuantizationOffset(int tensorIndex);

    /**
     * @brief Runs inference on the provided input data and stores the results in the provided
     *        InferenceResults object.
     *
     * @param[in] inputData - input frame data
     * @param[in] dataBytes - input data size in bytes
     * @param[out] outResults - InferenceResults object used to store the output results
     */
    bool Run(const void* inputData, const size_t dataBytes, common::InferenceResults<Tout>& outResults);

};
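
/*
 * Usage sketch (illustrative only; not part of the original header). It assumes a float32 model
 * stored at "model.tflite", that common::InferenceResults<float> behaves like a vector of
 * per-output-layer result vectors, and that PreProcessFrame is a hypothetical helper that fills
 * a buffer matching the model's input tensor:
 *
 *     std::string modelPath = "model.tflite";
 *     std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
 *     common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
 *
 *     std::vector<float> input = PreProcessFrame(frame, executor.GetImageAspectRatio());
 *     common::InferenceResults<float> results;
 *     if (executor.Run(input.data(), input.size() * sizeof(float), results))
 *     {
 *         // results[i] is the flattened output of the i-th entry in m_outputLayerNamesList.
 *     }
 */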

template <class Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends)
    : m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
{
    // Import the TensorFlow Lite model.
    armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);

    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);

    m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);

    // Collect binding info for every output layer.
    for (const std::string& name : m_outputLayerNamesList)
    {
        m_outputBindingInfo.push_back(parser->GetNetworkOutputBindingInfo(0, name));
    }

    // Optimize the network for the preferred backends.
    std::vector<std::string> errorMessages;
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  m_Runtime->GetDeviceSpec(),
                                                  armnn::OptimizerOptions(),
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));

    if (!optNet)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Load the optimized network onto the m_Runtime device.
    std::string errorMessage;
    if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
    {
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Pre-allocate memory for the outputs (their size never changes).
    for (size_t outputIdx = 0; outputIdx < m_outputLayerNamesList.size(); ++outputIdx)
    {
        const armnn::TensorShape& tensorShape = m_outputBindingInfo[outputIdx].second.GetShape();

        std::vector<Tout> oneLayerOutResult;
        oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
        m_OutputBuffer.emplace_back(oneLayerOutResult);
    }

    // Make ArmNN output tensors, one per output binding, backed by the pre-allocated buffers.
    m_OutputTensors.reserve(m_OutputBuffer.size());
    for (size_t it = 0; it < m_OutputBuffer.size(); ++it)
    {
        m_OutputTensors.emplace_back(std::make_pair(
                m_outputBindingInfo[it].first,
                armnn::Tensor(m_outputBindingInfo[it].second,
                              m_OutputBuffer.at(it).data())
        ));
    }
}

template <class Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return m_inputBindingInfo.second.GetDataType();
}

template <class Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
    m_InputTensors.clear();
    m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
}

template <class Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults)
{
    // Prepare the input tensor for the new frame.
    ARMNN_LOG(debug) << "Preparing tensors...";
    this->PrepareTensors(inputData, dataBytes);
    ARMNN_LOG(trace) << "Running inference...";

    armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);

    std::stringstream inferenceFinished;
    inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";

    ARMNN_LOG(trace) << inferenceFinished.str();

    if (ret == armnn::Status::Failure)
    {
        ARMNN_LOG(error) << "Failed to perform inference.";
    }

    // Copy the pre-allocated output buffers into the caller's results object.
    outResults = m_OutputBuffer;

    return (armnn::Status::Success == ret);
}

template <class Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return this->m_inputBindingInfo.second.GetQuantizationScale();
}

template <class Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return this->m_inputBindingInfo.second.GetQuantizationOffset();
}

template <class Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(this->m_outputLayerNamesList.size() > tensorIndex);
    return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationScale();
}

template <class Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(this->m_outputLayerNamesList.size() > tensorIndex);
    return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationOffset();
}
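
/*
 * Illustrative note (not from the original file): for asymmetric quantized outputs, the scale and
 * offset returned by the two accessors above recover real values via the usual affine mapping
 * real = scale * (quantized - offset). A minimal sketch, assuming uint8 output data:
 *
 *     float Dequantize(uint8_t quantized, float scale, int offset)
 *     {
 *         return scale * (static_cast<int>(quantized) - offset);
 *     }
 *
 *     // e.g. Dequantize(rawValue, executor.GetOutputQuantizationScale(0),
 *     //                 executor.GetOutputQuantizationOffset(0));
 */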

template <class Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    const auto shape = m_inputBindingInfo.second.GetShape();
    assert(shape.GetNumDimensions() == 4);
    armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
    return Size(shape[nhwc.GetWidthIndex()],
                shape[nhwc.GetHeightIndex()]);
}
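
/*
 * Worked example (illustrative): for an NHWC input shape of { 1, 480, 640, 3 } (N, H, W, C),
 * GetImageAspectRatio() returns Size(640, 480), i.e. width followed by height.
 */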
}// namespace common