//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnUtils/DataLayoutIndexed.hpp"
#include <armnn/Logging.hpp>
#include "Profiling.hpp"

#include <cassert>
#include <sstream>
#include <string>
#include <vector>

namespace common
{
/**
* @brief Loads a TfLite network through ArmNN and runs inference on it against a given backend.
*
*/
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
    armnn::IRuntimePtr m_Runtime;
    armnn::NetworkId m_NetId{};
    mutable InferenceResults<Tout> m_OutputBuffer;
    armnn::InputTensors m_InputTensors;
    armnn::OutputTensors m_OutputTensors;
    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;
    Profiling m_profiling;
    std::vector<std::string> m_outputLayerNamesList;

    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Initializes the network executor. The model is parsed through TfLiteParser and optimized for a
    * given backend.
    *
    * Note that the order of the output layer names in m_outputLayerNamesList determines the order of the
    * feature vectors in the output of the Run method.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends - The list of preferred backends to run inference on
    * @param[in] isProfilingEnabled - Enables printing of model load and inference timings
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends,
                         bool isProfilingEnabled = false);

    /**
    * @brief Returns the input size of the associated model in the order of width, height.
    */
    Size GetImageAspectRatio();

    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    float GetOutputQuantizationScale(int tensorIndex);

    int GetOutputQuantizationOffset(int tensorIndex);

    /**
    * @brief Runs inference on the provided input data and stores the results in the provided InferenceResults object.
    *
    * @param[in] inputData - input frame data
    * @param[in] dataBytes - input data size in bytes
    * @param[out] outResults - InferenceResults object that receives a copy of the output buffers
    * @return true if inference succeeded, false otherwise
    */
    bool Run(const void* inputData, const size_t dataBytes, common::InferenceResults<Tout>& outResults);

};
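
// Example usage, as a minimal sketch: construct the executor once, then call Run()
// per frame. The model path, the float output type, the backend list and the
// frameData/frameBytes input buffer are illustrative assumptions, not values taken
// from this header:
//
//     std::string modelPath = "model.tflite";
//     std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
//                                                armnn::Compute::CpuRef };
//     common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
//
//     common::InferenceResults<float> results;
//     executor.Run(frameData, frameBytes, results);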

template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends,
                                                 bool isProfilingEnabled):
        m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions())),
        m_profiling(isProfilingEnabled)
{
    // Import the TensorFlow Lite model.
    m_profiling.ProfilingStart();
    armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);

    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);

    m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);

    for (const std::string& name : m_outputLayerNamesList)
    {
        m_outputBindingInfo.push_back(std::move(parser->GetNetworkOutputBindingInfo(0, name)));
    }
    std::vector<std::string> errorMessages;
    // Optimize the network.
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  m_Runtime->GetDeviceSpec(),
                                                  armnn::OptimizerOptions(),
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));

    if (!optNet)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Load the optimized network onto the m_Runtime device.
    std::string errorMessage;
    if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
    {
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Pre-allocate memory for the outputs (their size never changes).
    for (size_t outputIdx = 0; outputIdx < m_outputLayerNamesList.size(); ++outputIdx)
    {
        const armnn::TensorShape& tensorShape = m_outputBindingInfo[outputIdx].second.GetShape();

        std::vector<Tout> oneLayerOutResult;
        oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
        m_OutputBuffer.emplace_back(oneLayerOutResult);
    }

    // Make ArmNN output tensors backed by the pre-allocated buffers. This runs after
    // the buffer loop so that each output layer gets exactly one tensor.
    m_OutputTensors.reserve(m_OutputBuffer.size());
    for (size_t outputIdx = 0; outputIdx < m_OutputBuffer.size(); ++outputIdx)
    {
        m_OutputTensors.emplace_back(std::make_pair(
                m_outputBindingInfo[outputIdx].first,
                armnn::Tensor(m_outputBindingInfo[outputIdx].second,
                              m_OutputBuffer.at(outputIdx).data())
        ));
    }
    m_profiling.ProfilingStopAndPrintUs("ArmnnNetworkExecutor time");
}

template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return m_inputBindingInfo.second.GetDataType();
}

template <typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
    m_InputTensors.clear();
    m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
}

template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults)
{
    m_profiling.ProfilingStart();
    /* Wrap the caller's input data in an input tensor for this run */
    ARMNN_LOG(debug) << "Preparing tensors...";
    this->PrepareTensors(inputData, dataBytes);
    ARMNN_LOG(trace) << "Running inference...";

    armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);

    std::stringstream inferenceFinished;
    inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";

    ARMNN_LOG(trace) << inferenceFinished.str();

    if (ret == armnn::Status::Failure)
    {
        ARMNN_LOG(error) << "Failed to perform inference.";
    }

    // Copy the pre-allocated output buffers into the caller's results.
    outResults = m_OutputBuffer;
    m_profiling.ProfilingStopAndPrintUs("Total inference time");
    return (armnn::Status::Success == ret);
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return this->m_inputBindingInfo.second.GetQuantizationScale();
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return this->m_inputBindingInfo.second.GetQuantizationOffset();
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(this->m_outputLayerNamesList.size() > static_cast<size_t>(tensorIndex));
    return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationScale();
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(this->m_outputLayerNamesList.size() > static_cast<size_t>(tensorIndex));
    return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationOffset();
}
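
// A sketch of how a caller might dequantize a raw quantized output value using the
// two getters above; the affine mapping is the standard TfLite quantization scheme.
// `executor`, `raw` and tensor index 0 are illustrative assumptions:
//
//     float scale  = executor.GetOutputQuantizationScale(0);
//     int   offset = executor.GetOutputQuantizationOffset(0);
//     float real   = scale * (static_cast<int>(raw) - offset);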

template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    const auto shape = m_inputBindingInfo.second.GetShape();
    // The input is expected to be a 4D NHWC tensor; index it with the NHWC layout
    // helper to extract the spatial dimensions.
    assert(shape.GetNumDimensions() == 4);
    armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
    return Size(shape[nhwc.GetWidthIndex()],
                shape[nhwc.GetHeightIndex()]);
}
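
// A sketch of how a caller might size its frames to the model input. cv::resize and
// the Size members m_Width/m_Height are assumptions for illustration (Size comes
// from Types.hpp):
//
//     common::Size inputSize = executor.GetImageAspectRatio();
//     cv::resize(frame, resized, cv::Size(inputSize.m_Width, inputSize.m_Height));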
}// namespace common