//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "armnn/ArmNN.hpp"

#if defined(ARMNN_TF_LITE_PARSER)
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#endif

#include <HeapProfiling.hpp>
#if defined(ARMNN_ONNX_PARSER)
#include "armnnOnnxParser/IOnnxParser.hpp"
#endif

#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>

#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

namespace InferenceModelInternal
{
// This needs to go when the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo get consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float, int32_t>;

struct Params
{
    std::string m_ModelPath;
    std::string m_InputBinding;
    std::string m_OutputBinding;
    const armnn::TensorShape* m_InputTensorShape;
    std::vector<armnn::Compute> m_ComputeDevice;
    bool m_EnableProfiling;
    size_t m_SubgraphId;
    bool m_IsModelBinary;
    bool m_VisualizePostOptimizationModel;
    bool m_EnableFp16TurboMode;

    Params()
        : m_InputTensorShape(nullptr)
        , m_ComputeDevice{armnn::Compute::CpuRef}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal
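
// Note: m_InputTensorShape is a non-owning pointer; CreateNetworkImpl::Create only
// dereferences it while building its inputShapes map, so the pointed-to TensorShape
// needs to stay alive until the network has been created.
//
// Example (illustrative only) of overriding the model's input shape with a
// caller-owned TensorShape; the paths, binding names and dimensions are made up
// for the sketch:
//
//     armnn::TensorShape inputShape({ 1, 3, 224, 224 });
//     InferenceModelInternal::Params params;
//     params.m_ModelPath        = "model.prototxt";   // hypothetical path
//     params.m_InputBinding     = "data";             // hypothetical binding name
//     params.m_OutputBinding    = "prob";             // hypothetical binding name
//     params.m_IsModelBinary    = false;              // parse the text format
//     params.m_InputTensorShape = &inputShape;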

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (params.m_InputTensorShape)
        {
            inputShapes[params.m_InputBinding] = *params.m_InputTensorShape;
        }
        std::vector<std::string> requestedOutputs{ params.m_OutputBinding };
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};
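
// Note: the generic CreateNetworkImpl above expects an IParser whose
// CreateNetworkFromBinaryFile/CreateNetworkFromTextFile overloads accept explicit
// input shapes and requested outputs. The specializations below cover the TF Lite
// and ONNX parsers, whose factory functions take only the model path (plus a
// subgraph id for the TF Lite binding lookups).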

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_SubgraphId, params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, params.m_OutputBinding);
        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};
#endif

template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(const InferenceModelInternal::BindingPointInfo& input,
                                            const TContainer& inputTensorData)
{
    if (inputTensorData.size() != input.second.GetNumElements())
    {
        try
        {
            throw armnn::Exception(boost::str(boost::format("Input tensor has incorrect size. Expected %1% elements "
                "but got %2%.") % input.second.GetNumElements() % inputTensorData.size()));
        }
        catch (const boost::exception& e)
        {
            // Coverity fix: it should not be possible to get here but boost::str and boost::format can both
            // throw uncaught exceptions, convert them to armnn exceptions and rethrow.
            throw armnn::Exception(diagnostic_information(e));
        }
    }
    return { { input.first, armnn::ConstTensor(input.second, inputTensorData.data()) } };
}

template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(const InferenceModelInternal::BindingPointInfo& output,
                                              TContainer& outputTensorData)
{
    if (outputTensorData.size() != output.second.GetNumElements())
    {
        throw armnn::Exception("Output tensor has incorrect size");
    }
    return { { output.first, armnn::Tensor(output.second, outputTensorData.data()) } };
}
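
// Example (illustrative only): building the tensor bindings for EnqueueWorkload from
// std::vector buffers, given BindingPointInfo objects obtained from a parser. The
// variable names are made up for the sketch.
//
//     std::vector<float> inputData(inputBinding.second.GetNumElements());
//     std::vector<float> outputData(outputBinding.second.GetNumElements());
//     armnn::InputTensors  inputTensors  = MakeInputTensors(inputBinding, inputData);
//     armnn::OutputTensors outputTensors = MakeOutputTensors(outputBinding, outputData);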

template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType = TDataType;
    using Params = InferenceModelInternal::Params;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<armnn::Compute> m_ComputeDevice;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<armnn::Compute>>(&options.m_ComputeDevice)->default_value
                ({armnn::Compute::CpuAcc, armnn::Compute::CpuRef}),
                "Which device to run layers on by default. Possible choices: CpuAcc, CpuRef, GpuAcc")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }
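
    // Example (illustrative only): wiring CommandLineOptions into a boost::program_options
    // parser in a driver program; desc, cmdOptions and vm are made-up names for the sketch.
    //
    //     boost::program_options::options_description desc("Options");
    //     InferenceModel<IParser, float>::CommandLineOptions cmdOptions;
    //     InferenceModel<IParser, float>::AddCommandLineOptions(desc, cmdOptions);
    //     boost::program_options::variables_map vm;
    //     boost::program_options::store(
    //         boost::program_options::parse_command_line(argc, argv, desc), vm);
    //     boost::program_options::notify(vm);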

    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindingInfo,
                                                                        m_OutputBindingInfo);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevice, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), file.out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    unsigned int GetOutputSize() const
    {
        return m_OutputBindingInfo.second.GetNumElements();
    }

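    // Runs a single inference: binds the caller's input/output buffers via
    // MakeInputTensors/MakeOutputTensors, enqueues the workload on the runtime,
    // and prints the profiler output when profiling is enabled. The output vector
    // must already be sized to GetOutputSize().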
    void Run(const std::vector<TDataType>& input, std::vector<TDataType>& output)
    {
        BOOST_ASSERT(output.size() == GetOutputSize());

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(input),
                                                       MakeOutputTensors(output));

        // If profiling is enabled, print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }

    const InferenceModelInternal::BindingPointInfo& GetInputBindingInfo() const
    {
        return m_InputBindingInfo;
    }

    const InferenceModelInternal::BindingPointInfo& GetOutputBindingInfo() const
    {
        return m_OutputBindingInfo;
    }

    InferenceModelInternal::QuantizationParams GetQuantizationParams() const
    {
        return std::make_pair(m_OutputBindingInfo.second.GetQuantizationScale(),
                              m_OutputBindingInfo.second.GetQuantizationOffset());
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    InferenceModelInternal::BindingPointInfo m_InputBindingInfo;
    InferenceModelInternal::BindingPointInfo m_OutputBindingInfo;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const TContainer& inputTensorData)
    {
        return ::MakeInputTensors(m_InputBindingInfo, inputTensorData);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(TContainer& outputTensorData)
    {
        return ::MakeOutputTensors(m_OutputBindingInfo, outputTensorData);
    }
};
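
// Example (illustrative only): a minimal end-to-end sketch of using InferenceModel.
// It assumes a parser such as armnnCaffeParser::ICaffeParser (not included by this
// header) and made-up model paths and binding names; real drivers populate Params
// from the command line options declared above.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath     = "model.caffemodel";   // hypothetical path
//     params.m_InputBinding  = "data";               // hypothetical binding name
//     params.m_OutputBinding = "prob";               // hypothetical binding name
//
//     InferenceModel<armnnCaffeParser::ICaffeParser, float> model(params);
//     std::vector<float> input(/* one input tensor's worth of data */);
//     std::vector<float> output(model.GetOutputSize());
//     model.Run(input, output);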