//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

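// Builds the executor: create the runtime, parse and optimize the network, load it into the runtime
// and prepare the input/output tensors. On failure the executor is marked as not constructed rather
// than throwing from the constructor.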
ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());
    SetupInputsAndOutputs();

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful.";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

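// Runs all iterations through the Arm NN Threadpool. One working memory handle is created per thread,
// every iteration is scheduled with medium priority, and the notified callbacks are used to collect
// per-inference timings and (optionally) print the output tensors.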
void ArmNNExecutor::ExecuteAsync()
{
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
            std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
            std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";
}

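// Runs the iterations one after another on the caller's thread via EnqueueWorkload, using the
// pre-imported input/output IDs when m_ImportInputsIfAligned is set.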
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled, print out the results after the last iteration
        if (profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

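// Dispatches to the synchronous or the thread pool (asynchronous) path depending on m_ThreadPoolSize,
// then returns raw pointers to the output buffers.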
std::vector<const void*> ArmNNExecutor::Execute()
{
    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

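// Logs the network's inputs and outputs (name, shape, data type and, for quantized tensors, the
// quantization parameters) to stdout.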
void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

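// Allocates the input/output buffers for every iteration. Input data is either generated or read from
// the supplied files; when fewer input/output sets than iterations are provided, the existing sets are
// reused for the remaining iterations.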
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedInputIds.push_back(
                    m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc));
            }
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // Fill the remaining iterations with copies
    const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets;
    for (unsigned int i = 1; i <= remainingInputSets; i++)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]);
        }
    }

    const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets;
    for (unsigned int i = 1; i <= remainingOutputSets; i++)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]);
        }
    }
}

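// Walks the optimized network with an IStrategy visitor and records the binding id and TensorInfo of
// every Input and Output layer.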
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

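// Translates the command line parameters into OptimizerOptions and backend-specific BackendOptions
// (GpuAcc / CpuAcc), then runs armnn::Optimize and optionally serializes the result to a .dot file.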
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_DebugToFile = m_Params.m_PrintIntermediateOutputsToFile;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;
    options.m_AllowExpandedDims = m_Params.m_AllowExpandedDims;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because these are what are passed to the OptimizeSubgraphViews method and are used to create
    // the new optimized INetwork that method uses
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.m_ModelOptions.push_back(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.m_ModelOptions.push_back(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If -v, --visualize-optimized-model is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

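// Chooses a parser from the model file extension: .armnn uses the Arm NN deserializer, .tflite the
// TF Lite parser and .onnx the ONNX parser. Any extension other than .json is treated as a binary model.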
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}

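// Prints the output tensors of one iteration to stdout and, if output files were supplied, writes them
// to the corresponding file.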
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or that there are enough
        // output files for each output of each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }
            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

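// Compares the outputs of this execution against another set of output buffers (for example from a
// second executor run on a different backend) and prints the RMSE per output tensor.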
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    result = ComputeRMSE<int32_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
            std::cout << "RMSE: " << result << "\n";
        }
    }
}
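
// Parser wrappers: each adapts a concrete Arm NN parser (deserializer, TF Lite, ONNX) to the IParser
// interface used by the executor. They are only compiled in when the corresponding parser support is
// built into Arm NN.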
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif