//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());
    SetupInputsAndOutputs();

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

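    // MemorySource::Undefined leaves input/output import disabled at load time; m_Concurrent
    // requests the working-memory-handle path used by the asynchronous thread pool execution.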
    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
72 msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
73 "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful.";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

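// Schedules every iteration on the Arm NN thread pool (one working memory handle per thread),
// then waits for the callbacks and reports per-inference and overall timings.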
void ArmNNExecutor::ExecuteAsync()
{
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

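        // Callbacks can complete in any order, so record which output tensors belong to each
        // inference id; the results are matched up again when the callback is notified below.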
        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if(!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";

}

void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled, print the profiler results after the final iteration
        if(profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if(ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if(!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

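// Runs all requested iterations (synchronously, or via the thread pool when a pool size is set)
// and returns a pointer to the raw memory backing each output tensor.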
std::vector<const void*> ArmNNExecutor::Execute()
{
    if(m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

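// Validates the number of supplied input/output files against the network, allocates backing
// storage for every tensor, fills the inputs from file (or with generated data), and duplicates
// the tensor sets so that one set is available per iteration.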
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for(unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedInputIds.push_back(
                    m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc));
            }
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for(unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // Fill the remaining iterations with copies
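    // The copied sets refer to the same underlying buffers, so the extra iterations simply
    // re-read (and, for outputs, overwrite) the storage allocated for the earlier sets.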
    const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets;
    for (unsigned int i = 1; i <= remainingInputSets; i++)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]);
        }
    }

    const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets;
    for (unsigned int i = 1; i <= remainingOutputSets; i++)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]);
        }
    }
}

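// Walks the optimized graph with an IStrategy visitor and records the binding id and TensorInfo
// of every Input and Output layer, preserving the order in which they are visited.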
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

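// Translates the command-line parameters into OptimizerOptions plus backend-specific ModelOptions
// for GpuAcc and CpuAcc before calling armnn::Optimize against the selected compute devices.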
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If -v,--visualize-optimized-model is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

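// Chooses a parser from the model file extension (.armnn, .tflite or .onnx); anything other than
// a .json extension is treated as a binary model.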
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}

void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or that there are enough output files for each output
        // of each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
    }
}

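// Compares each of this executor's outputs against a second set of results (for example, the
// outputs of another ExecuteNetwork run) and prints the per-tensor RMSE; the buffers in
// otherOutput must follow the same order as the outputs returned by Execute().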
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    result = ComputeRMSE<int32_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
724 std::cout << "RMSE: of " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if(!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if(numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif