//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());
    SetupInputsAndOutputs();

    std::string errorMsg;

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful.";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

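// Schedules m_Params.m_Iterations inferences on the Arm NN Threadpool, collects the results through
// callbacks, and reports the overall wall-clock time from the earliest start to the latest end.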
void ArmNNExecutor::ExecuteAsync()
{
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork, with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";
}

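// Runs the loaded network synchronously for m_Params.m_Iterations, timing each EnqueueWorkload call and
// optionally printing the profiler output and the output tensors for each run.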
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

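// Dispatches to synchronous or thread-pool execution depending on m_Params.m_ThreadPoolSize and returns
// raw pointers to the output buffers.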
std::vector<const void*> ArmNNExecutor::Execute()
{
    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

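// Validates the number of supplied input/output files against the network's bindings, allocates one set of
// input and output buffers per set of files (or per iteration when using the thread pool), fills the inputs
// from file or with generated data, and optionally pre-imports the buffers into the runtime.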
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread.
        // So to ensure no two threads write to the same output at the same time, no output can be reused.
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input(s). One input-tensor-data file is "
                        << "required for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file(s) which will be used to fill the input(s).\n";
    }

    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName : inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedInputIds.push_back(
                    m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc));
            }
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output : m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // Fill the remaining iterations with copies
    const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets;
    for (unsigned int i = 1; i <= remainingInputSets; i++)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]);
        }
    }

    const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets;
    for (unsigned int i = 1; i <= remainingOutputSets; i++)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]);
        }
    }
}

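// Walks the optimized graph with an IStrategy visitor and records the name, binding id and TensorInfo of
// every Input and Output layer.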
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

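// Builds OptimizerOptions and backend-specific ModelOptions from the command-line parameters, runs
// armnn::Optimize, logs the optimization time and, if requested, dumps the optimized graph to a dot file.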
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If the visualize-optimized-model option is enabled, construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

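// Chooses a parser (Arm NN deserializer, TfLite or ONNX) based on the model file extension; each parser
// is only available when the corresponding build flag was enabled.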
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    // If no model format is given, check the file name
    const std::string& modelFormat = m_Params.m_ModelPath;

    m_Params.m_IsModelBinary = modelFormat.find("json") == std::string::npos;
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelFormat.find("armnn") != std::string::npos)
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelFormat.find("tflite") != std::string::npos)
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelFormat.find("onnx") != std::string::npos)
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}

void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output : *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or there are enough output files for each output of
        // each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
    }
}

void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors : m_OutputTensorsVec)
    {
        for (const auto& outputTensor : outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
            std::cout << "RMSE: " << result << "\n";
        }
    }
}

#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif