//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    SetupInputsAndOutputs();

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

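// Runs all requested iterations through the Arm NN Threadpool: every inference is scheduled up front and the
// results are collected via callbacks as they complete. Only built when threads are enabled.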
void ArmNNExecutor::ExecuteAsync()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if(!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";

#endif
}

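// Runs the requested iterations one after another on the calling thread via EnqueueWorkload, printing the
// profiler output after the final iteration when profiling is enabled.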
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results
        if(profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if(ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if(!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

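// Dispatches to the synchronous or thread-pool execution path depending on m_ThreadPoolSize, then returns the
// raw memory of each output buffer so callers can compare the results against another execution.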
std::vector<const void*> ArmNNExecutor::Execute()
{
    if(m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

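// Validates the supplied input/output file counts, allocates storage for every tensor, fills the inputs from
// the given data files (or with generated data) and builds one set of input/output tensors per iteration,
// reusing sets cyclically when fewer files than iterations were supplied.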
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for(unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

        }

        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(
                m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc));
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for(unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets fill the remaining iterations repeating the given files
    // If iterations < noSets just ignore the extra files
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                            ? m_Params.m_Iterations - noInputSets
                                            : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                             ? m_Params.m_Iterations - noOutputSets
                                             : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

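// Collects the names, binding ids and tensor infos of all input and output layers by walking the optimized
// graph with an IStrategy visitor.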
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

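// Builds the optimizer and backend options from the command-line parameters and runs armnn::Optimize for the
// requested compute devices.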
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_DebugToFile = m_Params.m_PrintIntermediateOutputsToFile;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;
    options.m_AllowExpandedDims = m_Params.m_AllowExpandedDims;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because these are what are passed to the OptimizeSubgraphViews method and are used to create
    // the new optimized INetwork that method uses
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.m_ModelOptions.push_back(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.m_ModelOptions.push_back(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If -v, --visualize-optimized-model is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

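// Selects a parser implementation from the model file extension (.armnn, .tflite or .onnx); each branch is
// only available when the corresponding parser support was compiled in.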
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}

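// Prints each output tensor of the given iteration to stdout and, when output files were supplied, also
// writes it to the matching file.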
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which case we
        // overwrite those files when processing the results of each iteration (only the result of the last
        // iteration will be stored), or that there are enough output files for each output of each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

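// Computes the RMSE between this executor's outputs and a second set of results (for example from a different
// backend or runtime) and prints one value per output tensor.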
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    result = ComputeRMSE<int32_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
            std::cout << "RMSE: " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if(!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if(numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif