//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;

    // Create/Get the static ArmNN Runtime. Note that the m_Runtime will be shared by all ArmNNExecutor
    // instances so the RuntimeOptions cannot be altered for different ArmNNExecutor instances.
    m_Runtime = GetRuntime(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    SetupInputsAndOutputs();

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

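// Creates a pool of working memory handles and an Arm NN Threadpool, schedules one inference per
// iteration with medium priority, then waits on the callbacks to time and print the results.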
void ArmNNExecutor::ExecuteAsync()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime,
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if(!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";

#endif
}

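// Runs every iteration on the calling thread via EnqueueWorkload, timing each inference and printing
// the profiler output after the final iteration when profiling is enabled.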
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results
        if(profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if(ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if(!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

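// Entry point for running the network: dispatches to ExecuteSync when no thread pool was requested,
// otherwise to ExecuteAsync, and returns a pointer to each output buffer.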
std::vector<const void*> ArmNNExecutor::Execute()
{
    ARMNN_LOG(info) << "Inferences began at: "
        << std::chrono::duration_cast<std::chrono::nanoseconds>(armnn::GetTimeNow().time_since_epoch()).count()
        << " ns\n";

    if(m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }

    ARMNN_LOG(info) << "Inferences ended at: "
        << std::chrono::duration_cast<std::chrono::nanoseconds>(armnn::GetTimeNow().time_since_epoch()).count()
        << " ns\n";

    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

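// Logs the name, shape, data type and quantization parameters of every input and output tensor.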
void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

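// Allocates the input/output buffers and binds them to the network. The number of input/output sets
// is derived from the number of data files supplied; if there are fewer sets than iterations, the
// existing sets are reused in a round-robin fashion at the end of this function.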
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for(unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

        }

        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(
                m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc));
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for(unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets fill the remaining iterations repeating the given files
    // If iterations < noSets just ignore the extra files
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                            ? m_Params.m_Iterations - noInputSets
                                            : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                             ? m_Params.m_Iterations - noOutputSets
                                             : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

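// Walks the optimized graph with an IStrategy visitor, recording the binding id and tensor info of
// every Input and Output layer.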
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

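// Translates the command-line parameters into OptimizerOptionsOpaque plus GpuAcc/CpuAcc backend
// options before calling armnn::Optimize, and optionally dumps the optimized graph to a dot file.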
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptionsOpaque options;
    options.SetReduceFp32ToFp16(m_Params.m_EnableFp16TurboMode);
    options.SetDebugEnabled(m_Params.m_PrintIntermediate);
    options.SetDebugToFileEnabled(m_Params.m_PrintIntermediateOutputsToFile);
    options.SetShapeInferenceMethod(m_Params.m_InferOutputShape ?
                                    armnn::ShapeInferenceMethod::InferAndValidate :
                                    armnn::ShapeInferenceMethod::ValidateOnly);
    options.SetProfilingEnabled(m_Params.m_EnableProfiling);
    options.SetAllowExpandedDims(m_Params.m_AllowExpandedDims);

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.AddModelOption(gpuAcc);
    options.AddModelOption(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because these are what are passed to the OptimizeSubgraphViews method and are used to create
    // the new optimized INetwork that method uses
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.AddModelOption(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.AddModelOption(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If the -v, --visualize-optimized-model option is enabled, construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

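// Selects a parser from the model file extension (.armnn, .tflite or .onnx) and throws if the
// extension is unknown or the corresponding parser was not built in.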
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }
    if (parser == nullptr)
    {
        throw InvalidArgumentException("Unable to determine the model type based on the file name extension.");
    }
    return parser;
}

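// Prints every output tensor of one iteration to stdout and, when output files were supplied,
// writes each tensor to its matching file.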
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or that there are enough
        // output files for each output of each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs,
            output.second.GetDataType()
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Signed64:
            {
                PrintTensor<int64_t>(outputWriteInfo, "%ld ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            case armnn::DataType::Boolean:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

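// Compares this executor's output buffers against another set of outputs and prints the
// byte-level root mean square error for each output tensor.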
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;
    std::string typeString;
    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            size_t size = outputTensor.second.GetNumBytes();
            double result = ComputeByteLevelRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
            std::cout << "Byte level root mean square error: " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if(!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if(numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif