//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());
    SetupInputsAndOutputs();

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful.";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

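// Schedules m_Params.m_Iterations inferences on the Arm NN Threadpool (one working memory handle is
// created per pool thread) and waits for the results through the AsyncCallbackManager, which also
// supplies the per-inference start/end timestamps used for the timing summary below.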
void ArmNNExecutor::ExecuteAsync()
{
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";
}

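// Runs m_Params.m_Iterations inferences one after another on the loaded network via EnqueueWorkload,
// passing the pre-imported input/output IDs when m_ImportInputsIfAligned is set.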
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results on the last iteration
        if (profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

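// Entry point for running the loaded network: a thread pool size of 0 selects the synchronous path,
// anything else the asynchronous one. Returns a pointer to each output buffer so the results can be
// compared against another execution (see CompareAndPrintResult).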
std::vector<const void*> ArmNNExecutor::Execute()
{
    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

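// Logs the name, shape, data type and (where applicable) quantization parameters of every input and
// output of the loaded network.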
void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

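// Allocates input/output storage and binds it into m_InputTensorsVec/m_OutputTensorsVec.
// A "set" is one group of files covering every input (or output); when fewer sets than iterations
// are supplied, the existing sets are reused round-robin at the end of this function.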
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread.
        // So to ensure no two threads write to the same output at the same time, no output can be reused.
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName : inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedInputIds.push_back(
                    m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc));
            }
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output : m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets, fill the remaining iterations by repeating the given files.
    // If iterations < noSets, just ignore the extra files.
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                            ? m_Params.m_Iterations - noInputSets
                                            : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                             ? m_Params.m_Iterations - noOutputSets
                                             : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

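// Walks the optimized network with an IStrategy visitor and records the binding id and TensorInfo of
// every Input and Output layer, keyed by layer name.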
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

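// Translates the command-line parameters into OptimizerOptions and backend-specific BackendOptions
// (GpuAcc tuning/caching, CpuAcc thread count) before calling armnn::Optimize. Throws if the
// optimizer returns a null network.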
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_DebugToFile = m_Params.m_PrintIntermediateOutputsToFile;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;
    options.m_AllowExpandedDims = m_Params.m_AllowExpandedDims;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because they are passed to the OptimizeSubgraphViews method, which uses them to create the
    // new optimized INetwork.
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.m_ModelOptions.push_back(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.m_ModelOptions.push_back(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If the -v,--visualize-optimized-model option is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

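// Chooses a parser from the model file extension: ".armnn" selects the Arm NN deserializer, ".tflite"
// the TfLite parser and ".onnx" the ONNX parser. Anything other than ".json" is treated as a binary model.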
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with TensorFlow Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with ONNX parser support.");
#endif
    }

    return parser;
}

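// Prints (and optionally writes to file) every output tensor of one iteration. When output files are
// supplied, the file index is computed per iteration and wraps around if fewer files than outputs exist.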
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output : *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or there are enough output files for each output of
        // each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }
            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

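// Computes the RMSE between each of this executor's output tensors and the corresponding buffer in
// otherOutput (typically the result of running the same network through another executor) and prints it.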
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors : m_OutputTensorsVec)
    {
        for (const auto& outputTensor : outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    result = ComputeRMSE<int32_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
            std::cout << "RMSE: " << result << "\n";
        }
    }
}

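// Parser adaptors. Each wrapper maps the common IParser interface (CreateNetwork plus input/output
// binding lookups) onto one of the optional front ends and is only compiled in when the corresponding
// parser is built.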
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif