//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());
    SetupInputsAndOutputs();

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful.";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

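// Runs every iteration through the Arm NN thread pool: one working memory handle is created per thread,
// each iteration is scheduled with medium priority, and results are collected via asynchronous callbacks.
// The overall time reported is the span between the earliest callback start time and the latest end time.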
void ArmNNExecutor::ExecuteAsync()
{
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
            std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
            std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";

}

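// Runs all iterations serially on the loaded network, timing each EnqueueWorkload call, printing the
// profiler output after the final iteration and checking every inference against the threshold time.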
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled() && x == (m_Params.m_Iterations - 1))
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

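// Entry point for running the network: dispatches to synchronous or thread-pool execution depending on
// m_ThreadPoolSize and returns a pointer to each output buffer so callers can inspect or compare results.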
std::vector<const void*> ArmNNExecutor::Execute()
{
    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

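// Allocates the input and output buffers for every iteration. Input data is read from the supplied
// input-tensor-data files (or generated when requested), buffers are optionally imported when aligned
// imports are enabled, and the input/output tensor sets are repeated cyclically when fewer sets than
// iterations were provided.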
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedInputIds.push_back(
                    m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc));
            }
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets fill the remaining iterations repeating the given files
    // If iterations < noSets just ignore the extra files
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                            ? m_Params.m_Iterations - noInputSets
                                            : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                             ? m_Params.m_Iterations - noOutputSets
                                             : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

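// Walks the optimized graph with an IStrategy visitor and records the binding id and TensorInfo of every
// Input and Output layer, keyed by layer name.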
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

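// Translates the command-line parameters into OptimizerOptions plus backend-specific BackendOptions
// (GpuAcc, CpuAcc), runs armnn::Optimize against the selected compute devices, and, when layer details
// are requested, serializes the optimized graph to a .dot file next to the model.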
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_DebugToFile = m_Params.m_PrintIntermediateOutputsToFile;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;
    options.m_AllowExpandedDims = m_Params.m_AllowExpandedDims;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because these are what are passed to the OptimizeSubgraphViews method, which uses them to create
    // the new optimized INetwork.
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.m_ModelOptions.push_back(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.m_ModelOptions.push_back(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If -v/--visualize-optimized-model is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

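// Chooses the parser from the model file extension (.armnn, .tflite or .onnx), treating any extension
// other than .json as a binary model, and throws if the matching parser support was not compiled in.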
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}

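// Prints each output tensor of a single iteration to stdout and, when output files were supplied, writes
// it to the file corresponding to this iteration and output (files are cycled when there are fewer files
// than iteration/output combinations).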
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or there are enough
        // output files for each output of each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            case armnn::DataType::Boolean:
            case armnn::DataType::Signed64:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

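// Compares this executor's outputs against another set of output buffers and prints the root mean
// squared error for each output tensor.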
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    result = ComputeRMSE<int32_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
            std::cout << "RMSE: " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif