//
// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"

#include <AsyncExecutionCallback.hpp>
#include <armnn/IAsyncExecutionCallback.hpp>
#if defined(ARMNN_SERIALIZER)
#include <armnnSerializer/ISerializer.hpp>
#endif
using namespace armnn;
using namespace std::chrono;

#if defined(ARMNN_SERIALIZER)
/**
 * Given a reference to an INetwork and a target directory, serialize the network to a file
 * called "<timestamp>_network.armnn"
 *
 * @param network The network to serialize.
 * @param dumpDir The target directory.
 * @return the full path to the serialized file.
 */
std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir)
{
    if (dumpDir.empty())
    {
        throw InvalidArgumentException("An output directory must be specified.");
    }
    fs::path outputDirectory(dumpDir);
    if (!exists(outputDirectory))
    {
        throw InvalidArgumentException(
            fmt::format("The specified directory does not exist: {}", outputDirectory.c_str()));
    }
    auto serializer(armnnSerializer::ISerializer::Create());
    // Serialize the Network
    serializer->Serialize(network);

    fs::path fileName;
    fileName += dumpDir;
    // used to get a timestamp to name diagnostic files (the ArmNN serialized graph
    // and getSupportedOperations.txt files)
    timespec ts;
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == 0)
    {
        std::stringstream ss;
        ss << std::to_string(ts.tv_sec) << "_" << std::to_string(ts.tv_nsec) << "_network.armnn";
        fileName += ss.str();
    }
    else
    {
        // This is incredibly unlikely but just in case.
        throw RuntimeException("clock_gettime with CLOCK_MONOTONIC_RAW returned a non-zero result.");
    }

    // Save serialized network to a file
    std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
    auto serialized = serializer->SaveSerializedToStream(serializedFile);
    if (!serialized)
    {
        throw RuntimeException(fmt::format("An error occurred when serializing to file {}", fileName.c_str()));
    }
    serializedFile.flush();
    serializedFile.close();
    return fileName;
}

/**
 * Given a reference to an optimized network and a target directory, serialize the network in .dot file format to
 * a file called "<timestamp>_optimized_networkgraph.dot"
 *
 * @param optimizedNetwork The optimized network to serialize.
 * @param dumpDir The target directory.
 * @return the full path to the serialized file.
 */
std::string SerializeNetworkToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork, const std::string& dumpDir)
{
    if (dumpDir.empty())
    {
        throw InvalidArgumentException("An output directory must be specified.");
    }
    fs::path outputDirectory(dumpDir);
    if (!exists(outputDirectory))
    {
        throw InvalidArgumentException(
            fmt::format("The specified directory does not exist: {}", outputDirectory.c_str()));
    }

    fs::path fileName;
    fileName += dumpDir;
    // used to get a timestamp to name diagnostic files (the ArmNN serialized graph
    // and getSupportedOperations.txt files)
    timespec ts;
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == 0)
    {
        std::stringstream ss;
        ss << std::to_string(ts.tv_sec) << "_" << std::to_string(ts.tv_nsec) << "_optimized_networkgraph.dot";
        fileName += ss.str();
    }
    else
    {
        // This is incredibly unlikely but just in case.
        throw RuntimeException("clock_gettime with CLOCK_MONOTONIC_RAW returned a non-zero result.");
    }

    // Write the network graph to a dot file.
    std::ofstream fileStream;
    fileStream.open(fileName, std::ofstream::out | std::ofstream::trunc);
    if (!fileStream.good())
    {
        throw RuntimeException(fmt::format("An error occurred when creating {}", fileName.c_str()));
    }

    if (optimizedNetwork.SerializeToDot(fileStream) != armnn::Status::Success)
    {
        throw RuntimeException(fmt::format("An error occurred when serializing to file {}", fileName.c_str()));
    }
    fileStream.flush();
    fileStream.close();
    return fileName;
}
#endif

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
    : m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;

    // Create/Get the static ArmNN Runtime. Note that m_Runtime will be shared by all ArmNNExecutor
    // instances so the RuntimeOptions cannot be altered for different ArmNNExecutor instances.
    m_Runtime = GetRuntime(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    // If the user has asked for detailed data, write out the .armnn and .dot files.
    if (params.m_SerializeToArmNN)
    {
#if defined(ARMNN_SERIALIZER)
        // .armnn first.
        // This could throw multiple exceptions if the directory cannot be created or the file cannot be written.
        std::string targetDirectory(armnnUtils::Filesystem::CreateDirectory("/ArmNNSerializeNetwork"));
        std::string fileName;
        fileName = SerializeNetwork(*network, targetDirectory);
        ARMNN_LOG(info) << "The pre-optimized network has been serialized to: " << fileName;
        // and the .dot file.
        // Most of the possible exceptions should have already occurred with the .armnn file.
        fileName = SerializeNetworkToDotFile(*optNet, targetDirectory);
        ARMNN_LOG(info) << "The optimized network has been serialized to: " << fileName;
#else
        ARMNN_LOG(info) << "Arm NN has not been built with ARMNN_SERIALIZER enabled.";
#endif
    }
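    // Gather the input/output binding information from the optimized network before it is moved
    // into the runtime by LoadNetwork.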
    m_IOInfo = GetIOInfo(optNet.get());

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

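    // Load-time network properties: whether concurrent (asynchronous) execution is allowed, no
    // pre-configured input/output memory import sources, and the requested profiling behaviour.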
    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    std::string errorMsg;
    Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
    if (status != Status::Success)
    {
        std::string message("Failed to create Arm NN Executor: ");
        message.append(errorMsg);
        // Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
        // executor as not constructed.
        ARMNN_LOG(fatal) << message;
        m_constructionFailed = true;
        return;
    }

    SetupInputsAndOutputs();

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

ArmNNExecutor::~ArmNNExecutor()
{
    std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
    // If profiling is enabled print out the results
    if (profiler && profiler->IsProfilingEnabled())
    {
        profiler->Print(std::cout);
    }

    // We're finished with the network.
    m_Runtime->UnloadNetwork(m_NetworkId);
}

void ArmNNExecutor::ExecuteAsync()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;
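    // inferenceOutputMap associates each scheduled inference's id with the output tensors it writes
    // to, so results can be matched up even when callbacks are notified out of scheduling order.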

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime,
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";

#endif
}

void ArmNNExecutor::ExecuteSync()
{
    // If we've only been asked to serialize the networks, don't execute the inference.
    if (m_Params.m_SerializeToArmNN)
    {
        ARMNN_LOG(info) << "serialize-to-armnn has been specified. No inference will be executed.";
        return;
    }
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
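        // If the input/output buffers were imported, pass the pre-imported buffer ids so the
        // runtime uses the imported buffers for this inference.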
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

std::vector<const void*> ArmNNExecutor::Execute()
{
    time_t rawtime;
    time(&rawtime);
    ARMNN_LOG(info) << "Inferences began at: ("
        << std::chrono::duration_cast<std::chrono::nanoseconds>(armnn::GetTimeNow().time_since_epoch()).count()
        << " ns) " << ctime(&rawtime);

    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }

    time(&rawtime);
    ARMNN_LOG(info) << "Inferences ended at: ("
        << std::chrono::duration_cast<std::chrono::nanoseconds>(armnn::GetTimeNow().time_since_epoch()).count()
        << " ns) " << ctime(&rawtime);

    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread
        // So to ensure no two threads write to the same output at the same time, no output can be reused
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

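    // For every input set, allocate host-side storage for each input tensor and populate it either
    // from the next input-tensor-data file or with generated data.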
    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName : inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
        }

        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(
                m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc));
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output : m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // If iterations > noSets fill the remaining iterations repeating the given files
    // If iterations < noSets just ignore the extra files
    const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
                                            ? m_Params.m_Iterations - noInputSets
                                            : 0;
    for (unsigned int i = 0; i < remainingInputSets; ++i)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
        }
    }

    const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
                                             ? m_Params.m_Iterations - noOutputSets
                                             : 0;
    for (unsigned int i = 0; i < remainingOutputSets; ++i)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
        }
    }
}

ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
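    // Visit every layer of the optimized graph with an IStrategy implementation, recording the
    // binding id and tensor info of each Input and Output layer.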
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptionsOpaque options;
    options.SetReduceFp32ToFp16(m_Params.m_EnableFp16TurboMode);
    options.SetDebugEnabled(m_Params.m_PrintIntermediate);
    options.SetDebugToFileEnabled(m_Params.m_PrintIntermediateOutputsToFile);
    options.SetShapeInferenceMethod(m_Params.m_InferOutputShape ?
                                    armnn::ShapeInferenceMethod::InferAndValidate :
                                    armnn::ShapeInferenceMethod::ValidateOnly);
    options.SetProfilingEnabled(m_Params.m_EnableProfiling);
    options.SetAllowExpandedDims(m_Params.m_AllowExpandedDims);

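    // Backend-specific model options: each of these sets is only consumed by the backend it is
    // named after, and only if that backend is among the requested compute devices.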
    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.AddModelOption(gpuAcc);
    options.AddModelOption(cpuAcc);
    // The shapeInferenceMethod and allowExpandedDims values have to be added to the model options
    // because these are what are passed to the OptimizeSubgraphViews method and are used to create
    // the new optimized INetwork that method uses
    armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
                                        {
                                            { "AllowExpandedDims", m_Params.m_AllowExpandedDims }
                                        });
    options.AddModelOption(allowExDimOpt);
    armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
                                        {
                                            { "InferAndValidate", m_Params.m_InferOutputShape }
                                        });
    options.AddModelOption(shapeInferOpt);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If -v, --visualize-optimized-model is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    const fs::path modelFilename = m_Params.m_ModelPath;
    const std::string modelExtension = modelFilename.extension();

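    // Only .json models are treated as text; every other supported extension is parsed as binary.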
    m_Params.m_IsModelBinary = modelExtension != ".json";
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelExtension == ".armnn")
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelExtension == ".tflite")
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelExtension == ".onnx")
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }
    if (parser == nullptr)
    {
        throw InvalidArgumentException("Unable to determine the model type based on the file name extension.");
    }
    return parser;
}

void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
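    // Reverse-lookup of the output layer name from its binding id.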
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output : *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or there are enough
        // output files for each output of each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs,
            output.second.GetDataType()
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }
            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Signed64:
            {
                PrintTensor<int64_t>(outputWriteInfo, "%ld ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            case armnn::DataType::Boolean:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::Float16:
            case armnn::DataType::QSymmS16:
            case armnn::DataType::BFloat16:
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        ++outputIndex;
    }
}

void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;
    std::string typeString;
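    // Compare each output buffer byte-for-byte against the corresponding buffer produced by the
    // other executor and report the root mean square error.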
    for (const auto& outputTensors : m_OutputTensorsVec)
    {
        for (const auto& outputTensor : outputTensors)
        {
            size_t size = outputTensor.second.GetNumBytes();
            double result = ComputeByteLevelRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
            std::cout << "Byte level root mean square error: " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;
    options.m_AllowExpandedDims = params.m_AllowExpandedDims;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif


#if defined(ARMNN_ONNX_PARSER)
ARMNN_NO_DEPRECATE_WARN_BEGIN
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    m_Parser = armnnOnnxParser::IOnnxParser::Create();
    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
ARMNN_NO_DEPRECATE_WARN_END
#endif