//
// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "ArmNNExecutor.hpp"
#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>
using namespace armnn;
using namespace std::chrono;
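
// Construct the executor: create/fetch the shared Arm NN runtime, parse the model, optimize it,
// load it into the runtime and prepare the input/output tensors.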
ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
// Create/Get the static ArmNN Runtime. Note that the m_Runtime will be shared by all ArmNNExecutor
// instances so the RuntimeOptions cannot be altered for different ArmNNExecutor instances.
m_Runtime = GetRuntime(runtimeOptions);
auto parser = CreateParser();
auto network = parser->CreateNetwork(m_Params);
auto optNet = OptimizeNetwork(network.get());
m_IOInfo = GetIOInfo(optNet.get());
armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
if (params.m_OutputDetailsOnlyToStdOut)
{
profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
}
else if (params.m_OutputDetailsToStdOut)
{
profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
}
INetworkProperties networkProperties{m_Params.m_Concurrent,
MemorySource::Undefined,
MemorySource::Undefined,
params.m_EnableProfiling,
profilingDetailsMethod};
std::string errorMsg;
Status status = m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
if (status != Status::Success)
{
std::string message("Failed to create Arm NN Executor: ");
message.append(errorMsg);
// Throwing an exception at this point in the constructor causes lots of problems. We'll instead mark this
// executor as not constructed.
ARMNN_LOG(fatal) << message;
m_constructionFailed = true;
return;
}
SetupInputsAndOutputs();
if (m_Params.m_Iterations > 1)
{
std::stringstream msg;
msg << "Network will be executed " << m_Params.m_Iterations;
if (m_Params.m_Concurrent)
{
msg << " times in an asynchronous manner. ";
}
else
{
msg << " times successively. ";
}
msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
"cover each execution.";
ARMNN_LOG(info) << msg.str();
}
if (m_Params.m_GenerateTensorData)
{
ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful";
}
if (m_Params.m_DontPrintOutputs)
{
ARMNN_LOG(info) << "Printing outputs to console is disabled.";
}
}
ArmNNExecutor::~ArmNNExecutor()
{
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
// If profiling is enabled print out the results
if (profiler && profiler->IsProfilingEnabled())
{
profiler->Print(std::cout);
}
}
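
// Run all iterations concurrently through an Arm NN Threadpool (one working memory handle per
// thread). Only compiled in when ARMNN_DISABLE_THREADS is not defined.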
void ArmNNExecutor::ExecuteAsync()
{
#if !defined(ARMNN_DISABLE_THREADS)
std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
std::unique_ptr<armnn::Threadpool> threadpool;
armnn::AsyncCallbackManager callbackManager;
std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;
for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
{
memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
}
threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
m_Runtime,
memHandles);
ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
// Declare the latest and earliest inference times here to be used when calculating overall time
std::chrono::high_resolution_clock::time_point earliestStartTime =
std::chrono::high_resolution_clock::time_point::max();
std::chrono::high_resolution_clock::time_point latestEndTime =
std::chrono::high_resolution_clock::now();
// For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
// LoadedNetwork with each scheduled inference having a specific priority
for (size_t i = 0; i < m_Params.m_Iterations; ++i)
{
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
threadpool->Schedule(m_NetworkId,
m_InputTensorsVec[i],
m_OutputTensorsVec[i],
armnn::QosExecPriority::Medium,
cb);
}
// Check the results
for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
{
auto cb = callbackManager.GetNotifiedCallback();
// Get the results
if (earliestStartTime > cb->GetStartTime())
{
earliestStartTime = cb->GetStartTime();
}
if (latestEndTime < cb->GetEndTime())
{
latestEndTime = cb->GetEndTime();
}
auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
auto inferenceDuration = endTime - startTime;
CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
if (!m_Params.m_DontPrintOutputs)
{
const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
PrintOutputTensors(out, iteration);
}
}
// Print duration difference between overallStartTime and overallEndTime
auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
auto totalInferenceDuration = overallEndTime - overallStartTime;
ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
<< std::fixed << totalInferenceDuration.count() << " ms\n";
#endif
}
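
// Run all iterations sequentially with EnqueueWorkload, timing each inference and checking it
// against the optional threshold time (m_ThresholdTime).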
void ArmNNExecutor::ExecuteSync()
{
for (size_t x = 0; x < m_Params.m_Iterations; x++)
{
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
const auto start_time = armnn::GetTimeNow();
armnn::Status ret;
if (m_Params.m_ImportInputsIfAligned)
{
ret = m_Runtime->EnqueueWorkload(m_NetworkId,
m_InputTensorsVec[x],
m_OutputTensorsVec[x],
m_ImportedInputIds[x],
m_ImportedOutputIds[x]);
}
else
{
ret = m_Runtime->EnqueueWorkload(m_NetworkId,
m_InputTensorsVec[x],
m_OutputTensorsVec[x]);
}
const auto inferenceDuration = armnn::GetTimeDuration(start_time);
if (ret == armnn::Status::Failure)
{
throw armnn::Exception("IRuntime::EnqueueWorkload failed");
}
if (!m_Params.m_DontPrintOutputs)
{
PrintOutputTensors(&m_OutputTensorsVec[x], x);
}
// If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
}
}
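
// Top-level execution entry point: dispatch to synchronous or thread-pool execution depending on
// m_ThreadPoolSize and return the raw memory of each output buffer for later comparison.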
std::vector<const void*> ArmNNExecutor::Execute()
{
time_t rawtime;
time (&rawtime);
ARMNN_LOG(info) << "Inferences began at: ("
<< std::chrono::duration_cast<std::chrono::nanoseconds>(armnn::GetTimeNow().time_since_epoch()).count()
<< " ns) " << ctime (&rawtime);
if (m_Params.m_ThreadPoolSize == 0)
{
ExecuteSync();
}
else
{
ExecuteAsync();
}
time (&rawtime);
ARMNN_LOG(info) << "Inferences ended at: ("
<< std::chrono::duration_cast<std::chrono::nanoseconds>(armnn::GetTimeNow().time_since_epoch()).count()
<< " ns) " << ctime (&rawtime);
std::vector<const void*> results;
for (auto& output : m_OutputStorage)
{
results.push_back(output.m_Mem);
}
return results;
}
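
// Print the name, shape, data type and quantization parameters of every network input and output.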
void ArmNNExecutor::PrintNetworkInfo()
{
const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
m_Params.m_InputNames :
m_IOInfo.m_InputNames;
std::stringstream ss;
ss << "===== Network Info =====\n";
ss << "Inputs in order:\n";
for (const auto& inputName : inputNames)
{
const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
if (inputInfo.IsQuantized())
{
ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
if (inputInfo.HasMultipleQuantizationScales())
{
ss << " Quantization scales: ";
for (const auto scale: inputInfo.GetQuantizationScales())
{
ss << scale << ", ";
}
}
else
{
ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
}
}
ss << "\n";
}
ss << "Outputs in order:\n";
for (const auto& outputName : m_IOInfo.m_OutputNames)
{
const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
if (outputInfo.IsQuantized())
{
ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
if (outputInfo.HasMultipleQuantizationScales())
{
ss << " Quantization scales: ";
for (const auto scale: outputInfo.GetQuantizationScales())
{
ss << scale << ", ";
}
}
else
{
ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
}
}
ss << "\n";
}
std::cout << ss.str() << std::endl;
}
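
// Validate the number of supplied input/output files, allocate storage for each input and output
// set, populate the inputs (from file or generated data) and, when m_ImportInputsIfAligned is set,
// pre-import the buffers into the runtime.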
void ArmNNExecutor::SetupInputsAndOutputs()
{
const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();
if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
{
LogAndThrow("Number of input names does not match number of inputs");
}
const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
m_Params.m_InputNames :
m_IOInfo.m_InputNames;
unsigned int noInputSets = 1;
if (inputFilePaths != 0)
{
if (inputFilePaths % noOfInputs != 0)
{
LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
" not compatible with number of inputs: " + std::to_string(noOfInputs));
}
noInputSets = inputFilePaths / noOfInputs;
if (noInputSets != 1 && m_Params.m_ReuseBuffers)
{
LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
}
}
const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
unsigned int noOutputSets = 1;
if (outputFilePaths != 0)
{
if (outputFilePaths % noOfOutputs != 0)
{
LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
", not compatible with number of outputs: " + std::to_string(noOfOutputs));
}
noOutputSets = outputFilePaths / noOfOutputs;
if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
{
LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
}
}
if (m_Params.m_ThreadPoolSize != 0)
{
// The current implementation of the Threadpool does not allow binding of outputs to a thread.
// So, to ensure that no two threads write to the same output at the same time, no output can be reused.
noOutputSets = m_Params.m_Iterations;
}
if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
{
ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
<< "for each input. The user provided "
<< m_Params.m_InputTensorDataFilePaths.size()
<< " input-tensor-data file/s which will be used to fill the input/s.\n";
}
unsigned int inputCount = 0;
for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
{
armnn::InputTensors inputTensors;
for (const auto& inputName: inputNames)
{
armnn::BindingPointInfo bindingPointInfo;
try
{
bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
}
catch (const std::out_of_range& e)
{
LogAndThrow("Input with inputName: " + inputName + " not found.");
}
const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
tensorInfo.GetQuantizationScale(),
tensorInfo.GetQuantizationOffset(),
true};
m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});
const int bindingId = bindingPointInfo.first;
inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});
const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
armnn::EmptyOptional() :
armnn::MakeOptional<std::string>(
m_Params.m_InputTensorDataFilePaths.at(inputCount++));
switch (tensorInfo.GetDataType())
{
case armnn::DataType::Float32:
{
auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
break;
}
case armnn::DataType::QSymmS16:
{
auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
break;
}
case armnn::DataType::QSymmS8:
case armnn::DataType::QAsymmS8:
{
auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
break;
}
case armnn::DataType::QAsymmU8:
{
auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
break;
}
case armnn::DataType::Signed32:
{
auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
break;
}
default:
{
LogAndThrow("Unexpected DataType");
}
}
}
if (m_Params.m_ImportInputsIfAligned)
{
m_ImportedInputIds.push_back(
m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc));
}
m_InputTensorsVec.emplace_back(inputTensors);
}
for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
{
armnn::OutputTensors outputTensors;
for (const auto& output: m_IOInfo.m_OutputInfoMap)
{
const armnn::BindingPointInfo& bindingPointInfo = output.second;
const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
}
m_OutputTensorsVec.emplace_back(outputTensors);
if (m_Params.m_ImportInputsIfAligned)
{
m_ImportedOutputIds.push_back(
m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
}
}
// If iterations > noSets fill the remaining iterations repeating the given files
// If iterations < noSets just ignore the extra files
const unsigned int remainingInputSets = (m_Params.m_Iterations > noInputSets)
? m_Params.m_Iterations - noInputSets
: 0;
for (unsigned int i = 0; i < remainingInputSets; ++i)
{
m_InputTensorsVec.push_back(m_InputTensorsVec[i % noInputSets]);
if (m_Params.m_ImportInputsIfAligned)
{
m_ImportedInputIds.push_back(m_ImportedInputIds[i % noInputSets]);
}
}
const unsigned int remainingOutputSets = (m_Params.m_Iterations > noOutputSets)
? m_Params.m_Iterations - noOutputSets
: 0;
for (unsigned int i = 0; i < remainingOutputSets; ++i)
{
m_OutputTensorsVec.push_back(m_OutputTensorsVec[i % noOutputSets]);
if (m_Params.m_ImportInputsIfAligned)
{
m_ImportedOutputIds.push_back(m_ImportedOutputIds[i % noOutputSets]);
}
}
}
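
// Visit the optimized network with an IStrategy to collect the binding id and tensor info of
// every Input and Output layer.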
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
struct IOStrategy : armnn::IStrategy
{
void ExecuteStrategy(const armnn::IConnectableLayer* layer,
const armnn::BaseDescriptor& descriptor,
const std::vector<armnn::ConstTensor>& constants,
const char* name,
const armnn::LayerBindingId id = 0) override
{
armnn::IgnoreUnused(descriptor, constants, id);
switch (layer->GetType())
{
case armnn::LayerType::Input:
{
m_IOInfo.m_InputNames.emplace_back(name);
m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
break;
}
case armnn::LayerType::Output:
{
m_IOInfo.m_OutputNames.emplace_back(name);
m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetTensorInfo()};
break;
}
default: {}
}
}
IOInfo m_IOInfo;
};
IOStrategy ioStrategy;
optNet->ExecuteStrategy(ioStrategy);
return ioStrategy.m_IOInfo;
}
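
// Convert the ExecuteNetwork parameters into optimizer/backend options, run armnn::Optimize and,
// if requested, serialize the optimized graph to a .dot file.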
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
armnn::OptimizerOptionsOpaque options;
options.SetReduceFp32ToFp16(m_Params.m_EnableFp16TurboMode);
options.SetDebugEnabled(m_Params.m_PrintIntermediate);
options.SetDebugToFileEnabled(m_Params.m_PrintIntermediateOutputsToFile);
options.SetShapeInferenceMethod(m_Params.m_InferOutputShape ?
armnn::ShapeInferenceMethod::InferAndValidate :
armnn::ShapeInferenceMethod::ValidateOnly);
options.SetProfilingEnabled(m_Params.m_EnableProfiling);
options.SetAllowExpandedDims(m_Params.m_AllowExpandedDims);
armnn::BackendOptions gpuAcc("GpuAcc",
{
{ "FastMathEnabled", m_Params.m_EnableFastMath },
{ "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
{ "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
{ "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
});
armnn::BackendOptions cpuAcc("CpuAcc",
{
{ "FastMathEnabled", m_Params.m_EnableFastMath },
{ "NumberOfThreads", m_Params.m_NumberOfThreads }
});
options.AddModelOption(gpuAcc);
options.AddModelOption(cpuAcc);
// The ShapeInferenceMethod and AllowExpandedDims values also have to be added as model options
// because they are passed to the OptimizeSubgraphViews method, which uses them to create the
// new optimized INetwork.
armnn::BackendOptions allowExDimOpt("AllowExpandedDims",
{
{ "AllowExpandedDims", m_Params.m_AllowExpandedDims }
});
options.AddModelOption(allowExDimOpt);
armnn::BackendOptions shapeInferOpt("ShapeInferenceMethod",
{
{ "InferAndValidate", m_Params.m_InferOutputShape }
});
options.AddModelOption(shapeInferOpt);
const auto optimization_start_time = armnn::GetTimeNow();
optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
<< std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";
if (!optNet)
{
LogAndThrow("Optimize returned nullptr");
}
// If -v / --visualize-optimized-model is enabled, construct a file name for the dot file.
if (m_Params.m_EnableLayerDetails)
{
fs::path filename = m_Params.m_ModelPath;
filename.replace_extension("dot");
std::fstream file(filename.c_str(), std::ios_base::out);
optNet->SerializeToDot(file);
}
return optNet;
}
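
// Choose a parser based on the model file extension (.armnn, .tflite or .onnx). Only parsers
// enabled at build time are available; anything else is rejected.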
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
const fs::path modelFilename = m_Params.m_ModelPath;
const std::string modelExtension = modelFilename.extension();
m_Params.m_IsModelBinary = modelExtension != ".json";
std::unique_ptr<IParser> parser = nullptr;
// Forward to implementation based on the parser type
if (modelExtension == ".armnn")
{
#if defined(ARMNN_SERIALIZER)
parser = std::make_unique<ArmNNDeserializer>();
#else
LogAndThrow("Not built with serialization support.");
#endif
}
else if (modelExtension == ".tflite")
{
#if defined(ARMNN_TF_LITE_PARSER)
parser = std::make_unique<TfliteParser>(m_Params);
#else
LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
}
else if (modelExtension == ".onnx")
{
#if defined(ARMNN_ONNX_PARSER)
parser = std::make_unique<OnnxParser>();
#else
LogAndThrow("Not built with Onnx parser support.");
#endif
}
if (parser == nullptr)
{
throw InvalidArgumentException("Unable to determine the model type based on the file name extension.");
}
return parser;
}
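
// Print each output tensor of one iteration to stdout (unless suppressed) and/or write it to the
// corresponding file in m_OutputTensorFiles.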
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
unsigned int iteration)
{
auto findOutputName = [&](const armnn::LayerBindingId id)
{
for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
{
if (id == it->second.first)
{
return it->first;
}
}
return std::string{};
};
unsigned int outputIndex = 0;
unsigned int numOutputs = outputTensors->size();
for (const auto& output: *outputTensors)
{
const auto bindingName = findOutputName(output.first);
// We've made sure before that the number of output files either equals numOutputs, in which
// case we overwrite those files when processing the results of each iteration (only the result
// of the last iteration will be stored), or that there are enough output files for each output
// of every iteration.
size_t outputFileIndex = iteration * numOutputs + outputIndex;
if (!m_Params.m_OutputTensorFiles.empty())
{
outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
<< output.first
<< "' of iteration: " << iteration + 1 << " to file: '"
<< m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
}
const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
armnn::EmptyOptional() :
armnn::MakeOptional<std::string>(
m_Params.m_OutputTensorFiles[outputFileIndex]);
OutputWriteInfo outputWriteInfo
{
outputTensorFile,
bindingName,
output.second,
!m_Params.m_DontPrintOutputs,
output.second.GetDataType()
};
std::cout << bindingName << ": ";
switch (output.second.GetDataType())
{
case armnn::DataType::Float32:
{
PrintTensor<float>(outputWriteInfo, "%f ");
break;
}
case armnn::DataType::Signed32:
{
PrintTensor<int>(outputWriteInfo, "%d ");
break;
}
case armnn::DataType::Signed64:
{
PrintTensor<int64_t>(outputWriteInfo, "%ld ");
break;
}
case armnn::DataType::QSymmS8:
case armnn::DataType::QAsymmS8:
{
PrintTensor<int8_t>(outputWriteInfo, "%d ");
break;
}
case armnn::DataType::QAsymmU8:
case armnn::DataType::Boolean:
{
PrintTensor<uint8_t>(outputWriteInfo, "%d ");
break;
}
case armnn::DataType::Float16:
case armnn::DataType::QSymmS16:
case armnn::DataType::BFloat16:
default:
{
LogAndThrow("Unexpected DataType");
}
}
std::cout << "\n";
++outputIndex;
}
}
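
// Compute and print the byte-level root mean square error between each of this executor's output
// buffers and the corresponding buffer in otherOutput.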
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
unsigned int index = 0;
for (const auto& outputTensors: m_OutputTensorsVec)
{
for (const auto& outputTensor: outputTensors)
{
size_t size = outputTensor.second.GetNumBytes();
double result = ComputeByteLevelRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
std::cout << "Byte level root mean square error: " << result << "\n";
}
}
}
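
// Parser adapters. Each wraps one of the Arm NN model parsers behind the common IParser interface
// and is only compiled in when that parser is available in the build.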
#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}
armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
const std::string& modelPath = params.m_ModelPath;
std::ifstream file(modelPath, std::ios::binary);
return m_Parser->CreateNetworkFromBinary(file);
}
armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif
#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
options.m_InferAndValidate = params.m_InferOutputShape;
options.m_AllowExpandedDims = params.m_AllowExpandedDims;
m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}
armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
const std::string& modelPath = params.m_ModelPath;
return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}
armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
const std::string& inputName)
{
return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}
armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
const std::string& outputName)
{
return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif
#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}
armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
const std::string& modelPath = params.m_ModelPath;
m_Parser = armnnOnnxParser::IOnnxParser::Create();
std::map<std::string, armnn::TensorShape> inputShapes;
if (!params.m_InputTensorShapes.empty())
{
const size_t numInputShapes = params.m_InputTensorShapes.size();
const size_t numInputBindings = params.m_InputNames.size();
if (numInputShapes < numInputBindings)
{
throw armnn::Exception(
fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
numInputBindings, numInputShapes));
}
for (size_t i = 0; i < numInputShapes; i++)
{
inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
}
return params.m_IsModelBinary ?
m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
}
// Handle text and binary input differently by calling the corresponding parser function
return params.m_IsModelBinary ?
m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}
armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
return m_Parser->GetNetworkInputBindingInfo(inputName);
}
armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif