blob: cfd2e0e110c32d7a3d149a76924e34fd2e334b4c [file] [log] [blame]
// Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
#include <armnn/Exceptions.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include "ArmComputeUtils.hpp"
#include <armnn/Descriptors.hpp>
#include <fmt/format.h>
namespace armnn
namespace armcomputetensorutils
// Maps an armnn::DataType onto the arm_compute::DataType used to represent it.
//   dataType    - the Arm NN data type to translate.
//   multiScales - only consulted for QSymmS8: true selects QSYMM8_PER_CHANNEL, false selects QSYMM8.
// Boolean is represented as U8. The trailing return yields arm_compute::DataType::UNKNOWN.
// NOTE(review): the switch header, any default label and the braces are not visible in this view of
// the file - presumably UNKNOWN is the fallback for unmapped types; confirm against the full source.
arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType, bool multiScales)
case armnn::DataType::BFloat16:
return arm_compute::DataType::BFLOAT16;
case armnn::DataType::Boolean:
return arm_compute::DataType::U8;
case armnn::DataType::Float16:
return arm_compute::DataType::F16;
case armnn::DataType::Float32:
return arm_compute::DataType::F32;
case armnn::DataType::QAsymmS8:
return arm_compute::DataType::QASYMM8_SIGNED;
case armnn::DataType::QAsymmU8:
return arm_compute::DataType::QASYMM8;
case armnn::DataType::QSymmS16:
return arm_compute::DataType::QSYMM16;
case armnn::DataType::Signed64:
return arm_compute::DataType::S64;
case armnn::DataType::QSymmS8:
// Per-channel quantization (multiple scales) needs the dedicated per-channel ACL type.
return multiScales ? arm_compute::DataType::QSYMM8_PER_CHANNEL : arm_compute::DataType::QSYMM8;
case armnn::DataType::Signed32:
return arm_compute::DataType::S32;
return arm_compute::DataType::UNKNOWN;
// Maps an arm_compute::DataType back onto an armnn::DataType.
// The mapping is lossy in two places: U8 is always reported as Boolean, and both QSYMM8 and
// QSYMM8_PER_CHANNEL are reported as QSymmS8.
// The final statement throws InvalidArgumentException.
// NOTE(review): the switch header, default label and braces are not visible in this view of the
// file - presumably the throw is the default case for unmapped types; confirm against the full source.
armnn::DataType GetArmNNDataType(arm_compute::DataType dataType)
case arm_compute::DataType::BFLOAT16:
return armnn::DataType::BFloat16;
case arm_compute::DataType::U8:
return armnn::DataType::Boolean;
case arm_compute::DataType::F16:
return armnn::DataType::Float16;
case arm_compute::DataType::F32:
return armnn::DataType::Float32;
case arm_compute::DataType::QASYMM8_SIGNED:
return armnn::DataType::QAsymmS8;
case arm_compute::DataType::QASYMM8:
return armnn::DataType::QAsymmU8;
case arm_compute::DataType::QSYMM16:
return armnn::DataType::QSymmS16;
case arm_compute::DataType::S64:
return armnn::DataType::Signed64;
case arm_compute::DataType::QSYMM8_PER_CHANNEL:
return armnn::DataType::QSymmS8;
case arm_compute::DataType::QSYMM8:
return armnn::DataType::QSymmS8;
case arm_compute::DataType::S32:
return armnn::DataType::Signed32;
throw InvalidArgumentException("Unknown arm_compute::DataType data type");
// Builds the Compute Library reduction axes that correspond to a set of Arm NN reduction axes.
//   inputDimensions   - number of dimensions to reduce over when no axes are supplied.
//   originalInputRank - rank used to convert each Arm NN axis: aclAxis = originalInputRank - axis - 1.
//   armnnAxes         - Arm NN reduction axes; an empty vector means "reduce along every dimension".
// Returns the arm_compute::Coordinates object holding the ACL axes.
// NOTE(review): in this view no statement sizing outAclCoords is visible, the std::transform call
// shows no output iterator argument, and the `else` separating the two paths is missing - confirm
// against the full source before relying on the exact statement sequence.
arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensions,
unsigned int originalInputRank,
const std::vector<unsigned int>& armnnAxes)
arm_compute::Coordinates outAclCoords;
if (armnnAxes.empty())
// If no reduction axes were provided, then the input must be reduced along all dimensions.
// Since Compute Library does not accept an empty vector as the reduction dimensions, we then
// manually create a vector including all the input dimensions (in reversed order) as:
// { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
// The init-captured counter d starts at inputDimensions - 1 and is post-decremented on every call.
std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
// Create a vector of reduction dimensions (in reversed order) with the given reduction axes.
// Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
// dimension correction).
// For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
// new value for that reduction axis should be 1.
// Example:
// ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
// ArmNN reduction axis = { 2 } -> ACL reduction axis = { 1 }
// ArmNN reduction axis = { 3 } -> ACL reduction axis = { 0 }
// The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
std::transform(armnnAxes.begin(), armnnAxes.end(),
[originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
return outAclCoords;
// Converts an armnn::TensorShape into an arm_compute::TensorShape with the dimension order reversed:
// Arm NN dimension i is written to ACL dimension (rank - i - 1).
// Returns the resulting ACL shape.
arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape)
arm_compute::TensorShape shape;
// armnn tensors are (batch, channels, height, width).
// arm_compute tensors are (width, height, channels, batch).
for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++)
// Note that our dimensions are stored in the opposite order to ACL's.
shape.set(tensorShape.GetNumDimensions() - i - 1, tensorShape[i], false);
// TensorShape::set() flattens leading ones, so that batch size 1 cannot happen.
// arm_compute tensors expect this.
// prevent arm_compute issue where tensor is flattened to nothing
// NOTE(review): the statement guarded by this check is not visible in this view of the file -
// presumably it forces a minimum dimension count; confirm against the full source.
if (shape.num_dimensions() == 0)
return shape;
// Builds a vector of dimension sizes from tensorShape in reversed order (each dimension is inserted
// at the front of the result), for a target rank of `dimensions`.
//   tensorShape - the Arm NN shape to reduce (taken by value).
//   dimensions  - the target number of dimensions; dimsToSkip is the excess rank above this value
//                 (0 when the shape already has `dimensions` or fewer dimensions).
// Returns the reversed dimension vector.
// NOTE(review): the body of the size-1 check inside the loop is not visible in this view -
// presumably it counts the dimension as skipped and moves on to the next one without inserting it,
// so that up to dimsToSkip leading 1s are dropped; confirm against the full source.
std::vector<unsigned int> ReduceDimsForACL(const armnn::TensorShape tensorShape, unsigned int dimensions)
std::vector<unsigned int> newShape;
unsigned int dimsToSkip = 0;
if (tensorShape.GetNumDimensions() > dimensions)
dimsToSkip = tensorShape.GetNumDimensions() - dimensions;
unsigned int dimsSkipped = 0;
bool insertRemainder = false;
for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); ++i)
// Only size-1 dimensions seen before the first inserted dimension are candidates for skipping.
if (tensorShape[i] == 1 && dimsSkipped < dimsToSkip && !insertRemainder)
// Inserting at the front reverses the order relative to the Arm NN shape.
newShape.insert(newShape.begin(), tensorShape[i]);
// Once we insert the first dimension we can't skip any more
insertRemainder = true;
return newShape;
// Overload that builds an arm_compute::TensorShape for a requested number of dimensions.
// The Arm NN shape is first passed through ReduceDimsForACL(tensorShape, dimensions); the resulting
// values are then written to the ACL shape in the same order (index i -> ACL dimension i).
// Returns the resulting ACL shape.
arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape, unsigned int dimensions)
arm_compute::TensorShape shape;
std::vector<unsigned int> strippedShape = ReduceDimsForACL(tensorShape, dimensions);
for (unsigned int i = 0; i < strippedShape.size(); i++)
shape.set(i, strippedShape[i], false);
// prevent arm_compute issue where tensor is flattened to nothing
// NOTE(review): the statement guarded by this check is not visible in this view of the file -
// presumably it forces a minimum dimension count; confirm against the full source.
if (shape.num_dimensions() == 0)
return shape;
// Utility function used to build a TensorInfo object, that can be used to initialise
// ARM Compute Tensor and CLTensor allocators.
// Note: this utility ignores the value of armnn::TensorInfo.IsConstant(). ACL tensors
// default to constant but Arm NN ones default to non constant. In the cases where
// we expect ACL to treat a tensor as constant that value must be set after this
// utility has been called.
// The shape, data type and quantization info are all derived from tensorInfo. When the tensor has
// multiple quantization scales the ACL QuantizationInfo is built from the vector of scales;
// otherwise it is built from the single scale and offset.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo)
bool multiScales = tensorInfo.HasMultipleQuantizationScales();
const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape());
// multiScales also influences the ACL data type chosen for QSymmS8 tensors.
const arm_compute::DataType aclDataType = GetArmComputeDataType(tensorInfo.GetDataType(), multiScales);
const arm_compute::QuantizationInfo aclQuantizationInfo = multiScales ?
arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScales()) :
arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScale(), tensorInfo.GetQuantizationOffset());
return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
// Overload that additionally takes the Arm NN data layout of the tensor.
// The ACL TensorInfo is first built by the single-argument overload and then returned.
// NOTE(review): in the lines visible here dataLayout is never applied to aclTensorInfo - presumably
// a statement setting the ACL data layout sits between these two lines; confirm against the full source.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
armnn::DataLayout dataLayout)
arm_compute::TensorInfo aclTensorInfo = BuildArmComputeTensorInfo(tensorInfo);
return aclTensorInfo;
// Overload that builds the ACL TensorInfo using a shape produced for a requested number of
// dimensions (via BuildArmComputeTensorShape(shape, dimensions)).
// Data type and quantization info are derived from tensorInfo. When the tensor has multiple
// quantization scales the ACL QuantizationInfo is built from the vector of scales; otherwise it is
// built from the single scale and offset.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo, unsigned int dimensions)
bool multiScales = tensorInfo.HasMultipleQuantizationScales();
const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape(), dimensions);
// multiScales also influences the ACL data type chosen for QSymmS8 tensors.
const arm_compute::DataType aclDataType = GetArmComputeDataType(tensorInfo.GetDataType(), multiScales);
const arm_compute::QuantizationInfo aclQuantizationInfo = multiScales ?
arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScales()) :
arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScale(), tensorInfo.GetQuantizationOffset());
return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
// Overload that takes both the Arm NN data layout and a requested number of dimensions.
// The ACL TensorInfo is first built by the (tensorInfo, dimensions) overload and then returned.
// NOTE(review): in the lines visible here dataLayout is never applied to aclTensorInfo - presumably
// a statement setting the ACL data layout sits between these two lines; confirm against the full source.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
armnn::DataLayout dataLayout, unsigned int dimensions)
arm_compute::TensorInfo aclTensorInfo = BuildArmComputeTensorInfo(tensorInfo, dimensions);
return aclTensorInfo;
// Translates an armnn::DataLayout into the arm_compute::DataLayout of the same name
// (NHWC, NCHW, NDHWC, NCDHW).
// Any other value throws InvalidArgumentException; the message includes the numeric value of the
// unrecognised layout.
// NOTE(review): the switch header and braces are not visible in this view of the file.
arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC;
case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW;
case armnn::DataLayout::NDHWC : return arm_compute::DataLayout::NDHWC;
case armnn::DataLayout::NCDHW : return arm_compute::DataLayout::NCDHW;
default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" +
std::to_string(static_cast<int>(dataLayout)) + "]");
// Builds an arm_compute::PoolingLayerInfo from an Arm NN Pooling2dDescriptor.
//   descriptor       - supplies pool type, data layout, strides, pool size and padding method.
//   fpMixedPrecision - NOTE(review): its use is not visible in this view; presumably it is one of
//                      the trailing constructor arguments that are cut off below - confirm.
// Two construction paths are visible: a (poolingType, dataLayout) constructor, and a full
// constructor taking pool size, pad/stride info and the exclude-padding flag.
// NOTE(review): several statements below are truncated mid-argument-list and the condition that
// selects the first return is missing in this view of the file; confirm against the full source.
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor,
bool fpMixedPrecision)
// Resolve ARM Compute layer parameters.
const arm_compute::PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
const arm_compute::DataLayout dataLayout = ConvertDataLayout(descriptor.m_DataLayout);
// Global pooling is signalled by both strides being zero.
bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0);
// Use the specific constructor if global pooling.
return arm_compute::PoolingLayerInfo(poolingType, dataLayout);
const arm_compute::DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
const arm_compute::PadStrideInfo padStrideInfo(descriptor.m_StrideX,
// Padding is excluded only when the descriptor's padding method is PaddingMethod::Exclude.
const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
const arm_compute::Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);
return arm_compute::PoolingLayerInfo(poolingType, poolSize, dataLayout, padStrideInfo, excludePadding,
// Builds an arm_compute::Pooling3dLayerInfo from an Arm NN Pooling3dDescriptor.
//   descriptor       - supplies pool type, strides, pool size, padding and padding method.
//   fpMixedPrecision - NOTE(review): its use is not visible in this view; presumably it is one of
//                      the trailing constructor arguments that are cut off below - confirm.
// Two construction paths are visible: a (poolingType)-only constructor, and a full constructor
// whose argument list is cut off here.
// NOTE(review): several statements below are truncated mid-argument-list and the condition that
// selects the first return is missing in this view of the file; confirm against the full source.
arm_compute::Pooling3dLayerInfo BuildArmComputePooling3dLayerInfo(const Pooling3dDescriptor& descriptor,
bool fpMixedPrecision)
const arm_compute::PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
// Global pooling is signalled by all three strides being zero.
bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0 && descriptor.m_StrideZ==0);
// Use the specific constructor if global pooling.
return arm_compute::Pooling3dLayerInfo(poolingType);
const arm_compute::Size3D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight, descriptor.m_PoolDepth);
const arm_compute::Size3D stride(descriptor.m_StrideX,
const arm_compute::Padding3D padding(descriptor.m_PadLeft,
// Padding is excluded only when the descriptor's padding method is PaddingMethod::Exclude.
const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
const arm_compute::DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
return arm_compute::Pooling3dLayerInfo(poolingType,
// Builds an arm_compute::NormalizationLayerInfo from an Arm NN NormalizationDescriptor.
// NOTE(review): both statements below are truncated in this view of the file - the expression that
// initialises normType and every constructor argument after normType are missing; confirm against
// the full source.
arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor)
const arm_compute::NormType normType =
return arm_compute::NormalizationLayerInfo(normType,
// Converts an armnn::PermutationVector into an arm_compute::PermutationVector.
// `start` marks the first index that takes part in the conversion: the while condition examines
// leading entries that map to themselves (start == perm[start]). Every entry from `start` onwards
// is stored at position (i - start) with value (perm[i] - start), i.e. rebased so the stored
// permutation begins at index 0.
// NOTE(review): the body of the while loop is not visible in this view of the file - presumably it
// advances `start` past the leading identity entries; confirm against the full source.
arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm)
arm_compute::PermutationVector aclPerm;
unsigned int start = 0;
while ((start < perm.GetSize()) && (start == perm[start]))
for (unsigned int i = start; i < perm.GetSize(); ++i)
aclPerm.set(i - start, perm[i] - start);
return aclPerm;
// Converts an Arm NN transpose permutation into the equivalent arm_compute::PermutationVector.
// The conversion is done in two passes: the permutation is reversed, then every entry is translated
// to an ACL axis as (rank - 1 - entry).
arm_compute::PermutationVector BuildArmComputeTransposeVector(const armnn::PermutationVector& perm)
// As ArmNN indexes are left to right and ACL indexes are right to left,
// the permutation vector has to be reversed and then translated into ACL axis.
// i.e. {1, 0, 2, 3} --> {3, 2, 0, 1} --> {0, 1, 3, 2}
// Below an example of how the ArmNN and ACL index format work:
// ArmNN Format:
// Input Shape {1, 10, 20, 30}
// Permutation Vector {1, 0, 2, 3}
// Output Shape {10, 1, 20, 30}
// dim "1" of input goes into index 0 of the output ([ 10, X, X, X])
// dim "0" of input goes into index 1 of the output ([ 10, 1, X, X ])
// dim "2" of input goes into index 2 of the output ([ 10, 1, 20, X ])
// dim "3" of input goes into index 3 of the output ([ 10, 1, 20, 30 ])
// ACL Format:
// Input Shape {30, 20, 10, 1}
// Permutation Vector {0, 1, 3, 2}
// Output Shape {30, 20, 1, 10}
// dim "0" of input goes into index 0 of the output ([ 30, X, X, X])
// dim "1" of input goes into index 1 of the output ([ 30, 20, X, X ])
// dim "3" of input goes into index 2 of the output ([ 30, 20, 1, X ])
// dim "2" of input goes into index 3 of the output ([ 30, 20, 1, 10 ])
arm_compute::PermutationVector aclPerm;
auto rank = perm.GetSize();
// Reverse the order. i.e. {1, 0, 2, 3} --> {3, 2, 0, 1}
// NOTE(review): the body of the reversing loop is not visible in this view of the file -
// presumably it appends perm[i - 1] to reversedPerm; confirm against the full source.
std::vector<unsigned int> reversedPerm;
// The loop counts down from rank to 1 (never reaching 0), which avoids unsigned wrap-around.
for (unsigned int i = rank; i > 0; --i)
// Translate from Arm NN axis to ACL axis. i.e. {3, 2, 0, 1} --> {0, 1, 3, 2}
for (unsigned int i = 0; i < rank; ++i)
auto aclAxis = rank - 1 - reversedPerm[i];
aclPerm.set(i, aclAxis);
return aclPerm;
// Convenience wrapper that constructs an arm_compute::Size2D from a width and a height,
// passed to the constructor in that order.
arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height)
return arm_compute::Size2D(width, height);
// Wraps a float in an arm_compute::PixelValue whose element type matches the tensor's data type.
//   tensorInfo - ACL tensor info whose data_type() selects the conversion; it is dereferenced
//                without a null check.
//   value      - the value to convert. For the integer/quantized types it is converted with a plain
//                static_cast; no quantization scale or offset is applied in this function.
// Throws InvalidArgumentException when the F16 conversion yields an infinity, and (final statement)
// for data types with no case here; the latter message includes the numeric data type value.
// NOTE(review): the braces and the default label are not visible in this view of the file -
// presumably the final throw is the default case; confirm against the full source.
arm_compute::PixelValue GetPixelValue(const arm_compute::ITensorInfo* tensorInfo, float value)
switch (tensorInfo->data_type())
case arm_compute::DataType::F16:
arm_compute::PixelValue pixelValue = arm_compute::PixelValue(static_cast<Half>(value));
// An infinite half after conversion is treated as an out-of-range float and rejected.
if (isinf(pixelValue.get<Half>())) {
throw InvalidArgumentException("Under/Overflow converting float value [" + std::to_string(value) +
"] to fp16: [" + std::to_string(pixelValue.get<Half>()) + "]");
return pixelValue;
case arm_compute::DataType::F32:
return arm_compute::PixelValue(value);
case arm_compute::DataType::QASYMM8:
return arm_compute::PixelValue(static_cast<uint8_t>(value));
case arm_compute::DataType::QSYMM16:
return arm_compute::PixelValue(static_cast<int16_t>(value));
// All signed 8-bit quantized types share the int8_t conversion.
case arm_compute::DataType::QSYMM8:
case arm_compute::DataType::QASYMM8_SIGNED:
case arm_compute::DataType::QSYMM8_PER_CHANNEL:
return arm_compute::PixelValue(static_cast<int8_t>(value));
case arm_compute::DataType::S32:
return arm_compute::PixelValue(static_cast<int32_t>(value));
throw InvalidArgumentException("Unsupported DataType: [" +
std::to_string(static_cast<int>(tensorInfo->data_type())) + "]");
// Computes the depthwise convolution depth multiplier as the integer quotient of one dimension of
// the weights shape by the same dimension of the input shape.
//   layout       - selects the dimension index used: 0 for NHWC, 2 for NCHW (presumably the channel
//                  dimension of the ACL shape in each layout - confirm).
//   weightsShape - ACL shape of the weights tensor.
//   inputShape   - ACL shape of the input tensor; the selected dimension is used as the divisor and
//                  is not checked for zero here.
// Returns the quotient; the division is unsigned integer division, so any remainder is discarded.
// NOTE(review): the `else` introducing the throw and the remaining fmt::format arguments are not
// visible in this view of the file - presumably other layouts throw InvalidArgumentException;
// confirm against the full source.
unsigned int ComputeDepthwiseConv2dDepthMultiplier(armnn::DataLayout layout,
const arm_compute::TensorShape& weightsShape,
const arm_compute::TensorShape& inputShape)
unsigned int depthMultiplier;
if (layout == armnn::DataLayout::NHWC)
depthMultiplier = static_cast<uint32_t>(weightsShape[0]) / static_cast<uint32_t>(inputShape[0]);
else if (layout == armnn::DataLayout::NCHW)
depthMultiplier = static_cast<uint32_t>(weightsShape[2]) / static_cast<uint32_t>(inputShape[2]);
throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
return depthMultiplier;
// Builds an arm_compute::ScatterInfo from an Arm NN ScatterNdDescriptor.
// descriptor.m_Function is translated to the arm_compute::ScatterFunction of the same name
// (Update, Add, Sub, Max, Min); any other value throws InvalidArgumentException, with the numeric
// value of the function included in the message.
// The second ScatterInfo argument is the negation of descriptor.m_InputEnabled (presumably ACL's
// "zero-initialise the output" flag - confirm against the ACL ScatterInfo documentation).
// NOTE(review): the switch header and any `break;` statements are not visible in this view of the
// file. Confirm that each case ends with a break in the full source; without them every case would
// fall through to the throwing default.
arm_compute::ScatterInfo BuildArmComputeScatterInfo(const ScatterNdDescriptor& descriptor)
arm_compute::ScatterFunction scatterFunction;
case ScatterNdFunction::Update:
scatterFunction = arm_compute::ScatterFunction::Update;
case ScatterNdFunction::Add:
scatterFunction = arm_compute::ScatterFunction::Add;
case ScatterNdFunction::Sub:
scatterFunction = arm_compute::ScatterFunction::Sub;
case ScatterNdFunction::Max:
scatterFunction = arm_compute::ScatterFunction::Max;
case ScatterNdFunction::Min:
scatterFunction = arm_compute::ScatterFunction::Min;
default: throw InvalidArgumentException("Unknown ArmNN::ScatterNd Function: [" +
std::to_string(static_cast<int>(descriptor.m_Function)) + "]");
return arm_compute::ScatterInfo(scatterFunction, !descriptor.m_InputEnabled);
} // namespace armcomputetensorutils
} // namespace armnn