blob: a79f4c03652783e05f3789f6ed706f3f2e689355 [file] [log] [blame]
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "NeonLayerSupport.hpp"
#include <LayerSupportCommon.hpp>
#include <InternalTypes.hpp>
#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>
#include <armnn/Tensor.hpp>
#include <boost/core/ignore_unused.hpp>
#ifdef ARMCOMPUTENEON_ENABLED
#include "workloads/NeonAdditionFloatWorkload.hpp"
#include "workloads/NeonActivationFloatWorkload.hpp"
#include "workloads/NeonBatchNormalizationFloatWorkload.hpp"
#include "workloads/NeonConvolution2dBaseWorkload.hpp"
#include "workloads/NeonDepthwiseConvolutionBaseWorkload.hpp"
#include "workloads/NeonL2NormalizationFloatWorkload.hpp"
#include "workloads/NeonMultiplicationFloatWorkload.hpp"
#include "workloads/NeonNormalizationFloatWorkload.hpp"
#include "workloads/NeonFullyConnectedWorkload.hpp"
#include "workloads/NeonPermuteWorkload.hpp"
#include "workloads/NeonPooling2dBaseWorkload.hpp"
#include "workloads/NeonSoftmaxBaseWorkload.hpp"
#include "workloads/NeonSubtractionFloatWorkload.hpp"
#endif
using namespace boost;
namespace armnn
{
bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc)
{
// See arm_compute::NEDirectConvolutionLayer documentation for the supported cases,
// and complement with NEDirectConvolutionLayerKernel::configure() implementation.
// Only 1x1 is using direct convolution. Performance results and details are in:
// https://jira.arm.com/browse/IVGCVSW-1003
// Measurements were taken as of clframework: f105ab972135bcd21304883eff040d7e587099bc
const bool dataTypeSupported = (weightInfo.GetDataType() == armnn::DataType::Float32);
// Strides: 1|2|3
const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) &&
(desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3);
auto paddingLargerThan = [](const Convolution2dDescriptor& conv2ddesc, unsigned int value)
{
return conv2ddesc.m_PadLeft > value || conv2ddesc.m_PadRight > value ||
conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value;
};
// Supported sizes and padding.
const bool sizeAndPaddingSupported =
// Pad > 0 not supported for 1x1 weights.
(weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u));
const bool preferDirectConvolution = dataTypeSupported &&
strideSupported &&
sizeAndPaddingSupported &&
// NEDirectConvolutionLayerKernel doesn't support NULL bias.
desc.m_BiasEnabled;
return preferDirectConvolution;
}
bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters)
{
if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness)
{
if (reasonIfUnsupported)
{
*reasonIfUnsupported = "Unsupported normalisation method type, only LocalBrightness is supported";
}
return false;
}
if (parameters.m_NormSize % 2 == 0)
{
if (reasonIfUnsupported)
{
*reasonIfUnsupported = "Normalization size must be an odd number.";
}
return false;
}
return true;
}
bool IsNeonBackendSupported(std::string* reasonIfUnsupported)
{
#if ARMCOMPUTENEON_ENABLED
return true;
#else
if (reasonIfUnsupported != nullptr)
{
*reasonIfUnsupported = "The armnn library has been built without NEON support";
}
return false;
#endif
}
template<typename FloatFunc, typename Uint8Func, typename ... Params>
bool IsSupportedForDataTypeNeon(std::string* reasonIfUnsupported,
DataType dataType,
FloatFunc floatFuncPtr,
Uint8Func uint8FuncPtr,
Params&&... params)
{
return IsNeonBackendSupported(reasonIfUnsupported) &&
IsSupportedForDataTypeGeneric(reasonIfUnsupported,
dataType,
floatFuncPtr,
floatFuncPtr,
uint8FuncPtr,
std::forward<Params>(params)...);
}
#if ARMCOMPUTENEON_ENABLED
template<class FuncType, class... Args>
inline bool IsWorkloadSupported(FuncType& func, std::string* reasonIfUnsupported, Args&&... args)
{
arm_compute::Status aclStatus = func(std::forward<Args>(args)...);
const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
if (!supported && reasonIfUnsupported)
{
*reasonIfUnsupported = aclStatus.error_description();
}
return supported;
}
#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__);
#else
#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
return IsNeonBackendSupported(reasonIfUnsupported);
#endif
bool IsActivationSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const ActivationDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
ignore_unused(descriptor);
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate,
reasonIfUnsupported,
input,
output,
descriptor);
}
bool IsAdditionSupportedNeon(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
reasonIfUnsupported,
input0,
input1,
output);
}
bool IsBatchNormalizationSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& mean,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
const BatchNormalizationDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate,
reasonIfUnsupported,
input,
output,
mean,
var,
beta,
gamma,
descriptor);
}
bool IsConstantSupportedNeon(const TensorInfo& output,
std::string* reasonIfUnsupported)
{
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
output.GetDataType(),
&TrueFunc<>,
&TrueFunc<>);
}
bool IsConvolution2dSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const boost::optional<TensorInfo>& biases,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate,
reasonIfUnsupported,
input,
output,
descriptor,
weights,
biases);
}
bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const DepthwiseConvolution2dDescriptor& descriptor,
const TensorInfo& weights,
const boost::optional<TensorInfo>& biases,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate,
reasonIfUnsupported,
input,
output,
descriptor,
weights,
biases);
}
bool IsDivisionSupportedNeon(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
std::string* reasonIfUnsupported)
{
// At the moment division is not supported
return false;
}
bool IsSubtractionSupportedNeon(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
reasonIfUnsupported,
input0,
input1,
output);
}
bool IsFullyConnectedSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
const TensorInfo& biases,
const FullyConnectedDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
// At the moment U8 is unsupported
if (input.GetDataType() == DataType::QuantisedAsymm8)
{
return false;
}
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate,
reasonIfUnsupported,
input,
output,
weights,
biases,
descriptor);
}
bool IsInputSupportedNeon(const TensorInfo& input,
std::string* reasonIfUnsupported)
{
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
input.GetDataType(),
&TrueFunc<>,
&TrueFunc<>);
}
bool IsL2NormalizationSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const L2NormalizationDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs,
const OriginsDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
ignore_unused(descriptor);
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
inputs[0]->GetDataType(),
&TrueFunc<>,
&TrueFunc<>);
}
bool IsMultiplicationSupportedNeon(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
reasonIfUnsupported,
input0,
input1,
output);
}
bool IsNormalizationSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const NormalizationDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
bool IsOutputSupportedNeon(const TensorInfo& output,
std::string* reasonIfUnsupported)
{
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
output.GetDataType(),
&TrueFunc<>,
&TrueFunc<>);
}
bool IsPermuteSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const PermuteDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
bool IsPooling2dSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const Pooling2dDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
bool IsResizeBilinearSupportedNeon(const TensorInfo& input,
std::string* reasonIfUnsupported)
{
ignore_unused(input);
return false;
}
bool IsSoftmaxSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const SoftmaxDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
bool IsSplitterSupportedNeon(const TensorInfo& input,
const ViewsDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
ignore_unused(descriptor);
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
input.GetDataType(),
&TrueFunc<>,
&TrueFunc<>);
}
bool IsFakeQuantizationSupportedNeon(const TensorInfo& input,
const FakeQuantizationDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
ignore_unused(input);
ignore_unused(descriptor);
return false;
}
bool IsReshapeSupportedNeon(const TensorInfo& input,
std::string* reasonIfUnsupported)
{
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
input.GetDataType(),
&TrueFunc<>,
&TrueFunc<>);
}
bool IsFloorSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
std::string* reasonIfUnsupported)
{
ignore_unused(output);
return IsNeonBackendSupported(reasonIfUnsupported) &&
IsSupportedForDataTypeGeneric(reasonIfUnsupported,
input.GetDataType(),
&FalseFuncF16<>,
&TrueFunc<>,
&FalseFuncU8<>);
}
bool IsLstmSupportedNeon(const TensorInfo& input, const TensorInfo& outputStateIn,
const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer,
const TensorInfo& outputStateOut, const TensorInfo& cellStateOut,
const TensorInfo& output, const LstmDescriptor& descriptor,
const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights,
const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights,
const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights,
const TensorInfo& forgetGateBias, const TensorInfo& cellBias,
const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights,
const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights,
const TensorInfo* inputGateBias, const TensorInfo* projectionWeights,
const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights,
const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported)
{
ignore_unused(input);
ignore_unused(outputStateIn);
ignore_unused(cellStateIn);
ignore_unused(scratchBuffer);
ignore_unused(outputStateOut);
ignore_unused(cellStateOut);
ignore_unused(output);
ignore_unused(descriptor);
ignore_unused(inputToForgetWeights);
ignore_unused(inputToCellWeights);
ignore_unused(inputToOutputWeights);
ignore_unused(recurrentToForgetWeights);
ignore_unused(recurrentToCellWeights);
ignore_unused(recurrentToOutputWeights);
ignore_unused(forgetGateBias);
ignore_unused(cellBias);
ignore_unused(outputGateBias);
ignore_unused(inputToInputWeights);
ignore_unused(recurrentToInputWeights);
ignore_unused(cellToInputWeights);
ignore_unused(inputGateBias);
ignore_unused(projectionWeights);
ignore_unused(projectionBias);
ignore_unused(cellToForgetWeights);
ignore_unused(cellToOutputWeights);
return false;
}
bool IsConvertFp16ToFp32SupportedNeon(const TensorInfo& input,
const TensorInfo& output,
std::string* reasonIfUnsupported)
{
ignore_unused(input);
ignore_unused(output);
return true;
}
bool IsConvertFp32ToFp16SupportedNeon(const TensorInfo& input,
const TensorInfo& output,
std::string* reasonIfUnsupported)
{
ignore_unused(input);
ignore_unused(output);
return true;
}
bool IsMeanSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const MeanDescriptor& descriptor,
std::string* reasonIfUnsupported)
{
return false;
}
}