// blob: a804f55468a450b09906788215d8c9ea37a20092 [file] [log] [blame]
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <array>
#include <functional>
#include <stdint.h>
#include <chrono>
#include "BackendId.hpp"
#include "Exceptions.hpp"
#include "Deprecated.hpp"
// Forward declaration of arm::pipe::ProfilingGuid, so that LayerGuid can be
// named below without the class definition being visible in this header.
namespace arm
{
namespace pipe
{
class ProfilingGuid;
} // namespace pipe
} // namespace arm
/// Define LayerGuid type.
/// Note: this alias is declared at global scope, before namespace armnn is opened.
using LayerGuid = arm::pipe::ProfilingGuid;
namespace armnn
{
/// Maximum number of tensor dimensions; also the capacity of PermutationVector::ArrayType.
constexpr unsigned int MaxNumOfTensorDimensions = 5U;
/// The lowest performance data capture interval we support is 10 milliseconds.
/// NOTE(review): 10000 equals 10 ms only if the unit is microseconds - confirm against the users of this constant.
constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u;
/// Variable to control expire rate of priority queue
constexpr unsigned int EXPIRE_RATE = 3U;
/// Two-valued result code.
/// @enum Status enumeration
/// @var Status::Success
/// @var Status::Failure
enum class Status
{
    Success = 0,
    Failure = 1
};
/// Identifiers for the supported element data types.
/// Every enumerator carries an explicit numeric value (0..9).
enum class DataType
{
    Float16  = 0,
    Float32  = 1,
    QAsymmU8 = 2,
    Signed32 = 3,
    Boolean  = 4,
    QSymmS16 = 5,
    QSymmS8  = 6,
    QAsymmS8 = 7,
    BFloat16 = 8,
    Signed64 = 9,
};
/// Tensor data layout identifiers.
/// Numbering starts at 1: no enumerator has the value 0, so a value-initialized
/// DataLayout does not correspond to any named layout.
enum class DataLayout
{
    NCHW  = 1,
    NHWC  = 2,
    NDHWC = 3,
    NCDHW = 4
};
/// Define the behaviour of the internal profiler when outputting network details
enum class ProfilingDetailsMethod
{
    Undefined         = 0,
    DetailsWithEvents = 1,
    DetailsOnly       = 2
};
/// Execution priority levels; a larger numeric value denotes a higher-named level.
enum class QosExecPriority
{
    Low    = 0,
    Medium = 1,
    High   = 2
};
/// Selector for the supported activation functions.
enum class ActivationFunction
{
    Sigmoid     = 0,
    TanH        = 1,
    Linear      = 2,
    ReLu        = 3,
    BoundedReLu = 4, ///< min(a, max(b, input)) ReLu1 & ReLu6.
    SoftReLu    = 5,
    LeakyReLu   = 6,
    Abs         = 7,
    Sqrt        = 8,
    Square      = 9,
    Elu         = 10,
    HardSwish   = 11
};
/// Selects between the Min and Max variants of ArgMinMax.
enum class ArgMinMaxFunction
{
    Min = 0,
    Max = 1
};
/// Selector for the supported comparison operations.
enum class ComparisonOperation
{
    Equal          = 0,
    Greater        = 1,
    GreaterOrEqual = 2,
    Less           = 3,
    LessOrEqual    = 4,
    NotEqual       = 5
};
/// Selector for the supported two-operand logical operations.
enum class LogicalBinaryOperation
{
    LogicalAnd = 0,
    LogicalOr  = 1
};
/// Selector for the supported single-operand element-wise operations.
enum class UnaryOperation
{
    Abs        = 0,
    Exp        = 1,
    Sqrt       = 2,
    Rsqrt      = 3,
    Neg        = 4,
    LogicalNot = 5,
    Log        = 6,
    Sin        = 7
};
/// Selector for the supported pooling algorithms.
enum class PoolingAlgorithm
{
    Max     = 0,
    Average = 1,
    L2      = 2
};
/// Selector for the supported reduction operations.
enum class ReduceOperation
{
    Sum  = 0,
    Max  = 1,
    Mean = 2,
    Min  = 3,
    Prod = 4
};
/// Selector for the supported resize (interpolation) methods.
enum class ResizeMethod
{
    Bilinear        = 0,
    NearestNeighbor = 1
};
/// Three-state dimensionality tag: not specified, specified, or scalar.
enum class Dimensionality
{
    NotSpecified = 0,
    Specified    = 1,
    Scalar       = 2
};
///
/// The padding method modifies the output of pooling layers.
/// In both supported methods the padding values themselves are ignored (they
/// are not even treated as zeroes, which would make a difference when max
/// pooling a tensor with negative values). The two methods differ only in the
/// divisor used by Average and L2 pooling: IgnoreValue counts the padding
/// fields in the divisor, Exclude does not.
///
enum class PaddingMethod
{
    /// The padding fields count, but are ignored
    IgnoreValue = 0,
    /// The padding fields don't count and are ignored
    Exclude     = 1
};
///
/// The padding mode controls how the padded region is filled: with constant
/// values (Constant), or by reflecting the input either including the border
/// values (Symmetric) or excluding them (Reflect).
///
enum class PaddingMode
{
    Constant  = 0,
    Reflect   = 1,
    Symmetric = 2
};
/// Channel mode selector for normalization: Across or Within.
enum class NormalizationAlgorithmChannel
{
    Across = 0,
    Within = 1
};
/// Selector for the normalization method.
enum class NormalizationAlgorithmMethod
{
    /// Krizhevsky 2012: Local Brightness Normalization
    LocalBrightness = 0,
    /// Jarrett 2009: Local Contrast Normalization
    LocalContrast   = 1
};
/// Rounding direction selector for output shapes: Floor or Ceiling.
enum class OutputShapeRounding
{
    Floor   = 0,
    Ceiling = 1
};
///
/// The ShapeInferenceMethod modifies how the output shapes are treated.
/// When ValidateOnly is selected, the output shapes are inferred from the input
/// parameters of the layer and any mismatch is reported.
/// When InferAndValidate is selected, two actions are performed: (1) infer the
/// output shape from the inputs and (2) validate the shapes as in ValidateOnly.
/// This option has been added to work with tensors whose rank or dimension
/// sizes are not specified explicitly but can be calculated from the inputs.
///
enum class ShapeInferenceMethod
{
    /// Validate all output shapes
    ValidateOnly     = 0,
    /// Infer missing output shapes and validate all output shapes
    InferAndValidate = 1
};
/// Define the Memory Source to reduce copies
///
/// NOTE(review): Malloc, DmaBuf and DmaBufProtected (1, 2, 4) are distinct
/// single bits, but Gralloc (5) has the same bit pattern as
/// Malloc | DmaBufProtected. If these values are ever combined or tested as
/// bit flags, Gralloc is ambiguous - confirm how callers use this enum before
/// relying on it as a mask.
enum class MemorySource : uint32_t
{
    Undefined       = 0,
    Malloc          = 1,
    DmaBuf          = 2,
    DmaBufProtected = 4,
    Gralloc         = 5
};
/// Selects how MemBlocks may be packed into MemBins.
enum class MemBlockStrategyType
{
    /// MemBlocks can be packed on the Y axis only, overlap allowed on X axis.
    /// In other words MemBlocks with overlapping lifetimes cannot use the same MemBin,
    /// equivalent to blob or pooling memory management.
    SingleAxisPacking = 0,

    /// MemBlocks can be packed on either Y or X axis but cannot overlap on both.
    /// In other words MemBlocks with overlapping lifetimes can use the same MemBin,
    /// equivalent to offset or slab memory management.
    MultiAxisPacking  = 1
};
/// Each backend should implement an IBackend.
///
/// Abstract interface: construction and destruction are protected, so only
/// derived classes can create or destroy an IBackend sub-object directly.
class IBackend
{
protected:
    IBackend() = default;
    virtual ~IBackend() = default;

public:
    /// @return reference to the BackendId identifying this backend.
    virtual const BackendId& GetId() const = 0;
};
/// Shared-ownership handle to an IBackend.
using IBackendSharedPtr = std::shared_ptr<IBackend>;
/// Unique-ownership handle to an IBackend whose deleter is a plain function pointer
/// taking the IBackend*; the deleter must be supplied when the pointer is constructed.
using IBackendUniquePtr = std::unique_ptr<IBackend, void(*)(IBackend* backend)>;
/// BackendCapability class
///
/// Enumerators have no explicit values, so they are numbered in declaration
/// order starting from 0.
enum class BackendCapability : uint32_t
{
    /// Constant weights can be accessed through the descriptors,
    /// On the other hand, non-const weights can be accessed through inputs.
    NonConstWeights,

    /// Asynchronous Execution.
    AsyncExecution,

    // add new enum values here
};
/// Device specific knowledge to be passed to the optimizer.
///
/// Abstract interface: construction and destruction are protected, so only
/// derived classes can create or destroy an IDeviceSpec sub-object directly.
class IDeviceSpec
{
protected:
    IDeviceSpec() = default;
    virtual ~IDeviceSpec() = default;

public:
    /// @return reference to the set of backend ids supported by this device spec.
    virtual const BackendIdSet& GetSupportedBackends() const = 0;
};
/// Type of identifiers for bindable layers (inputs, outputs).
/// Note: this id is a signed int, unlike the unsigned imported-input/output ids below.
using LayerBindingId = int;
/// Identifier type for imported inputs (unsigned).
using ImportedInputId = unsigned int;
/// Identifier type for imported outputs (unsigned).
using ImportedOutputId = unsigned int;
class PermutationVector
{
public:
using ValueType = unsigned int;
using SizeType = unsigned int;
using ArrayType = std::array<ValueType, MaxNumOfTensorDimensions>;
using ConstIterator = typename ArrayType::const_iterator;
/// @param dimMappings - Indicates how to translate tensor elements from a given source into the target destination,
/// when source and target potentially have different memory layouts.
///
/// E.g. For a 4-d tensor laid out in a memory with the format (Batch Element, Height, Width, Channels),
/// which is to be passed as an input to ArmNN, each source dimension is mapped to the corresponding
/// ArmNN dimension. The Batch dimension remains the same (0 -> 0). The source Height dimension is mapped
/// to the location of the ArmNN Height dimension (1 -> 2). Similar arguments are made for the Width and
/// Channels (2 -> 3 and 3 -> 1). This will lead to @ref m_DimMappings pointing to the following array:
/// [ 0, 2, 3, 1 ].
///
/// Note that the mapping should be reversed if considering the case of ArmNN 4-d outputs (Batch Element,
/// Channels, Height, Width) being written to a destination with the format mentioned above. We now have
/// 0 -> 0, 2 -> 1, 3 -> 2, 1 -> 3, which, when reordered, lead to the following @ref m_DimMappings contents:
/// [ 0, 3, 1, 2 ].
///
PermutationVector(const ValueType *dimMappings, SizeType numDimMappings);
PermutationVector(std::initializer_list<ValueType> dimMappings);
///
/// Indexing method with out-of-bounds error checking for the m_DimMappings array.
/// @param i - integer value corresponding to index of m_DimMappings array to retrieve element from.
/// @return element at index i of m_DimMappings array.
/// @throws InvalidArgumentException when indexing out-of-bounds index of m_DimMappings array.
///
ValueType operator[](SizeType i) const
{
if (i >= GetSize())
{
throw InvalidArgumentException("Invalid indexing of PermutationVector of size " + std::to_string(GetSize())
+ " at location [" + std::to_string(i) + "].");
}
return m_DimMappings.at(i);
}
SizeType GetSize() const { return m_NumDimMappings; }
ConstIterator begin() const { return m_DimMappings.begin(); }
/**
*
* @return pointer one past the end of the number of mapping not the length of m_DimMappings.
*/
ConstIterator end() const { return m_DimMappings.begin() + m_NumDimMappings; }
bool IsEqual(const PermutationVector& other) const
{
if (m_NumDimMappings != other.m_NumDimMappings) return false;
for (unsigned int i = 0; i < m_NumDimMappings; ++i)
{
if (m_DimMappings[i] != other.m_DimMappings[i]) return false;
}
return true;
}
bool IsInverse(const PermutationVector& other) const
{
bool isInverse = (GetSize() == other.GetSize());
for (SizeType i = 0; isInverse && (i < GetSize()); ++i)
{
isInverse = (m_DimMappings[other.m_DimMappings[i]] == i);
}
return isInverse;
}
private:
ArrayType m_DimMappings;
/// Number of valid entries in @ref m_DimMappings
SizeType m_NumDimMappings;
};
// Forward declaration; only a pointer to ITensorHandle is needed by the callback type below.
class ITensorHandle;
/// Define the type of callback for the Debug layer to call
/// @param guid - guid of layer connected to the input of the Debug layer
/// @param slotIndex - index of the output slot connected to the input of the Debug layer
/// @param tensorHandle - TensorHandle for the input tensor to the Debug layer
/// The callback returns nothing.
using DebugCallbackFunction = std::function<void(LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)>;
/// Define a timer and associated inference ID for recording execution times
/// Note: despite its name, HighResolutionClock aliases a time_point of
/// std::chrono::high_resolution_clock, not the clock type itself.
using HighResolutionClock = std::chrono::high_resolution_clock::time_point;
/// Pair of two time points (presumably start and end of an inference - confirm against users).
using InferenceTimingPair = std::pair<HighResolutionClock, HighResolutionClock>;
/// This list uses X macro technique.
/// See https://en.wikipedia.org/wiki/X_Macro for more info
///
/// Each X(name) entry becomes one LayerType enumerator, numbered by its
/// position in this list (starting at 0).
/// New layers should be added at last to minimize instability, since inserting
/// an entry anywhere else renumbers every entry after it.
/// The final entry deliberately has no trailing backslash, so the macro
/// definition ends there and cannot absorb whatever line follows it.
#define LIST_OF_LAYER_TYPE \
    X(Activation) \
    X(Addition) \
    X(ArgMinMax) \
    X(BatchNormalization) \
    X(BatchToSpaceNd) \
    X(Comparison) \
    X(Concat) \
    X(Constant) \
    X(ConvertBf16ToFp32) \
    X(ConvertFp16ToFp32) \
    X(ConvertFp32ToBf16) \
    X(ConvertFp32ToFp16) \
    X(Convolution2d) \
    X(Debug) \
    X(DepthToSpace) \
    X(DepthwiseConvolution2d) \
    X(Dequantize) \
    X(DetectionPostProcess) \
    X(Division) \
    X(ElementwiseUnary) \
    X(FakeQuantization) \
    X(Fill) \
    X(Floor) \
    X(FullyConnected) \
    X(Gather) \
    X(Input) \
    X(InstanceNormalization) \
    X(L2Normalization) \
    X(LogicalBinary) \
    X(LogSoftmax) \
    X(Lstm) \
    X(QLstm) \
    X(Map) \
    X(Maximum) \
    X(Mean) \
    X(MemCopy) \
    X(MemImport) \
    X(Merge) \
    X(Minimum) \
    X(Multiplication) \
    X(Normalization) \
    X(Output) \
    X(Pad) \
    X(Permute) \
    X(Pooling2d) \
    X(PreCompiled) \
    X(Prelu) \
    X(Quantize) \
    X(QuantizedLstm) \
    X(Reshape) \
    X(Rank) \
    X(Resize) \
    X(Reduce) \
    X(Slice) \
    X(Softmax) \
    X(SpaceToBatchNd) \
    X(SpaceToDepth) \
    X(Splitter) \
    X(Stack) \
    X(StandIn) \
    X(StridedSlice) \
    X(Subtraction) \
    X(Switch) \
    X(Transpose) \
    X(TransposeConvolution2d) \
    X(Unmap) \
    X(Cast) \
    X(Shape) \
    X(UnidirectionalSequenceLstm) \
    X(ChannelShuffle) \
    X(Convolution3d) \
    X(Pooling3d)

/// Layer type identifiers, generated from LIST_OF_LAYER_TYPE.
///
/// FirstLayer and LastLayer are aliases for the first and last generated
/// enumerators, giving the inclusive range of valid layer types.
/// When adding a new layer, adapt also the LastLayer enum value so that it
/// keeps naming the final entry of LIST_OF_LAYER_TYPE.
enum class LayerType
{
#define X(name) name,
    LIST_OF_LAYER_TYPE
#undef X
    FirstLayer = Activation,
    // Must be the last entry of LIST_OF_LAYER_TYPE; it previously named
    // UnidirectionalSequenceLstm, which left ChannelShuffle, Convolution3d and
    // Pooling3d outside the FirstLayer..LastLayer range.
    LastLayer = Pooling3d
};
/// Declaration only; the definition is not part of this header section.
/// @param type - layer type to look up.
/// @return C string for the given layer type (presumably the enumerator's name - confirm against the definition).
const char* GetLayerTypeAsCString(LayerType type);
} // namespace armnn