//
// Copyright © 2021-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "Optimization.hpp"
#include <armnnUtils/QuantizeHelper.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/DataLayoutIndexed.hpp>
namespace armnn
{
namespace optimizations
{
namespace pad_fold
{
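
// Returns the "zero" element for a tensor: the quantization offset for quantized
// tensors, otherwise 0. A pad filled with this value adds nothing to a convolution result.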
inline float GetZeroElement(const TensorInfo& tensorInfo)
{
return static_cast<float>(tensorInfo.IsQuantized() ? tensorInfo.GetQuantizationOffset() : 0);
}
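
// Returns the lowest value representable in the tensor's data type, i.e. negative
// infinity quantized with the tensor's scale and offset. Used as the neutral element
// for max pooling.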
inline float GetLowestElement(const TensorInfo& tensorInfo)
{
constexpr float negativeInfinity = -std::numeric_limits<float>::infinity();
const float scale = tensorInfo.GetQuantizationScale();
const int32_t offset = tensorInfo.GetQuantizationOffset();
switch (tensorInfo.GetDataType())
{
case DataType::Float16:
return armnnUtils::SelectiveQuantize<armnn::Half>(negativeInfinity, scale, offset);
case DataType::Float32:
return armnnUtils::SelectiveQuantize<float>(negativeInfinity, scale, offset);
case DataType::QAsymmU8:
return armnnUtils::SelectiveQuantize<uint8_t>(negativeInfinity, scale, offset);
case DataType::QSymmS16:
return armnnUtils::SelectiveQuantize<int16_t>(negativeInfinity, scale, offset);
case DataType::QSymmS8:
// Fall-through
case DataType::QAsymmS8:
return armnnUtils::SelectiveQuantize<int8_t>(negativeInfinity, scale, offset);
case DataType::BFloat16:
return armnnUtils::SelectiveQuantize<armnn::BFloat16>(negativeInfinity, scale, offset);
default:
{
ARMNN_ASSERT_MSG(false, "Unsupported DataType");
return NAN;
}
}
}
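
// A pad value is "neutral" when folding it into the following layer cannot change that
// layer's output: the zero element for (depthwise) convolution, the lowest element for
// max pooling, and the zero element for the remaining pooling algorithms.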
inline bool IsNeutralElement(const Convolution2dDescriptor&, const TensorInfo& tensorInfo, const float tensorValue)
{
return tensorValue == GetZeroElement(tensorInfo);
}
inline bool IsNeutralElement(const DepthwiseConvolution2dDescriptor&,
const TensorInfo& tensorInfo,
const float tensorValue)
{
return tensorValue == GetZeroElement(tensorInfo);
}
inline bool IsNeutralElement(
const Pooling2dDescriptor& descriptor, const TensorInfo& tensorInfo, const float tensorValue)
{
return (descriptor.m_PoolType == PoolingAlgorithm::Max)
? tensorValue <= GetLowestElement(tensorInfo)
: tensorValue == GetZeroElement(tensorInfo);
}
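
// Returns true if the pooling descriptor already specifies any explicit padding.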
inline bool IsPooling2dPadded(const Pooling2dDescriptor& poolDescriptor)
{
const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight,
poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom);
if (poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U))
{
return true;
}
return false;
}
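
// Attempts to fold the padding described by padDescriptor into a 2D layer descriptor.
// Folding is only legal when the pad value is neutral for the layer and no padding is
// requested on the batch or channel dimensions; on success the width and height paddings
// are added to the layer descriptor and true is returned.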
template <typename Descriptor>
bool TryFoldPadIntoLayer2d(
const PadDescriptor& padDescriptor, Descriptor& layerDescriptor, const TensorInfo& tensorInfo)
{
armnnUtils::DataLayoutIndexed layout = armnnUtils::DataLayoutIndexed(layerDescriptor.m_DataLayout);
constexpr unsigned int batchIndex = 0;
constexpr auto noPad = std::make_pair(0U, 0U);
if ((!IsNeutralElement(layerDescriptor, tensorInfo, padDescriptor.m_PadValue)) ||
(padDescriptor.m_PadList[batchIndex] != noPad) || (padDescriptor.m_PadList[layout.GetChannelsIndex()] != noPad))
{
return false;
}
const auto& padList = padDescriptor.m_PadList;
// In Convolution2dDescriptor and Pooling2dDescriptor, padLeft/padRight are the paddings
// on the width dimension and padTop/padBottom the paddings on the height dimension,
// so update them according to the data layout.
layerDescriptor.m_PadLeft += padList[layout.GetWidthIndex()].first;
layerDescriptor.m_PadRight += padList[layout.GetWidthIndex()].second;
layerDescriptor.m_PadTop += padList[layout.GetHeightIndex()].first;
layerDescriptor.m_PadBottom += padList[layout.GetHeightIndex()].second;
return true;
}
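
// Pooling2d-specific overload: rejects the cases where folding would change the result
// (padded non-max pooling with the Exclude padding method, or quantized NHWC average
// pooling unless handled as a backend-specific optimization), switches the padding
// method to IgnoreValue and then delegates to the generic fold above.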
inline bool TryFoldPadIntoLayer2d(const PadDescriptor& padDescriptor,
Pooling2dDescriptor& poolDescriptor,
const TensorInfo& tensorInfo,
bool isBackendOptimization = false)
{
// Cannot fold Average or L2 pooling if padding exists and the padding method is Exclude.
if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max &&
IsPooling2dPadded(poolDescriptor) &&
poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude)
{
return false;
}
// Cannot fold Average pooling if the data type is quantized and the layout is NHWC in the
// Neon backend. This specific case is therefore handled as a backend-specific optimization.
if (!isBackendOptimization &&
tensorInfo.IsQuantized() &&
poolDescriptor.m_PoolType == PoolingAlgorithm::Average &&
poolDescriptor.m_DataLayout == DataLayout::NHWC)
{
return false;
}
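// Treat the folded padding like the explicit pad values it replaces: with IgnoreValue the
// padded positions still count towards the pooling window, matching the original
// Pad + Pooling2d behaviour.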
poolDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue;
return TryFoldPadIntoLayer2d<Pooling2dDescriptor>(padDescriptor, poolDescriptor, tensorInfo);
}
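
// Folds a preceding Pad layer into the 2D layer of type Layer2dT that consumes it, when
// possible. A new layer carrying the combined padding is inserted and rewired into the
// graph, and the old layers are left unconnected so they can be removed. Returns the new
// layer, or nullptr if folding is not possible.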
template <typename Layer2dT>
Layer2dT* FoldPadIntoLayer2dImpl(Graph& graph, InputSlot& connection)
{
PadLayer& padLayer = *PolymorphicDowncast<PadLayer*>(&connection.GetConnectedOutputSlot()->GetOwningLayer());
Layer2dT& layer2d = *PolymorphicDowncast<Layer2dT*>(&connection.GetOwningLayer());
const PadDescriptor& padDescriptor = padLayer.GetParameters();
auto newLayer2dDescriptor = layer2d.GetParameters();
if (!TryFoldPadIntoLayer2d(padDescriptor, newLayer2dDescriptor, padLayer.GetOutputSlot().GetTensorInfo()))
{
return nullptr;
}
// Work around an issue in the compute library: the conv2d algorithm it selects does not
// handle the 1x1 filter case when the padding size >= the filter size.
if constexpr (std::is_same<Layer2dT, armnn::Convolution2dLayer>::value)
{
// Get filter width and height
armnnUtils::DataLayoutIndexed dataLayoutIndex(newLayer2dDescriptor.m_DataLayout);
const TensorShape& filterShape = layer2d.GetInputSlot(1).GetTensorInfo().GetShape();
unsigned int filterWidth = filterShape[dataLayoutIndex.GetWidthIndex()];
unsigned int filterHeight = filterShape[dataLayoutIndex.GetHeightIndex()];
// Calculate total padding and check conditions
auto horizontalPadding = newLayer2dDescriptor.m_PadLeft + newLayer2dDescriptor.m_PadRight;
auto verticalPadding = newLayer2dDescriptor.m_PadTop + newLayer2dDescriptor.m_PadBottom;
if ((filterWidth == 1) && (horizontalPadding >= filterWidth))
{
return nullptr;
}
else if ((filterHeight == 1) && (verticalPadding >= filterHeight))
{
return nullptr;
}
}
// Save original parent output slot of the pad layer
OutputSlot& parentSlot = *padLayer.GetInputSlot(0).GetConnectedOutputSlot();
// Insert new layer2d layer between the pad layer and its parent layer.
const std::string name = std::string("folded-") + padLayer.GetName() + "-into-" + layer2d.GetName();
auto& newLayer2d = *graph.InsertNewLayer<Layer2dT>(padLayer.GetInputSlot(0), newLayer2dDescriptor, name.c_str());
newLayer2d.GetOutputSlot().MoveAllConnections(parentSlot);
// Start at 1 to connect only weights and bias
for (unsigned int i = 1; i < layer2d.GetNumInputSlots(); ++i)
{
if (layer2d.GetInputSlot(i).GetConnectedOutputSlot() != nullptr)
{
Layer& tgtLayer = layer2d.GetInputSlot(i).GetConnectedOutputSlot()->GetOwningLayer();
// Remove old connection and connect to new layer2d
tgtLayer.GetOutputSlot(0).Disconnect(layer2d.GetInputSlot(i));
tgtLayer.GetOutputSlot(0).Connect(newLayer2d.GetInputSlot(i));
}
}
// Moves connections in old layer2d layer output to new layer.
// Old layer2d layer will be removed as it's left unconnected.
// Pad layer will be removed if left unconnected.
layer2d.GetOutputSlot().MoveAllConnections(newLayer2d.GetOutputSlot());
return &newLayer2d;
}
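
// Substitution used by the FoldPadIntoConvolution2d optimization: folds a Pad layer into
// the Convolution2dLayer that follows it and checks that the new layer kept its weights
// and (if enabled) bias connections.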
class FoldPadIntoConvolution2dImpl
{
public:
void Run(Graph& graph, InputSlot& connection) const
{
const auto newConv2dLayer = FoldPadIntoLayer2dImpl<Convolution2dLayer>(graph, connection);
if (newConv2dLayer != nullptr)
{
const auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection.GetOwningLayer());
ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr,
"FoldPadIntoConvolution2d: New convolution layer is missing connection to weights layer");
if (conv2dLayer->GetParameters().m_BiasEnabled)
{
ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr,
"FoldPadIntoConvolution2d: New convolution layer is missing "
"connection to bias layer.");
}
}
}
protected:
FoldPadIntoConvolution2dImpl() = default;
~FoldPadIntoConvolution2dImpl() = default;
};
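
// Substitution used by the FoldPadIntoDepthwiseConvolution2d optimization: as above, but
// for DepthwiseConvolution2dLayer.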
class FoldPadIntoDepthwiseConvolution2dImpl
{
public:
void Run(Graph& graph, InputSlot& connection) const
{
const auto newConv2dLayer = FoldPadIntoLayer2dImpl<DepthwiseConvolution2dLayer>(graph, connection);
if (newConv2dLayer != nullptr)
{
const auto conv2dLayer = PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&connection.GetOwningLayer());
ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr,
"FoldPadIntoDepthwiseConvolution2d: New convolution layer is missing "
"connection to weights layer");
if (conv2dLayer->GetParameters().m_BiasEnabled)
{
ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr,
"FoldPadIntoDepthwiseConvolution2d: New convolution layer is missing "
"connection to bias layer.");
}
}
}
protected:
FoldPadIntoDepthwiseConvolution2dImpl() = default;
~FoldPadIntoDepthwiseConvolution2dImpl() = default;
};
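
// Substitution used by the FoldPadIntoPooling2d optimization: folds a Pad layer into the
// Pooling2dLayer that follows it.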
class FoldPadIntoPooling2dImpl
{
public:
void Run(Graph& graph, InputSlot& connection) const
{
FoldPadIntoLayer2dImpl<Pooling2dLayer>(graph, connection);
}
protected:
FoldPadIntoPooling2dImpl() = default;
~FoldPadIntoPooling2dImpl() = default;
};
} // namespace pad_fold
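
// Optimizations that fold a Pad layer into the layer it exclusively feeds
// (Convolution2d, DepthwiseConvolution2d or Pooling2d).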
using FoldPadIntoConvolution2d =
OptimizeForExclusiveConnection<PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl>;
using FoldPadIntoDepthwiseConvolution2d =
OptimizeForExclusiveConnection<PadLayer,
DepthwiseConvolution2dLayer,
pad_fold::FoldPadIntoDepthwiseConvolution2dImpl>;
using FoldPadIntoPooling2d =
OptimizeForExclusiveConnection<PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl>;
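
// Illustrative usage (assuming the armnn Optimizer API): these optimizations are applied
// to a Graph during network optimization, for example:
//   Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoConvolution2d(),
//                                            FoldPadIntoDepthwiseConvolution2d(),
//                                            FoldPadIntoPooling2d()));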
} // namespace optimizations
} // namespace armnn