blob: 5592491653323b30a1b8dd6ac7b9939e8d6fa98d [file] [log] [blame]
Diego Lopez Recasfe95d722021-03-19 12:40:16 +00001//
Tracy Narine3c3da962024-02-12 14:19:03 +00002// Copyright © 2021-2024 Arm Ltd and Contributors. All rights reserved.
Diego Lopez Recasfe95d722021-03-19 12:40:16 +00003// SPDX-License-Identifier: MIT
4//
5
6#pragma once
7
8#include "Optimization.hpp"
9
Colm Donelanc42a9872022-02-02 16:35:09 +000010#include <armnnUtils/QuantizeHelper.hpp>
Diego Lopez Recasfe95d722021-03-19 12:40:16 +000011
12#include <armnn/utility/PolymorphicDowncast.hpp>
13#include <armnnUtils/DataLayoutIndexed.hpp>
14
15namespace armnn
16{
17namespace optimizations
18{
19namespace pad_fold
20{
21inline float GetZeroElement(const TensorInfo& tensorInfo)
22{
23 return static_cast<float>(tensorInfo.IsQuantized() ? tensorInfo.GetQuantizationOffset() : 0);
24}
25
/// Returns the quantized representation of -infinity for the tensor's data
/// type, i.e. the smallest value a pad element can take and still be treated
/// as neutral for Max pooling.
inline float GetLowestElement(const TensorInfo& tensorInfo)
{
    constexpr float negativeInfinity = -std::numeric_limits<float>::infinity();
    const float scale = tensorInfo.GetQuantizationScale();
    const int32_t offset = tensorInfo.GetQuantizationOffset();

    // SelectiveQuantize clamps -inf to the lowest representable value of the
    // target type (and is a pass-through for float types).
    switch (tensorInfo.GetDataType())
    {
        case DataType::Float16:
            return armnnUtils::SelectiveQuantize<armnn::Half>(negativeInfinity, scale, offset);
        case DataType::Float32:
            return armnnUtils::SelectiveQuantize<float>(negativeInfinity, scale, offset);
        case DataType::QAsymmU8:
            return armnnUtils::SelectiveQuantize<uint8_t>(negativeInfinity, scale, offset);
        case DataType::QSymmS16:
            return armnnUtils::SelectiveQuantize<int16_t>(negativeInfinity, scale, offset);
        case DataType::QSymmS8:
            // Fall-through
        case DataType::QAsymmS8:
            return armnnUtils::SelectiveQuantize<int8_t>(negativeInfinity, scale, offset);
        case DataType::BFloat16:
            return armnnUtils::SelectiveQuantize<armnn::BFloat16>(negativeInfinity, scale, offset);
        default:
        {
            // NAN compares unequal to everything, so callers comparing with
            // <= will treat an unsupported type as "not neutral".
            ARMNN_ASSERT_MSG(false, "Unsupported DataType");
            return NAN;
        }
    }
}
55
56inline bool IsNeutralElement(const Convolution2dDescriptor&, const TensorInfo& tensorInfo, const float tensorValue)
57{
58 return tensorValue == GetZeroElement(tensorInfo);
59}
60
Teresa Charlin5786eb72021-05-21 16:29:45 +010061inline bool IsNeutralElement(const DepthwiseConvolution2dDescriptor&,
62 const TensorInfo& tensorInfo,
63 const float tensorValue)
64{
65 return tensorValue == GetZeroElement(tensorInfo);
66}
67
Diego Lopez Recasfe95d722021-03-19 12:40:16 +000068inline bool IsNeutralElement(
69 const Pooling2dDescriptor& descriptor, const TensorInfo& tensorInfo, const float tensorValue)
70{
71 return (descriptor.m_PoolType == PoolingAlgorithm::Max)
72 ? tensorValue <= GetLowestElement(tensorInfo)
73 : tensorValue == GetZeroElement(tensorInfo);
74}
75
Cathal Corbett3883b272022-07-22 16:03:36 +010076inline bool IsPooling2dPadded(const Pooling2dDescriptor& poolDescriptor)
77{
78 const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight,
79 poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom);
80 if (poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U))
81 {
82 return true;
83 }
84 return false;
85}
86
Diego Lopez Recasfe95d722021-03-19 12:40:16 +000087template <typename Descriptor>
88bool TryFoldPadIntoLayer2d(
89 const PadDescriptor& padDescriptor, Descriptor& layerDescriptor, const TensorInfo& tensorInfo)
90{
91 armnnUtils::DataLayoutIndexed layout = armnnUtils::DataLayoutIndexed(layerDescriptor.m_DataLayout);
92 constexpr unsigned int batchIndex = 0;
93
94 constexpr auto noPad = std::make_pair(0U, 0U);
95
96 if ((!IsNeutralElement(layerDescriptor, tensorInfo, padDescriptor.m_PadValue)) ||
97 (padDescriptor.m_PadList[batchIndex] != noPad) || (padDescriptor.m_PadList[layout.GetChannelsIndex()] != noPad))
98 {
99 return false;
100 }
101
102 const auto& padList = padDescriptor.m_PadList;
103
104 // In Convolution2dDescriptor/Pooling2dDescriptor, padLeft and padRight are defined as paddings
105 // on width dimension whereas padTop and padBottom - paddings on height dimension, so updating
106 // these according to data layout
107 layerDescriptor.m_PadLeft += padList[layout.GetWidthIndex()].first;
108 layerDescriptor.m_PadRight += padList[layout.GetWidthIndex()].second;
109 layerDescriptor.m_PadTop += padList[layout.GetHeightIndex()].first;
110 layerDescriptor.m_PadBottom += padList[layout.GetHeightIndex()].second;
111
112 return true;
113}
114
Cathal Corbett3883b272022-07-22 16:03:36 +0100115inline bool TryFoldPadIntoLayer2d(const PadDescriptor& padDescriptor,
116 Pooling2dDescriptor& poolDescriptor,
117 const TensorInfo& tensorInfo,
118 bool isBackendOptimization = false)
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000119{
Cathal Corbett3883b272022-07-22 16:03:36 +0100120 // Cannot fold Average or L2 pooling if padding exists and the padding method is Exclude.
121 if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max &&
122 IsPooling2dPadded(poolDescriptor) &&
123 poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude)
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000124 {
Cathal Corbett3883b272022-07-22 16:03:36 +0100125 return false;
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000126 }
127
Cathal Corbett3883b272022-07-22 16:03:36 +0100128 // Cannot fold Average pooling if data type is quantized and layout is NHWC in Neon backend.
129 // Therefore, this specific case will become a backend specific optimization.
130 if (!isBackendOptimization &&
131 tensorInfo.IsQuantized() &&
132 poolDescriptor.m_PoolType == PoolingAlgorithm::Average &&
133 poolDescriptor.m_DataLayout == DataLayout::NHWC)
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000134 {
Cathal Corbett3883b272022-07-22 16:03:36 +0100135 return false;
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000136 }
Cathal Corbett3883b272022-07-22 16:03:36 +0100137
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000138 poolDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue;
139
140 return TryFoldPadIntoLayer2d<Pooling2dDescriptor>(padDescriptor, poolDescriptor, tensorInfo);
141}
142
/// Replaces a Pad layer followed by a 2d layer (through 'connection') with a
/// single 2d layer whose descriptor carries the combined padding.
///
/// @param graph      Graph to rewire; the new layer is inserted into it.
/// @param connection The input slot of the 2d layer whose producer is the Pad.
/// @return pointer to the newly inserted layer, or nullptr if folding was not
///         possible (graph left untouched in that case).
template <typename Layer2dT>
Layer2dT* FoldPadIntoLayer2dImpl(Graph& graph, InputSlot& connection)
{
    // The caller (OptimizeForExclusiveConnection) guarantees the producer is a
    // PadLayer and the consumer is a Layer2dT, so the downcasts are safe.
    PadLayer& padLayer = *PolymorphicDowncast<PadLayer*>(&connection.GetConnectedOutputSlot()->GetOwningLayer());
    Layer2dT& layer2d = *PolymorphicDowncast<Layer2dT*>(&connection.GetOwningLayer());

    const PadDescriptor& padDescriptor = padLayer.GetParameters();
    auto newLayer2dDescriptor = layer2d.GetParameters();

    if (!TryFoldPadIntoLayer2d(padDescriptor, newLayer2dDescriptor, padLayer.GetOutputSlot().GetTensorInfo()))
    {
        return nullptr;
    }

    // Workaround an issue in the compute library. The conv2d algorithm that the
    // compute library is choosing is not handling the 1x1 filter case when
    // the padding size >= filter size
    if constexpr (std::is_same<Layer2dT, armnn::Convolution2dLayer>::value)
    {
        // Get filter width and height (input slot 1 holds the weights)
        armnnUtils::DataLayoutIndexed dataLayoutIndex(newLayer2dDescriptor.m_DataLayout);
        const TensorShape& filterShape = layer2d.GetInputSlot(1).GetTensorInfo().GetShape();
        unsigned int filterWidth = filterShape[dataLayoutIndex.GetWidthIndex()];
        unsigned int filterHeight = filterShape[dataLayoutIndex.GetHeightIndex()];
        // Calculate total padding and check conditions
        auto horizontalPadding = newLayer2dDescriptor.m_PadLeft + newLayer2dDescriptor.m_PadRight;
        auto verticalPadding = newLayer2dDescriptor.m_PadTop + newLayer2dDescriptor.m_PadBottom;
        if ((filterWidth == 1) && (horizontalPadding >= filterWidth))
        {
            return nullptr;
        }
        else if ((filterHeight == 1) && (verticalPadding >= filterHeight))
        {
            return nullptr;
        }
    }

    // Save original parent output slot of the pad layer
    OutputSlot& parentSlot = *padLayer.GetInputSlot(0).GetConnectedOutputSlot();

    // Insert new layer2d layer between the pad layer and its parent layer.
    const std::string name = std::string("folded-") + padLayer.GetName() + "-into-" + layer2d.GetName();
    auto& newLayer2d = *graph.InsertNewLayer<Layer2dT>(padLayer.GetInputSlot(0), newLayer2dDescriptor, name.c_str());

    // Reconnect the new layer directly to the pad layer's former producer,
    // bypassing (and thereby orphaning) the pad layer.
    newLayer2d.GetOutputSlot().MoveAllConnections(parentSlot);
    // Start at 1 to connect only weights and bias
    for (unsigned int i = 1; i < layer2d.GetNumInputSlots(); ++i)
    {
        if (layer2d.GetInputSlot(i).GetConnectedOutputSlot() != nullptr)
        {
            Layer& tgtLayer = layer2d.GetInputSlot(i).GetConnectedOutputSlot()->GetOwningLayer();
            // Remove old connection and connect to new layer2d
            tgtLayer.GetOutputSlot(0).Disconnect(layer2d.GetInputSlot(i));
            tgtLayer.GetOutputSlot(0).Connect(newLayer2d.GetInputSlot(i));
        }
    }

    // Moves connections in old layer2d layer output to new layer.
    // Old layer2d layer will be removed as it's left unconnected.
    // Pad layer will be removed if left unconnected.
    layer2d.GetOutputSlot().MoveAllConnections(newLayer2d.GetOutputSlot());

    return &newLayer2d;
}
207
208class FoldPadIntoConvolution2dImpl
209{
210public:
211 void Run(Graph& graph, InputSlot& connection) const
212 {
213 const auto newConv2dLayer = FoldPadIntoLayer2dImpl<Convolution2dLayer>(graph, connection);
214
215 if (newConv2dLayer != nullptr)
216 {
217 const auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection.GetOwningLayer());
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100218 ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr,
219 "FoldPadIntoConvolution2d: New convolution layer is missing connection to weights layer");
220
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000221 if (conv2dLayer->GetParameters().m_BiasEnabled)
222 {
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100223 ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr,
224 "FoldPadIntoConvolution2d: New convolution layer is missing "
225 "connection to bias layer.");
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000226 }
227 }
228 }
229
230protected:
231 FoldPadIntoConvolution2dImpl() = default;
232 ~FoldPadIntoConvolution2dImpl() = default;
233};
234
Teresa Charlin5786eb72021-05-21 16:29:45 +0100235class FoldPadIntoDepthwiseConvolution2dImpl
236{
237public:
238 void Run(Graph& graph, InputSlot& connection) const
239 {
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100240 const auto newConv2dLayer = FoldPadIntoLayer2dImpl<DepthwiseConvolution2dLayer>(graph, connection);
Teresa Charlin5786eb72021-05-21 16:29:45 +0100241
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100242 if (newConv2dLayer != nullptr)
Teresa Charlin5786eb72021-05-21 16:29:45 +0100243 {
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100244 const auto conv2dLayer = PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&connection.GetOwningLayer());
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100245 ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr,
Mike Kellyec67a0f2022-11-25 13:55:24 +0000246 "FoldPadIntoDepthwiseConvolution2d: New convolution layer is missing "
247 "connection to weights layer");
Cathal Corbett06902652022-04-14 17:55:11 +0100248
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100249 if (conv2dLayer->GetParameters().m_BiasEnabled)
Teresa Charlin5786eb72021-05-21 16:29:45 +0100250 {
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100251 ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr,
252 "FoldPadIntoConvolution2d: New convolution layer is missing "
253 "connection to bias layer.");
Teresa Charlin5786eb72021-05-21 16:29:45 +0100254 }
255 }
256 }
Teresa Charlin5786eb72021-05-21 16:29:45 +0100257protected:
258 FoldPadIntoDepthwiseConvolution2dImpl() = default;
259 ~FoldPadIntoDepthwiseConvolution2dImpl() = default;
260};
261
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000262class FoldPadIntoPooling2dImpl
263{
264public:
265 void Run(Graph& graph, InputSlot& connection) const
266 {
267 FoldPadIntoLayer2dImpl<Pooling2dLayer>(graph, connection);
268 }
269
270protected:
271 FoldPadIntoPooling2dImpl() = default;
272 ~FoldPadIntoPooling2dImpl() = default;
273};
274} // namespace pad_fold
275
276using FoldPadIntoConvolution2d =
277 OptimizeForExclusiveConnection<PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl>;
Teresa Charlin5786eb72021-05-21 16:29:45 +0100278using FoldPadIntoDepthwiseConvolution2d =
279 OptimizeForExclusiveConnection <PadLayer,
280 DepthwiseConvolution2dLayer,
281 pad_fold::FoldPadIntoDepthwiseConvolution2dImpl>;
Diego Lopez Recasfe95d722021-03-19 12:40:16 +0000282using FoldPadIntoPooling2d =
283 OptimizeForExclusiveConnection<PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl>;
284
285} // namespace optimizations
286} // namespace armnn
287
288