//
| 2 | // Copyright © 2017 Arm Ltd. All rights reserved. |
| 3 | // SPDX-License-Identifier: MIT |
| 4 | // |
| 5 | |
| 6 | #pragma once |
| 7 | |
#include "Optimization.hpp"

#include <QuantizeHelper.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/DataLayoutIndexed.hpp>

#include <cmath>
#include <limits>
| 14 | |
| 15 | namespace armnn |
| 16 | { |
| 17 | namespace optimizations |
| 18 | { |
| 19 | namespace pad_fold |
| 20 | { |
| 21 | inline float GetZeroElement(const TensorInfo& tensorInfo) |
| 22 | { |
| 23 | return static_cast<float>(tensorInfo.IsQuantized() ? tensorInfo.GetQuantizationOffset() : 0); |
| 24 | } |
| 25 | |
| 26 | inline float GetLowestElement(const TensorInfo& tensorInfo) |
| 27 | { |
| 28 | constexpr float negativeInfinity = -std::numeric_limits<float>::infinity(); |
| 29 | const float scale = tensorInfo.GetQuantizationScale(); |
| 30 | const int32_t offset = tensorInfo.GetQuantizationOffset(); |
| 31 | |
| 32 | switch (tensorInfo.GetDataType()) |
| 33 | { |
| 34 | case DataType::Float16: |
| 35 | return armnnUtils::SelectiveQuantize<armnn::Half>(negativeInfinity, scale, offset); |
| 36 | case DataType::Float32: |
| 37 | return armnnUtils::SelectiveQuantize<float>(negativeInfinity, scale, offset); |
| 38 | case DataType::QAsymmU8: |
| 39 | return armnnUtils::SelectiveQuantize<uint8_t>(negativeInfinity, scale, offset); |
| 40 | case DataType::QSymmS16: |
| 41 | return armnnUtils::SelectiveQuantize<int16_t>(negativeInfinity, scale, offset); |
| 42 | case DataType::QSymmS8: |
| 43 | // Fall-through |
| 44 | case DataType::QAsymmS8: |
| 45 | return armnnUtils::SelectiveQuantize<int8_t>(negativeInfinity, scale, offset); |
| 46 | case DataType::BFloat16: |
| 47 | return armnnUtils::SelectiveQuantize<armnn::BFloat16>(negativeInfinity, scale, offset); |
| 48 | default: |
| 49 | { |
| 50 | ARMNN_ASSERT_MSG(false, "Unsupported DataType"); |
| 51 | return NAN; |
| 52 | } |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | inline bool IsNeutralElement(const Convolution2dDescriptor&, const TensorInfo& tensorInfo, const float tensorValue) |
| 57 | { |
| 58 | return tensorValue == GetZeroElement(tensorInfo); |
| 59 | } |
| 60 | |
| 61 | inline bool IsNeutralElement( |
| 62 | const Pooling2dDescriptor& descriptor, const TensorInfo& tensorInfo, const float tensorValue) |
| 63 | { |
| 64 | return (descriptor.m_PoolType == PoolingAlgorithm::Max) |
| 65 | ? tensorValue <= GetLowestElement(tensorInfo) |
| 66 | : tensorValue == GetZeroElement(tensorInfo); |
| 67 | } |
| 68 | |
| 69 | template <typename Descriptor> |
| 70 | bool TryFoldPadIntoLayer2d( |
| 71 | const PadDescriptor& padDescriptor, Descriptor& layerDescriptor, const TensorInfo& tensorInfo) |
| 72 | { |
| 73 | armnnUtils::DataLayoutIndexed layout = armnnUtils::DataLayoutIndexed(layerDescriptor.m_DataLayout); |
| 74 | constexpr unsigned int batchIndex = 0; |
| 75 | |
| 76 | constexpr auto noPad = std::make_pair(0U, 0U); |
| 77 | |
| 78 | if ((!IsNeutralElement(layerDescriptor, tensorInfo, padDescriptor.m_PadValue)) || |
| 79 | (padDescriptor.m_PadList[batchIndex] != noPad) || (padDescriptor.m_PadList[layout.GetChannelsIndex()] != noPad)) |
| 80 | { |
| 81 | return false; |
| 82 | } |
| 83 | |
| 84 | const auto& padList = padDescriptor.m_PadList; |
| 85 | |
| 86 | // In Convolution2dDescriptor/Pooling2dDescriptor, padLeft and padRight are defined as paddings |
| 87 | // on width dimension whereas padTop and padBottom - paddings on height dimension, so updating |
| 88 | // these according to data layout |
| 89 | layerDescriptor.m_PadLeft += padList[layout.GetWidthIndex()].first; |
| 90 | layerDescriptor.m_PadRight += padList[layout.GetWidthIndex()].second; |
| 91 | layerDescriptor.m_PadTop += padList[layout.GetHeightIndex()].first; |
| 92 | layerDescriptor.m_PadBottom += padList[layout.GetHeightIndex()].second; |
| 93 | |
| 94 | return true; |
| 95 | } |
| 96 | |
| 97 | inline bool TryFoldPadIntoLayer2d( |
| 98 | const PadDescriptor& padDescriptor, Pooling2dDescriptor& poolDescriptor, const TensorInfo& tensorInfo) |
| 99 | { |
| 100 | const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight, |
| 101 | poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom); |
| 102 | bool poolHasPadding = false; |
| 103 | if (poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U)) |
| 104 | { |
| 105 | poolHasPadding = true; |
| 106 | } |
| 107 | |
| 108 | // We cannot fold Average or L2 pooling if there's is already padding and that padding method is Exclude. |
| 109 | if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max) // PoolingAlgorithm::Average or PoolingAlgorithm::L2 |
| 110 | { |
| 111 | if ((poolHasPadding) && (poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude)) |
| 112 | { |
| 113 | return false; |
| 114 | } |
| 115 | } |
| 116 | poolDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue; |
| 117 | |
| 118 | return TryFoldPadIntoLayer2d<Pooling2dDescriptor>(padDescriptor, poolDescriptor, tensorInfo); |
| 119 | } |
| 120 | |
/// Folds a Pad layer into the following 2D layer (Layer2dT) by inserting a new
/// 2D layer whose descriptor absorbs the padding, then rewiring the graph so
/// the old Pad and old 2D layer are left unconnected (and thus removable).
/// @param graph      Graph being optimized.
/// @param connection Input slot of the 2D layer; its connected output slot must
///                   belong to a PadLayer (guaranteed by the matcher that
///                   invokes this — TODO confirm at call sites).
/// @return Pointer to the newly inserted 2D layer, or nullptr if the padding
///         could not be folded (graph left unchanged in that case).
template <typename Layer2dT>
Layer2dT* FoldPadIntoLayer2dImpl(Graph& graph, InputSlot& connection)
{
    PadLayer& padLayer = *PolymorphicDowncast<PadLayer*>(&connection.GetConnectedOutputSlot()->GetOwningLayer());
    Layer2dT& layer2d = *PolymorphicDowncast<Layer2dT*>(&connection.GetOwningLayer());

    const PadDescriptor& padDescriptor = padLayer.GetParameters();
    // Work on a copy of the descriptor; it is only used if folding succeeds.
    auto newLayer2dDescriptor = layer2d.GetParameters();

    if (!TryFoldPadIntoLayer2d(padDescriptor, newLayer2dDescriptor, padLayer.GetOutputSlot().GetTensorInfo()))
    {
        return nullptr;
    }

    // Save original parent output slot of the pad layer.
    OutputSlot& parentSlot = *padLayer.GetInputSlot(0).GetConnectedOutputSlot();

    // Insert new layer2d layer between the pad layer and its parent layer.
    const std::string name = std::string("folded-") + padLayer.GetName() + "-into-" + layer2d.GetName();
    auto& newLayer2d = *graph.InsertNewLayer<Layer2dT>(padLayer.GetInputSlot(0), newLayer2dDescriptor, name.c_str());

    // Reconnect the pad layer with its original parent.
    newLayer2d.GetOutputSlot().MoveAllConnections(parentSlot);

    // Moves connections in old layer2d layer output to new layer.
    // Old layer2d layer will be removed as it's left unconnected.
    // Pad layer will be removed if left unconnected.
    layer2d.GetOutputSlot().MoveAllConnections(newLayer2d.GetOutputSlot());

    return &newLayer2d;
}
| 152 | |
| 153 | class FoldPadIntoConvolution2dImpl |
| 154 | { |
| 155 | public: |
| 156 | void Run(Graph& graph, InputSlot& connection) const |
| 157 | { |
| 158 | const auto newConv2dLayer = FoldPadIntoLayer2dImpl<Convolution2dLayer>(graph, connection); |
| 159 | |
| 160 | if (newConv2dLayer != nullptr) |
| 161 | { |
| 162 | const auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection.GetOwningLayer()); |
| 163 | // Copy weights and bias to the new convolution layer |
| 164 | ARMNN_ASSERT_MSG(conv2dLayer->m_Weight != nullptr, |
| 165 | "FoldPadIntoConvolution2d: Weights data should not be null."); |
| 166 | newConv2dLayer->m_Weight = std::move(conv2dLayer->m_Weight); |
| 167 | |
| 168 | if (conv2dLayer->GetParameters().m_BiasEnabled) |
| 169 | { |
| 170 | ARMNN_ASSERT_MSG(conv2dLayer->m_Bias != nullptr, |
| 171 | "FoldPadIntoConvolution2d: Bias data should not be null if bias is enabled."); |
| 172 | newConv2dLayer->m_Bias = std::move(conv2dLayer->m_Bias); |
| 173 | } |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | protected: |
| 178 | FoldPadIntoConvolution2dImpl() = default; |
| 179 | ~FoldPadIntoConvolution2dImpl() = default; |
| 180 | }; |
| 181 | |
| 182 | class FoldPadIntoPooling2dImpl |
| 183 | { |
| 184 | public: |
| 185 | void Run(Graph& graph, InputSlot& connection) const |
| 186 | { |
| 187 | FoldPadIntoLayer2dImpl<Pooling2dLayer>(graph, connection); |
| 188 | } |
| 189 | |
| 190 | protected: |
| 191 | FoldPadIntoPooling2dImpl() = default; |
| 192 | ~FoldPadIntoPooling2dImpl() = default; |
| 193 | }; |
| 194 | } // namespace pad_fold |
| 195 | |
// Optimization matching a Pad layer whose only consumer is a Convolution2d
// layer, folding the padding into the convolution's descriptor.
using FoldPadIntoConvolution2d =
    OptimizeForExclusiveConnection<PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl>;
// Optimization matching a Pad layer whose only consumer is a Pooling2d layer,
// folding the padding into the pooling descriptor.
using FoldPadIntoPooling2d =
    OptimizeForExclusiveConnection<PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl>;
| 200 | |
| 201 | } // namespace optimizations |
| 202 | } // namespace armnn |
| 203 | |
| 204 | |