telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 1 | // |
| 2 | // Copyright © 2017 Arm Ltd. All rights reserved. |
David Beck | ecb56cd | 2018-09-05 12:52:57 +0100 | [diff] [blame] | 3 | // SPDX-License-Identifier: MIT |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 4 | // |
| 5 | |
| 6 | #pragma once |
| 7 | |
| 8 | #include "RefWorkloadUtils.hpp" |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 9 | #include "TensorBufferArrayView.hpp" |
Mike Kelly | 9b39832 | 2019-05-22 17:21:49 +0100 | [diff] [blame] | 10 | #include "BaseIterator.hpp" |
| 11 | #include "Decoders.hpp" |
| 12 | #include "Encoders.hpp" |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 13 | |
| 14 | #include <armnn/Tensor.hpp> |
| 15 | |
Matteo Martincigh | 2135015 | 2018-11-28 16:22:22 +0000 | [diff] [blame] | 16 | #include <DataLayoutIndexed.hpp> |
Matthew Bentham | 8800c00 | 2018-11-19 13:19:28 +0000 | [diff] [blame] | 17 | |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 18 | #include <boost/assert.hpp> |
| 19 | #include <boost/numeric/conversion/cast.hpp> |
| 20 | |
Matteo Martincigh | 4631582 | 2018-11-28 16:22:36 +0000 | [diff] [blame] | 21 | #include <DataLayoutIndexed.hpp> |
| 22 | |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 23 | #include <cmath> |
| 24 | #include <limits> |
| 25 | |
| 26 | namespace armnn |
| 27 | { |
| 28 | |
/// Multiplies an integer by a multiplier that is smaller than one, using quantized integer
/// arithmetic that is consistent with AndroidNN's CPU executor.
struct QuantizedMultiplierSmallerThanOne
{
    /// Builds the quantized representation of the given multiplier. The integer quantities derived
    /// from it are stored in the object so that they can be reused by operator*.
    /// Adapted from Android NN's QuantizeMultiplierSmallerThanOne().
    QuantizedMultiplierSmallerThanOne(float multiplier);

    /// Multiplies rhs by the stored multiplier.
    /// Adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
    int32_t operator*(int32_t rhs) const;

private:
    /// Adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
    static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);

    /// Adapted from gemmlowp's RoundingDivideByPOT().
    static int32_t RoundingDivideByPOT(int32_t x, int exponent);

    // Integer quantities derived from the multiplier passed to the constructor.
    int32_t m_Multiplier;
    int32_t m_RightShift;
};
| 52 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 53 | /// An implementation shared by normal and depthwise convolution. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 54 | template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType> |
| 55 | static void ConvImpl(ConvData data, |
| 56 | const InputType* inputData, |
| 57 | float inputScale, |
| 58 | int32_t inputOffset, |
| 59 | const InputType* filterData, |
| 60 | float filterScale, |
| 61 | int32_t filterOffset, |
| 62 | const BiasType* biasData, |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 63 | float outputScale, |
| 64 | int32_t outputOffset, |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 65 | const TensorInfo& filterInfo, |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 66 | bool depthwise = false) |
| 67 | { |
surmeh01 | 3537c2c | 2018-05-18 16:31:43 +0100 | [diff] [blame] | 68 | if (data.m_Parameters.m_BiasEnabled && !biasData) |
| 69 | { |
| 70 | throw InvalidArgumentException("Bias is enabled but the bias data is invalid"); |
| 71 | } |
| 72 | |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 73 | const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]); |
| 74 | const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 75 | |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 76 | TensorBufferArrayView<InputType> output(outputInfo.GetShape(), |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 77 | GetOutputTensorData<InputType>(0, data), |
| 78 | data.m_Parameters.m_DataLayout); |
| 79 | |
Matteo Martincigh | 2135015 | 2018-11-28 16:22:22 +0000 | [diff] [blame] | 80 | const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout); |
Matteo Martincigh | 4631582 | 2018-11-28 16:22:36 +0000 | [diff] [blame] | 81 | |
Nikhil Raj | e4dfd6e | 2018-10-18 10:11:04 +0100 | [diff] [blame] | 82 | const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex(); |
| 83 | const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); |
| 84 | const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); |
| 85 | |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 86 | unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1; |
| 87 | unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex]; |
| 88 | unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0]; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 89 | |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 90 | unsigned int batchSize = outputInfo.GetShape()[0]; |
| 91 | unsigned int outputHeight = outputInfo.GetShape()[heightIndex]; |
| 92 | unsigned int outputWidth = outputInfo.GetShape()[widthIndex]; |
| 93 | unsigned int inputHeight = inputInfo.GetShape()[heightIndex]; |
| 94 | unsigned int inputWidth = inputInfo.GetShape()[widthIndex]; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 95 | |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 96 | unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex]; |
| 97 | unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex]; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 98 | |
Matteo Martincigh | 4631582 | 2018-11-28 16:22:36 +0000 | [diff] [blame] | 99 | unsigned int paddingTop = data.m_Parameters.m_PadTop; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 100 | unsigned int paddingLeft = data.m_Parameters.m_PadLeft; |
Matteo Martincigh | 4631582 | 2018-11-28 16:22:36 +0000 | [diff] [blame] | 101 | unsigned int xStride = data.m_Parameters.m_StrideX; |
| 102 | unsigned int yStride = data.m_Parameters.m_StrideY; |
Matthew Bentham | 49cb7d0 | 2019-05-10 16:55:23 +0100 | [diff] [blame] | 103 | unsigned int xDilation = data.m_Parameters.m_DilationX; |
| 104 | unsigned int yDilation = data.m_Parameters.m_DilationY; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 105 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 106 | // The world's least efficient convolution. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 107 | for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) |
| 108 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 109 | for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++) |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 110 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 111 | for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++) |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 112 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 113 | for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++) |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 114 | { |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 115 | // This loop goes over each output element. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 116 | AccumulatorType sum = AccumulatorType(); |
| 117 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 118 | // For depthwise, each output channel corresponds to exactly one input channel. |
| 119 | // For normal, must loop over each input channel. |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 120 | for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++) |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 121 | { |
| 122 | unsigned int depthwiseMultiplierIdx = 0; |
| 123 | if (depthwise) |
| 124 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 125 | cInput = cOutput / depthMultiplier; |
| 126 | depthwiseMultiplierIdx = cOutput % depthMultiplier; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 127 | } |
| 128 | |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 129 | for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++) |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 130 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 131 | for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++) |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 132 | { |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 133 | // This loop goes over each input element for each output element. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 134 | |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 135 | unsigned int filterIndex = 0; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 136 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 137 | // Since dimensionality of kernel depends on depthwiseness, so does index. |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 138 | if (depthwise) |
| 139 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 140 | filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels + |
| 141 | cInput * filterWidth * filterHeight + |
| 142 | yFilter * filterWidth + |
| 143 | xFilter; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 144 | } |
| 145 | else |
| 146 | { |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 147 | if (data.m_Parameters.m_DataLayout == DataLayout::NHWC) |
| 148 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 149 | filterIndex = cOutput * filterHeight * filterWidth * inputChannels + |
| 150 | yFilter * filterWidth * inputChannels + |
| 151 | xFilter * inputChannels + |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 152 | cInput; |
| 153 | } |
| 154 | else |
| 155 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 156 | filterIndex = cOutput * filterWidth * filterHeight * inputChannels + |
| 157 | cInput * filterWidth * filterHeight + |
| 158 | yFilter * filterWidth + |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 159 | xFilter; |
| 160 | } |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 161 | } |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 162 | |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 163 | AccumulatorType filterValue = filterData[filterIndex] - |
| 164 | boost::numeric_cast<AccumulatorType>(filterOffset); |
| 165 | |
Matthew Bentham | 49cb7d0 | 2019-05-10 16:55:23 +0100 | [diff] [blame] | 166 | unsigned int yInput = yOutput * yStride + yFilter * yDilation; |
| 167 | unsigned int xInput = xOutput * xStride + xFilter * xDilation; |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 168 | |
| 169 | AccumulatorType inputValue; |
| 170 | |
telsoa01 | c577f2c | 2018-08-31 09:22:23 +0100 | [diff] [blame] | 171 | // Check if we're in the padding. |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 172 | if (yInput < paddingTop || yInput >= inputHeight + paddingTop || |
| 173 | xInput < paddingLeft || xInput >= inputWidth + paddingLeft ) |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 174 | { |
| 175 | inputValue = AccumulatorType(); |
| 176 | } |
| 177 | else |
| 178 | { |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 179 | unsigned int inputIndex; |
| 180 | |
| 181 | if (data.m_Parameters.m_DataLayout == DataLayout::NHWC) |
| 182 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 183 | inputIndex = batchIdx * inputHeight * inputWidth * inputChannels + |
| 184 | (yInput - paddingTop) * inputWidth * inputChannels + |
| 185 | (xInput - paddingLeft) * inputChannels + |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 186 | cInput; |
| 187 | |
| 188 | } |
| 189 | else |
| 190 | { |
Matteo Martincigh | 747ef82 | 2018-12-18 09:26:39 +0000 | [diff] [blame] | 191 | inputIndex = batchIdx * inputWidth * inputHeight * inputChannels + |
| 192 | inputWidth * inputHeight * cInput + |
| 193 | inputWidth * (yInput - paddingTop) + |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 194 | xInput - paddingLeft; |
| 195 | } |
| 196 | |
| 197 | inputValue = inputData[inputIndex] - |
| 198 | boost::numeric_cast<AccumulatorType>(inputOffset); |
| 199 | |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 200 | } |
| 201 | sum += filterValue * inputValue; |
| 202 | } |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | if (data.m_Parameters.m_BiasEnabled) |
| 207 | { |
| 208 | sum += biasData[cOutput]; |
| 209 | } |
| 210 | |
| 211 | if (outputScale != 0.0f) |
| 212 | { |
| 213 | float multiplier = (inputScale * filterScale) / outputScale; |
| 214 | // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent |
| 215 | // with the AndroidNN CPU implementation. This should be (roughly) equivalent to: |
| 216 | // sum = std::round(multiplier * sum + outputOffset); |
| 217 | sum = boost::numeric_cast<AccumulatorType>( |
| 218 | QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum)) |
| 219 | + boost::numeric_cast<AccumulatorType>(outputOffset); |
| 220 | sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255); |
| 221 | } |
| 222 | |
narpra01 | 5f70318 | 2018-10-26 16:24:58 +0100 | [diff] [blame] | 223 | output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum); |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 224 | } |
| 225 | } |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | |
Mike Kelly | 9b39832 | 2019-05-22 17:21:49 +0100 | [diff] [blame] | 230 | void Convolve(const TensorShape& rInputShape, |
| 231 | Decoder<float>& rInputDecoder, |
| 232 | const TensorShape& rOutputShape, |
| 233 | Encoder<float>& rOutputEncoder, |
| 234 | const TensorShape& rFilterShape, |
| 235 | Decoder<float>& rFilterDecoder, |
| 236 | bool biasEnabled, |
| 237 | Decoder<float>* pBiasDecoder, |
| 238 | DataLayout dataLayout, |
| 239 | unsigned int paddingTop, |
| 240 | unsigned int paddingLeft, |
| 241 | unsigned int xStride, |
| 242 | unsigned int yStride, |
| 243 | unsigned int xDilation, |
| 244 | unsigned int yDilation, |
| 245 | bool depthwise = false); |
telsoa01 | 4fcda01 | 2018-03-09 14:13:49 +0000 | [diff] [blame] | 246 | } //namespace armnn |