blob: 3551ba8f903acc1855a4cc1eaf8a8743843fa400 [file] [log] [blame]
telsoa014fcda012018-03-09 14:13:49 +00001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
David Beckecb56cd2018-09-05 12:52:57 +01003// SPDX-License-Identifier: MIT
telsoa014fcda012018-03-09 14:13:49 +00004//
5
6#pragma once
7
8#include "RefWorkloadUtils.hpp"
narpra015f703182018-10-26 16:24:58 +01009#include "TensorBufferArrayView.hpp"
Mike Kelly9b398322019-05-22 17:21:49 +010010#include "BaseIterator.hpp"
11#include "Decoders.hpp"
12#include "Encoders.hpp"
telsoa014fcda012018-03-09 14:13:49 +000013
14#include <armnn/Tensor.hpp>
15
Matteo Martincigh21350152018-11-28 16:22:22 +000016#include <DataLayoutIndexed.hpp>
Matthew Bentham8800c002018-11-19 13:19:28 +000017
telsoa014fcda012018-03-09 14:13:49 +000018#include <boost/assert.hpp>
19#include <boost/numeric/conversion/cast.hpp>
20
Matteo Martincigh46315822018-11-28 16:22:36 +000021#include <DataLayoutIndexed.hpp>
22
telsoa014fcda012018-03-09 14:13:49 +000023#include <cmath>
24#include <limits>
25
26namespace armnn
27{
28
/// Performs multiplication of an integer with a multiplier which is less than one,
/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
struct QuantizedMultiplierSmallerThanOne
{
public:
    /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
    /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
    /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
    /// NOTE(review): single-argument constructor is not explicit — confirm implicit conversion
    /// from float is intended.
    QuantizedMultiplierSmallerThanOne(float multiplier);

    /// Multiplies the given value by the stored multiplier using quantized integer arithmetic.
    /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
    int32_t operator*(int32_t rhs) const;

private:
    /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
    static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);

    /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
    static int32_t RoundingDivideByPOT(int32_t x, int exponent);

    int32_t m_Multiplier;   // Integer representation of the multiplier, set by the constructor.
    int32_t m_RightShift;   // Right shift applied after the integer multiplication, set by the constructor.
};
52
/// An implementation shared by normal and depthwise convolution.
/// Computes a reference (unoptimized) quantized convolution over the input tensor held in
/// data.m_Inputs[0], writing the result to data.m_Outputs[0].
/// @param data          Workload data; supplies the convolution parameters (padding, strides,
///                      dilation, data layout, bias flag) and the input/output tensor handles.
/// @param inputData     Raw pointer to the (quantized) input values.
/// @param inputScale    Quantization scale of the input tensor.
/// @param inputOffset   Quantization zero-point of the input tensor.
/// @param filterData    Raw pointer to the (quantized) filter/weight values.
/// @param filterScale   Quantization scale of the filter tensor.
/// @param filterOffset  Quantization zero-point of the filter tensor.
/// @param biasData      Per-output-channel bias values; must be non-null when
///                      data.m_Parameters.m_BiasEnabled is set (throws otherwise).
/// @param outputScale   Quantization scale of the output tensor; a value of 0.0f skips the
///                      requantization/clamping step (float path).
/// @param outputOffset  Quantization zero-point of the output tensor.
/// @param filterInfo    TensorInfo describing the filter shape. For depthwise the shape is
///                      interpreted as [depthMultiplier, inputChannels, filterHeight, filterWidth].
/// @param depthwise     True for depthwise convolution, false for normal convolution.
/// @throws InvalidArgumentException if bias is enabled but biasData is null.
template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
static void ConvImpl(ConvData data,
                     const InputType* inputData,
                     float inputScale,
                     int32_t inputOffset,
                     const InputType* filterData,
                     float filterScale,
                     int32_t filterOffset,
                     const BiasType* biasData,
                     float outputScale,
                     int32_t outputOffset,
                     const TensorInfo& filterInfo,
                     bool depthwise = false)
{
    if (data.m_Parameters.m_BiasEnabled && !biasData)
    {
        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
    }

    const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);

    // View over the raw output buffer that maps (n, c, h, w) indices according to the data layout.
    TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
                                            GetOutputTensorData<InputType>(0, data),
                                            data.m_Parameters.m_DataLayout);

    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);

    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
    const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();

    // Filter dimensions depend on depthwiseness: depthwise filters are laid out
    // [depthMultiplier, inputChannels, filterHeight, filterWidth] regardless of data layout.
    unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
    unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
    unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];

    unsigned int batchSize = outputInfo.GetShape()[0];
    unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
    unsigned int outputWidth = outputInfo.GetShape()[widthIndex];
    unsigned int inputHeight = inputInfo.GetShape()[heightIndex];
    unsigned int inputWidth = inputInfo.GetShape()[widthIndex];

    unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
    unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];

    unsigned int paddingTop = data.m_Parameters.m_PadTop;
    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
    unsigned int xStride = data.m_Parameters.m_StrideX;
    unsigned int yStride = data.m_Parameters.m_StrideY;
    unsigned int xDilation = data.m_Parameters.m_DilationX;
    unsigned int yDilation = data.m_Parameters.m_DilationY;

    // The world's least efficient convolution.
    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
    {
        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
        {
            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
            {
                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
                {
                    // This loop goes over each output element.
                    AccumulatorType sum = AccumulatorType();

                    // For depthwise, each output channel corresponds to exactly one input channel.
                    // For normal, must loop over each input channel.
                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
                    {
                        unsigned int depthwiseMultiplierIdx = 0;
                        if (depthwise)
                        {
                            // NOTE: for depthwise the loop runs exactly once and the loop variable
                            // is overwritten — the input channel is derived from the output channel.
                            cInput = cOutput / depthMultiplier;
                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
                        }

                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
                        {
                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
                            {
                                // This loop goes over each input element for each output element.

                                unsigned int filterIndex = 0;

                                // Since dimensionality of kernel depends on depthwiseness, so does index.
                                if (depthwise)
                                {
                                    // [M, I, H, W] flattened index.
                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
                                                  cInput * filterWidth * filterHeight +
                                                  yFilter * filterWidth +
                                                  xFilter;
                                }
                                else
                                {
                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        // [O, H, W, I] flattened index.
                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
                                                      yFilter * filterWidth * inputChannels +
                                                      xFilter * inputChannels +
                                                      cInput;
                                    }
                                    else
                                    {
                                        // [O, I, H, W] flattened index.
                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
                                                      cInput * filterWidth * filterHeight +
                                                      yFilter * filterWidth +
                                                      xFilter;
                                    }
                                }

                                // Subtract the zero-point so arithmetic happens on true (de-offset) values.
                                AccumulatorType filterValue = filterData[filterIndex] -
                                    boost::numeric_cast<AccumulatorType>(filterOffset);

                                // Coordinates in the (virtually) padded input, including dilation.
                                unsigned int yInput = yOutput * yStride + yFilter * yDilation;
                                unsigned int xInput = xOutput * xStride + xFilter * xDilation;

                                AccumulatorType inputValue;

                                // Check if we're in the padding.
                                // Padding contributes a zero-initialized value, i.e. zero after
                                // the offset subtraction performed on real input elements.
                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
                                {
                                    inputValue = AccumulatorType();
                                }
                                else
                                {
                                    unsigned int inputIndex;

                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        // [N, H, W, C] flattened index (coordinates shifted back out of the padding).
                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
                                                     (yInput - paddingTop) * inputWidth * inputChannels +
                                                     (xInput - paddingLeft) * inputChannels +
                                                     cInput;

                                    }
                                    else
                                    {
                                        // [N, C, H, W] flattened index.
                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
                                                     inputWidth * inputHeight * cInput +
                                                     inputWidth * (yInput - paddingTop) +
                                                     xInput - paddingLeft;
                                    }

                                    inputValue = inputData[inputIndex] -
                                        boost::numeric_cast<AccumulatorType>(inputOffset);

                                }
                                sum += filterValue * inputValue;
                            }
                        }
                    }

                    if (data.m_Parameters.m_BiasEnabled)
                    {
                        sum += biasData[cOutput];
                    }

                    if (outputScale != 0.0f)
                    {
                        float multiplier = (inputScale * filterScale) / outputScale;
                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
                        //  sum = std::round(multiplier * sum + outputOffset);
                        sum = boost::numeric_cast<AccumulatorType>(
                                QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
                            + boost::numeric_cast<AccumulatorType>(outputOffset);
                        // Clamp to [0, 255] — the representable range of the quantized output.
                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
                    }

                    output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
                }
            }
        }
    }
}
229
/// Reference convolution operating through Decoder/Encoder views, decoupling the algorithm
/// from the tensors' underlying data types. Declaration only — defined in the corresponding .cpp.
/// @param rInputShape    Shape of the input tensor.
/// @param rInputDecoder  Decoder yielding the input values as float.
/// @param rOutputShape   Shape of the output tensor.
/// @param rOutputEncoder Encoder receiving the computed output values.
/// @param rFilterShape   Shape of the filter (weights) tensor.
/// @param rFilterDecoder Decoder yielding the filter values as float.
/// @param biasEnabled    Whether a bias is applied.
/// @param pBiasDecoder   Decoder for the bias values; presumably only read when biasEnabled
///                       is true — confirm against the implementation.
/// @param dataLayout     Data layout of the tensors (e.g. NCHW or NHWC).
/// @param paddingTop     Top padding, in elements.
/// @param paddingLeft    Left padding, in elements.
/// @param xStride        Horizontal stride.
/// @param yStride        Vertical stride.
/// @param xDilation      Horizontal dilation.
/// @param yDilation      Vertical dilation.
/// @param depthwise      True for depthwise convolution, false for normal convolution.
void Convolve(const TensorShape& rInputShape,
              Decoder<float>& rInputDecoder,
              const TensorShape& rOutputShape,
              Encoder<float>& rOutputEncoder,
              const TensorShape& rFilterShape,
              Decoder<float>& rFilterDecoder,
              bool biasEnabled,
              Decoder<float>* pBiasDecoder,
              DataLayout dataLayout,
              unsigned int paddingTop,
              unsigned int paddingLeft,
              unsigned int xStride,
              unsigned int yStride,
              unsigned int xDilation,
              unsigned int yDilation,
              bool depthwise = false);
telsoa014fcda012018-03-09 14:13:49 +0000246} //namespace armnn