blob: d7845535df73f58df7f92f611f4f75c51df69572 [file] [log] [blame]
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
6#include "ConvImpl.hpp"
7
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +01008#include <armnn/utility/Assert.hpp>
telsoa014fcda012018-03-09 14:13:49 +00009
10#include <cmath>
11#include <limits>
12
13namespace armnn
14{
15
16QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
17{
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +010018 ARMNN_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);
telsoa014fcda012018-03-09 14:13:49 +000019 if (multiplier == 0.0f)
20 {
21 m_Multiplier = 0;
22 m_RightShift = 0;
23 }
24 else
25 {
26 const double q = std::frexp(multiplier, &m_RightShift);
27 m_RightShift = -m_RightShift;
28 int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +010029 ARMNN_ASSERT(qFixed <= (1ll << 31));
telsoa014fcda012018-03-09 14:13:49 +000030 if (qFixed == (1ll << 31))
31 {
32 qFixed /= 2;
33 --m_RightShift;
34 }
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +010035 ARMNN_ASSERT(m_RightShift >= 0);
36 ARMNN_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
telsoa014fcda012018-03-09 14:13:49 +000037 m_Multiplier = static_cast<int32_t>(qFixed);
38 }
39}
40
41int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const
42{
43 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
44 return RoundingDivideByPOT(x, m_RightShift);
45}
46
47int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
48{
telsoa01c577f2c2018-08-31 09:22:23 +010049 // Check for overflow.
telsoa014fcda012018-03-09 14:13:49 +000050 if (a == b && a == std::numeric_limits<int32_t>::min())
51 {
52 return std::numeric_limits<int32_t>::max();
53 }
54 int64_t a_64(a);
55 int64_t b_64(b);
56 int64_t ab_64 = a_64 * b_64;
57 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
58 int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
59 return ab_x2_high32;
60}
61
62int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
63{
Narumol Prangnawaratac2770a2020-04-01 16:51:23 +010064 ARMNN_ASSERT(exponent >= 0 && exponent <= 31);
telsoa014fcda012018-03-09 14:13:49 +000065 int32_t mask = (1 << exponent) - 1;
66 int32_t remainder = x & mask;
67 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
68 return (x >> exponent) + (remainder > threshold ? 1 : 0);
69}
70
Mike Kelly9b398322019-05-22 17:21:49 +010071void Convolve(const TensorShape& rInputShape,
72 Decoder<float>& rInputDecoder,
73 const TensorShape& rOutputShape,
74 Encoder<float>& rOutputEncoder,
75 const TensorShape& rFilterShape,
76 Decoder<float>& rFilterDecoder,
77 bool biasEnabled,
78 Decoder<float>* pBiasDecoder,
79 DataLayout dataLayout,
80 unsigned int paddingTop,
81 unsigned int paddingLeft,
82 unsigned int xStride,
83 unsigned int yStride,
84 unsigned int xDilation,
85 unsigned int yDilation,
86 bool depthwise)
87{
88 if (biasEnabled && !pBiasDecoder)
89 {
90 throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
91 }
92 const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
93
94 const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
95 const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
96 const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
97
Finn Williamsb9dcfe62020-09-17 15:58:31 +010098 const unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;
99 const unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];
100 const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];
Mike Kelly9b398322019-05-22 17:21:49 +0100101
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100102 const unsigned int batchSize = rOutputShape[0];
103 const unsigned int outputHeight = rOutputShape[heightIndex];
104 const unsigned int outputWidth = rOutputShape[widthIndex];
105 const unsigned int inputHeight = rInputShape[heightIndex];
106 const unsigned int inputWidth = rInputShape[widthIndex];
Mike Kelly9b398322019-05-22 17:21:49 +0100107
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100108 const unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];
109 const unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];
Mike Kelly9b398322019-05-22 17:21:49 +0100110
Finn Williamsea8ce702020-09-29 19:54:00 +0100111 const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
112 const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthMultiplier, depthwise);
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100113
Finn Williamsea8ce702020-09-29 19:54:00 +0100114 const TensorShape biasShape{outputChannels};
115 const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100116
117 unsigned int depthwiseMultiplierIdx = 0;
Mike Kelly9b398322019-05-22 17:21:49 +0100118 for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
119 {
120 for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
121 {
122 for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
123 {
124 for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
125 {
126 // This loop goes over each output element.
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100127 float sum = 0.0f;
Mike Kelly9b398322019-05-22 17:21:49 +0100128
129 // For depthwise, each output channel corresponds to exactly one input channel.
130 // For normal, must loop over each input channel.
131 for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
132 {
Mike Kelly9b398322019-05-22 17:21:49 +0100133 if (depthwise)
134 {
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100135 depthwiseMultiplierIdx = 0;
Mike Kelly9b398322019-05-22 17:21:49 +0100136 cInput = cOutput / depthMultiplier;
137 depthwiseMultiplierIdx = cOutput % depthMultiplier;
138 }
139
140 for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
141 {
142 for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
143 {
144 // This loop goes over each input element for each output element.
145 unsigned int filterIndex = 0;
146
147 // Since dimensionality of kernel depends on depthwiseness, so does index.
148 if (depthwise)
149 {
150 filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
151 cInput * filterWidth * filterHeight +
152 yFilter * filterWidth +
153 xFilter;
154 }
155 else
156 {
Matteo Martincighf2aaab32019-06-06 15:46:22 +0100157 // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
158 // performance regression.
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100159 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
Matteo Martincighf2aaab32019-06-06 15:46:22 +0100160 {
161 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
162 yFilter * filterWidth * inputChannels +
163 xFilter * inputChannels +
164 cInput;
165 }
166 else
167 {
168 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100169 cInput * filterWidth * filterHeight +
Matteo Martincighf2aaab32019-06-06 15:46:22 +0100170 yFilter * filterWidth +
171 xFilter;
172 }
Mike Kelly9b398322019-05-22 17:21:49 +0100173 }
Matteo Martincigh18f2d1c2019-06-05 13:54:25 +0100174
Mike Kelly9b398322019-05-22 17:21:49 +0100175 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
176 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
177
178 float inputValue;
179
180 // Check if we're in the padding.
181 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100182 xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
Mike Kelly9b398322019-05-22 17:21:49 +0100183 {
184 inputValue = 0.0f;
185 }
186 else
187 {
Matteo Martincighf2aaab32019-06-06 15:46:22 +0100188 unsigned int inputIndex = 0;
189
190 // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
191 // performance regression.
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100192 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
Matteo Martincighf2aaab32019-06-06 15:46:22 +0100193 {
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100194 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
Matteo Martincighf2aaab32019-06-06 15:46:22 +0100195 (yInput - paddingTop) * inputWidth * inputChannels +
196 (xInput - paddingLeft) * inputChannels +
197 cInput;
198 }
199 else
200 {
201 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
202 inputWidth * inputHeight * cInput +
203 inputWidth * (yInput - paddingTop) +
204 xInput - paddingLeft;
205 }
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100206 inputValue = inputVec[inputIndex];
Mike Kelly9b398322019-05-22 17:21:49 +0100207 }
Matteo Martincigh18f2d1c2019-06-05 13:54:25 +0100208
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100209 sum += filterVec[filterIndex] * inputValue;
Mike Kelly9b398322019-05-22 17:21:49 +0100210 }
211 }
212 }
213
214 if (biasEnabled)
215 {
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100216 sum += biasVec[cOutput];
Mike Kelly9b398322019-05-22 17:21:49 +0100217 }
Mike Kelly9b398322019-05-22 17:21:49 +0100218
Finn Williamsb9dcfe62020-09-17 15:58:31 +0100219 unsigned int outIdx;
220 if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
221 {
222 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
223 yOutput * outputWidth * outputChannels +
224 xOutput * outputChannels +
225 cOutput;
226 }
227 else
228 {
229 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
230 cOutput * outputHeight * outputWidth +
231 yOutput * outputWidth +
232 xOutput;
233 }
Matteo Martincigh18f2d1c2019-06-05 13:54:25 +0100234
235 rOutputEncoder[outIdx];
Mike Kelly9b398322019-05-22 17:21:49 +0100236 rOutputEncoder.Set(sum);
Mike Kelly9b398322019-05-22 17:21:49 +0100237 }
238 }
239 }
240 }
241}
242
Aron Virginas-Tar5edc8812019-11-05 18:00:21 +0000243} // namespace armnn