blob: 6a5ac535e44a73e47c6527ef8336875d7dff44af [file] [log] [blame]
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
6#include "ConvImpl.hpp"
7
8#include <boost/assert.hpp>
9
10#include <cmath>
11#include <limits>
12
13namespace armnn
14{
15
16QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
17{
18 BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);
19 if (multiplier == 0.0f)
20 {
21 m_Multiplier = 0;
22 m_RightShift = 0;
23 }
24 else
25 {
26 const double q = std::frexp(multiplier, &m_RightShift);
27 m_RightShift = -m_RightShift;
28 int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
29 BOOST_ASSERT(qFixed <= (1ll << 31));
30 if (qFixed == (1ll << 31))
31 {
32 qFixed /= 2;
33 --m_RightShift;
34 }
35 BOOST_ASSERT(m_RightShift >= 0);
36 BOOST_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
37 m_Multiplier = static_cast<int32_t>(qFixed);
38 }
39}
40
41int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const
42{
43 int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
44 return RoundingDivideByPOT(x, m_RightShift);
45}
46
47int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
48{
telsoa01c577f2c2018-08-31 09:22:23 +010049 // Check for overflow.
telsoa014fcda012018-03-09 14:13:49 +000050 if (a == b && a == std::numeric_limits<int32_t>::min())
51 {
52 return std::numeric_limits<int32_t>::max();
53 }
54 int64_t a_64(a);
55 int64_t b_64(b);
56 int64_t ab_64 = a_64 * b_64;
57 int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
58 int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
59 return ab_x2_high32;
60}
61
62int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
63{
64 BOOST_ASSERT(exponent >= 0 && exponent <= 31);
65 int32_t mask = (1 << exponent) - 1;
66 int32_t remainder = x & mask;
67 int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
68 return (x >> exponent) + (remainder > threshold ? 1 : 0);
69}
70
Mike Kelly9b398322019-05-22 17:21:49 +010071inline unsigned int GetOffset(DataLayout& dataLayout, const TensorShape& shape, unsigned int b, unsigned int c,
72 unsigned int h, unsigned int w)
73{
74 switch (dataLayout)
75 {
76 case DataLayout::NHWC:
77 b *= shape[1] * shape[2] * shape[3];
78 h *= shape[2] * shape[3];
79 w *= shape[3];
80 break;
81 case DataLayout::NCHW:
82 default:
83 b *= shape[1] * shape[2] * shape[3];
84 c *= shape[2] * shape[3];
85 h *= shape[3];
86 break;
87 }
88 return b + c + h + w;
89}
90
91void Convolve(const TensorShape& rInputShape,
92 Decoder<float>& rInputDecoder,
93 const TensorShape& rOutputShape,
94 Encoder<float>& rOutputEncoder,
95 const TensorShape& rFilterShape,
96 Decoder<float>& rFilterDecoder,
97 bool biasEnabled,
98 Decoder<float>* pBiasDecoder,
99 DataLayout dataLayout,
100 unsigned int paddingTop,
101 unsigned int paddingLeft,
102 unsigned int xStride,
103 unsigned int yStride,
104 unsigned int xDilation,
105 unsigned int yDilation,
106 bool depthwise)
107{
108 if (biasEnabled && !pBiasDecoder)
109 {
110 throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
111 }
112 const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
113
114 const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
115 const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
116 const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
117
118 unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;
119 unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];
120 unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];
121
122 unsigned int batchSize = rOutputShape[0];
123 unsigned int outputHeight = rOutputShape[heightIndex];
124 unsigned int outputWidth = rOutputShape[widthIndex];
125 unsigned int inputHeight = rInputShape[heightIndex];
126 unsigned int inputWidth = rInputShape[widthIndex];
127
128 unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];
129 unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];
130
131 for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
132 {
133 for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
134 {
135 for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
136 {
137 for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
138 {
139 // This loop goes over each output element.
140 float sum = 0.0f;
141
142 // For depthwise, each output channel corresponds to exactly one input channel.
143 // For normal, must loop over each input channel.
144 for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
145 {
146 unsigned int depthwiseMultiplierIdx = 0;
147 if (depthwise)
148 {
149 cInput = cOutput / depthMultiplier;
150 depthwiseMultiplierIdx = cOutput % depthMultiplier;
151 }
152
153 for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
154 {
155 for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
156 {
157 // This loop goes over each input element for each output element.
158 unsigned int filterIndex = 0;
159
160 // Since dimensionality of kernel depends on depthwiseness, so does index.
161 if (depthwise)
162 {
163 filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
164 cInput * filterWidth * filterHeight +
165 yFilter * filterWidth +
166 xFilter;
167 }
168 else
169 {
170 if (dataLayout == DataLayout::NHWC)
171 {
172 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
173 yFilter * filterWidth * inputChannels +
174 xFilter * inputChannels +
175 cInput;
176 }
177 else
178 {
179 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
180 cInput * filterWidth * filterHeight +
181 yFilter * filterWidth +
182 xFilter;
183 }
184 }
185 rFilterDecoder += filterIndex;
186 float filterValue = rFilterDecoder.Get();
187 rFilterDecoder -= filterIndex;
188
189 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
190 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
191
192 float inputValue;
193
194 // Check if we're in the padding.
195 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
196 xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
197 {
198 inputValue = 0.0f;
199 }
200 else
201 {
202 unsigned int inputIndex;
203
204 if (dataLayout == DataLayout::NHWC)
205 {
206 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
207 (yInput - paddingTop) * inputWidth * inputChannels +
208 (xInput - paddingLeft) * inputChannels +
209 cInput;
210 }
211 else
212 {
213 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
214 inputWidth * inputHeight * cInput +
215 inputWidth * (yInput - paddingTop) +
216 xInput - paddingLeft;
217 }
218 rInputDecoder += inputIndex;
219 inputValue = rInputDecoder.Get();
220 rInputDecoder -= inputIndex;
221 }
222 sum += filterValue * inputValue;
223 }
224 }
225 }
226
227 if (biasEnabled)
228 {
229 *pBiasDecoder += cOutput;
230 sum += pBiasDecoder->Get();
231 *pBiasDecoder -= cOutput;
232 }
233 unsigned int outIdx = GetOffset(dataLayout, rOutputShape, batchIdx, cOutput, yOutput, xOutput);
234
235 rOutputEncoder += outIdx;
236 rOutputEncoder.Set(sum);
237 rOutputEncoder -= outIdx;
238 }
239 }
240 }
241 }
242}
243
telsoa014fcda012018-03-09 14:13:49 +0000244} //namespace armnn