blob: 36d86d1532cb0a0aff46b431110ca962bc9ccdb1 [file] [log] [blame]
Pablo Tello299025a2017-09-29 11:30:12 +01001/*
Manuel Bottini959c26d2019-12-02 16:22:35 +00002 * Copyright (c) 2017-2020 ARM Limited.
Pablo Tello299025a2017-09-29 11:30:12 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Gian Marcoe75a02b2017-11-08 12:24:09 +000024#include "GEMMLowp.h"
Pablo Tello299025a2017-09-29 11:30:12 +010025
26#include "arm_compute/core/Types.h"
Georgios Pinitas5a7e7762017-12-01 16:27:29 +000027#include "tests/validation/reference/UtilsQuantizedAsymm.h"
Gian Marco58c57942017-11-28 09:10:03 +000028
Georgios Pinitasafc630f2020-03-30 14:09:27 +010029#include "support/ToolchainSupport.h"
30
Gian Marco58c57942017-11-28 09:10:03 +000031#include <limits>
Pablo Tello299025a2017-09-29 11:30:12 +010032
33namespace arm_compute
34{
35namespace test
36{
37namespace validation
38{
39namespace reference
40{
Gian Marco6b77e912017-11-17 09:27:57 +000041namespace
42{
43template <typename T>
Georgios Pinitas448a81f2019-11-21 14:10:25 +000044struct DataTypeExtractor
45{
46 static DataType data_type()
47 {
48 DataType data_type = DataType::UNKNOWN;
49 if(std::is_same<T, int8_t>::value)
50 {
51 data_type = DataType::QASYMM8_SIGNED;
52 }
53 else if(std::is_same<T, uint8_t>::value)
54 {
55 data_type = DataType::QASYMM8;
56 }
57 else if(std::is_same<T, int16_t>::value)
58 {
59 data_type = DataType::QSYMM16;
60 }
61 return data_type;
62 }
63};
64
Manuel Bottini959c26d2019-12-02 16:22:35 +000065template <typename TIn, typename TOut>
66void quantize_down_scale(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int,
67 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +000068{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000069 const int cols_in = in->shape().x();
70 const bool is_per_channel = result_mult_int.size() > 1;
Gian Marco6b77e912017-11-17 09:27:57 +000071
72 for(int i = 0; i < in->num_elements(); ++i)
73 {
Gian Marco58c57942017-11-28 09:10:03 +000074 int32_t result = ((*in)[i] + result_offset);
Gian Marco6b77e912017-11-17 09:27:57 +000075
76 if(bias != nullptr)
77 {
78 result += (*bias)[i % cols_in];
79 }
80
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000081 result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0];
Gian Marco58c57942017-11-28 09:10:03 +000082
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000083 result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
Gian Marco6b77e912017-11-17 09:27:57 +000084
85 // Bounded ReLu
86 if(min != max)
87 {
88 result = std::max(min, std::min(max, result));
89 }
90
Manuel Bottini959c26d2019-12-02 16:22:35 +000091 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
92 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
Gian Marco6b77e912017-11-17 09:27:57 +000093 }
94}
Gian Marco58c57942017-11-28 09:10:03 +000095
Georgios Pinitas448a81f2019-11-21 14:10:25 +000096template <typename TIn, typename TOut>
97void quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<int32_t> result_fixedpoint_multiplier,
98 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +000099{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000100 const int cols_in = in->shape().x();
101 const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
Gian Marco58c57942017-11-28 09:10:03 +0000102
103 for(int i = 0; i < in->num_elements(); ++i)
104 {
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000105 TIn result = (*in)[i];
Gian Marco58c57942017-11-28 09:10:03 +0000106
107 if(bias != nullptr)
108 {
109 result += (*bias)[i % cols_in];
110 }
111
112 // Fixed point multiplication
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000113 const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0];
114 const int32_t shift = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
115
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000116 if(shift < 0)
117 {
118 result = asymm_int_mult(result * (1 << (-shift)), multiplier);
119 }
120 else
121 {
122 result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift);
123 }
Gian Marco58c57942017-11-28 09:10:03 +0000124 result += result_offset_after_shift;
125
126 // Bounded ReLu
127 if(min != max)
128 {
129 result = std::max(min, std::min(max, result));
130 }
131
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000132 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
133 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
Gian Marco Iodicebc415af2019-06-13 15:58:32 +0100134 }
135}
Sheri Zhang1b14c752020-03-09 14:29:52 +0000136
137template <typename TIn, typename TOut>
138void quantize_down_scale_by_float(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<float_t> result_real_multiplier,
139 int32_t result_offset, int32_t min, int32_t max)
140{
141 const int cols_in = in->shape().x();
142 const bool is_per_channel = result_real_multiplier.size() > 1;
143
144 for(int i = 0; i < in->num_elements(); ++i)
145 {
146 TIn result = (*in)[i];
147
148 if(bias != nullptr)
149 {
150 result += (*bias)[i % cols_in];
151 }
152
153 // Float multiplication
154 const float_t multiplier = (is_per_channel) ? result_real_multiplier[i % cols_in] : result_real_multiplier[0];
155
156 float_t result_f = static_cast<float_t>(result) * multiplier + static_cast<float_t>(result_offset);
Georgios Pinitasafc630f2020-03-30 14:09:27 +0100157 result = static_cast<TIn>(support::cpp11::round(result_f));
Sheri Zhang1b14c752020-03-09 14:29:52 +0000158
159 // Bounded ReLu
160 if(min != max)
161 {
162 result = std::max(min, std::min(max, result));
163 }
164
165 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
166 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
167 }
168}
Gian Marco6b77e912017-11-17 09:27:57 +0000169} // namespace
170
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000171template <typename T_out, typename T_in, typename T_in_1>
172SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
Pablo Tello299025a2017-09-29 11:30:12 +0100173{
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000174 static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
Gian Marcoe75a02b2017-11-08 12:24:09 +0000175
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000176 DataType dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100177 SimpleTensor<T_out> c(shape_c, dt);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000178
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100179 const int K = a.shape().x();
180 const int M = a.shape().y();
181 const int N = b.shape().x();
182 const int D = a.shape().z(); // Number of matrices in a batch
183
184 const int a_stride_z = K * M;
185 // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
186 const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0;
187 const int c_stride_z = N * M;
Gian Marcoe75a02b2017-11-08 12:24:09 +0000188
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000189 std::vector<T_out> acc;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100190 acc.resize(N);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000191
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100192 for(int depth = 0; depth < D; ++depth)
Pablo Tello299025a2017-09-29 11:30:12 +0100193 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100194 const int base_addr_a = depth * a_stride_z;
195 const int base_addr_b = depth * b_stride_z;
196 const int base_addr_c = depth * c_stride_z;
197
198 for(int i = 0; i < M; ++i)
Pablo Tello299025a2017-09-29 11:30:12 +0100199 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100200 for(int j = 0; j < N; ++j)
Pablo Tello299025a2017-09-29 11:30:12 +0100201 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100202 acc[j] = 0;
Pablo Tello299025a2017-09-29 11:30:12 +0100203 }
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100204 for(int k = 0; k < K; ++k)
205 {
206 const T_out tmp_a = a_offset + static_cast<T_out>(a[base_addr_a + k + i * K]);
207 for(int j = 0; j < N; ++j)
208 {
209 const T_out tmp_b = b_offset + static_cast<T_out>(b[base_addr_b + j + k * N]);
210 const T_out mult_as_int = tmp_a * tmp_b;
211 acc[j] += mult_as_int;
212 }
213 }
214 for(int j = 0; j < N; ++j)
215 {
216 c[base_addr_c + j + i * N] = acc[j];
217 }
Pablo Tello299025a2017-09-29 11:30:12 +0100218 }
219 }
220
221 return c;
222}
223
// Used to validate assembly kernels which don't know anything about offsets.
// Forwards to gemmlowp_matrix_multiply_core with both quantization offsets set
// to zero, i.e. a plain widening integer matrix multiplication.
template <typename T1, typename T2, typename T3>
SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
{
    return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0);
}
230
Manuel Bottini959c26d2019-12-02 16:22:35 +0000231template <typename TIn, typename TOut>
232SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
233 int32_t min, int32_t max)
Gian Marcoe75a02b2017-11-08 12:24:09 +0000234{
Manuel Bottini959c26d2019-12-02 16:22:35 +0000235 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marcoe75a02b2017-11-08 12:24:09 +0000236
Manuel Bottini959c26d2019-12-02 16:22:35 +0000237 quantize_down_scale<TIn, TOut>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marco6b77e912017-11-17 09:27:57 +0000238
239 return dst;
240}
241
Manuel Bottini959c26d2019-12-02 16:22:35 +0000242template <typename TIn, typename TOut>
243SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
244 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +0000245{
Manuel Bottini959c26d2019-12-02 16:22:35 +0000246 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco6b77e912017-11-17 09:27:57 +0000247
Manuel Bottini959c26d2019-12-02 16:22:35 +0000248 quantize_down_scale<TIn, TOut>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000249
250 return dst;
251}
252
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000253template <typename TIn, typename TOut>
254SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
255 int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000256{
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000257 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco58c57942017-11-28 09:10:03 +0000258
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000259 quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
Gian Marco58c57942017-11-28 09:10:03 +0000260
261 return dst;
262}
263
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000264template <typename TIn, typename TOut>
265SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
266 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000267{
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000268 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco58c57942017-11-28 09:10:03 +0000269
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000270 quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
Gian Marco58c57942017-11-28 09:10:03 +0000271
272 return dst;
273}
274
Sheri Zhang1b14c752020-03-09 14:29:52 +0000275template <typename TIn, typename TOut>
276SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias,
277 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
278{
279 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
280
281 quantize_down_scale_by_float<TIn, TOut>(&in, &bias, &dst, result_real_multiplier, result_offset, min, max);
282
283 return dst;
284}
285
286template <typename TIn, typename TOut>
287SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in,
288 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
289{
290 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
291
292 quantize_down_scale_by_float<TIn, TOut>(&in, nullptr, &dst, result_real_multiplier, result_offset, min, max);
293
294 return dst;
295}
296
297template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
298 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
299template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
300 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
301template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
302 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
303template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
304 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000305template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
306 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
307template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
308 std::vector<int32_t> result_fixedpoint_multiplier,
309 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
310template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
311 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
312template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
313 std::vector<int32_t> result_fixedpoint_multiplier,
314 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
315template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
316 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
317template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
318 std::vector<int32_t> result_fixedpoint_multiplier,
319 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
Manuel Bottini959c26d2019-12-02 16:22:35 +0000320template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
321 std::vector<int32_t> result_shift, int32_t min, int32_t max);
322template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
323 std::vector<int32_t> result_shift, int32_t min, int32_t max);
324template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
325 std::vector<int32_t> result_shift, int32_t min, int32_t max);
326template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
327 std::vector<int32_t> result_shift, int32_t min, int32_t max);
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100328template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
329template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000330template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
331template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
332template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
Pablo Tello299025a2017-09-29 11:30:12 +0100333} // namespace reference
334} // namespace validation
335} // namespace test
336} // namespace arm_compute