blob: 1615b51e73ccb28bd717b6c94b952dd9688fd84a [file] [log] [blame]
Pablo Tello299025a2017-09-29 11:30:12 +01001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2017-2020 Arm Limited.
Pablo Tello299025a2017-09-29 11:30:12 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
#include "GEMMLowp.h"

#include "arm_compute/core/Types.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"

#include "support/ToolchainSupport.h"

#include <algorithm>
#include <limits>
Pablo Tello299025a2017-09-29 11:30:12 +010032
33namespace arm_compute
34{
35namespace test
36{
37namespace validation
38{
39namespace reference
40{
Gian Marco6b77e912017-11-17 09:27:57 +000041namespace
42{
43template <typename T>
Georgios Pinitas448a81f2019-11-21 14:10:25 +000044struct DataTypeExtractor
45{
46 static DataType data_type()
47 {
48 DataType data_type = DataType::UNKNOWN;
49 if(std::is_same<T, int8_t>::value)
50 {
51 data_type = DataType::QASYMM8_SIGNED;
52 }
53 else if(std::is_same<T, uint8_t>::value)
54 {
55 data_type = DataType::QASYMM8;
56 }
57 else if(std::is_same<T, int16_t>::value)
58 {
59 data_type = DataType::QSYMM16;
60 }
61 return data_type;
62 }
63};
64
Manuel Bottini959c26d2019-12-02 16:22:35 +000065template <typename TIn, typename TOut>
66void quantize_down_scale(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int,
67 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +000068{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000069 const int cols_in = in->shape().x();
70 const bool is_per_channel = result_mult_int.size() > 1;
Gian Marco6b77e912017-11-17 09:27:57 +000071
Michalis Spyroud1d77222020-04-08 14:10:15 +010072#if defined(_OPENMP)
73 #pragma omp parallel for
74#endif /* _OPENMP */
Gian Marco6b77e912017-11-17 09:27:57 +000075 for(int i = 0; i < in->num_elements(); ++i)
76 {
Gian Marco58c57942017-11-28 09:10:03 +000077 int32_t result = ((*in)[i] + result_offset);
Gian Marco6b77e912017-11-17 09:27:57 +000078
79 if(bias != nullptr)
80 {
81 result += (*bias)[i % cols_in];
82 }
83
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000084 result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0];
Gian Marco58c57942017-11-28 09:10:03 +000085
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000086 result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
Gian Marco6b77e912017-11-17 09:27:57 +000087
88 // Bounded ReLu
89 if(min != max)
90 {
91 result = std::max(min, std::min(max, result));
92 }
93
Manuel Bottini959c26d2019-12-02 16:22:35 +000094 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
95 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
Gian Marco6b77e912017-11-17 09:27:57 +000096 }
97}
Gian Marco58c57942017-11-28 09:10:03 +000098
Georgios Pinitas448a81f2019-11-21 14:10:25 +000099template <typename TIn, typename TOut>
100void quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<int32_t> result_fixedpoint_multiplier,
101 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000102{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000103 const int cols_in = in->shape().x();
104 const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
Gian Marco58c57942017-11-28 09:10:03 +0000105
Michalis Spyroud1d77222020-04-08 14:10:15 +0100106#if defined(_OPENMP)
107 #pragma omp parallel for
108#endif /* _OPENMP */
Gian Marco58c57942017-11-28 09:10:03 +0000109 for(int i = 0; i < in->num_elements(); ++i)
110 {
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000111 TIn result = (*in)[i];
Gian Marco58c57942017-11-28 09:10:03 +0000112
113 if(bias != nullptr)
114 {
115 result += (*bias)[i % cols_in];
116 }
117
118 // Fixed point multiplication
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000119 const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0];
120 const int32_t shift = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
121
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000122 if(shift < 0)
123 {
124 result = asymm_int_mult(result * (1 << (-shift)), multiplier);
125 }
126 else
127 {
128 result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift);
129 }
Gian Marco58c57942017-11-28 09:10:03 +0000130 result += result_offset_after_shift;
131
132 // Bounded ReLu
133 if(min != max)
134 {
135 result = std::max(min, std::min(max, result));
136 }
137
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000138 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
139 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
Gian Marco Iodicebc415af2019-06-13 15:58:32 +0100140 }
141}
Sheri Zhang1b14c752020-03-09 14:29:52 +0000142
143template <typename TIn, typename TOut>
144void quantize_down_scale_by_float(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<float_t> result_real_multiplier,
145 int32_t result_offset, int32_t min, int32_t max)
146{
147 const int cols_in = in->shape().x();
148 const bool is_per_channel = result_real_multiplier.size() > 1;
149
Michalis Spyroud1d77222020-04-08 14:10:15 +0100150#if defined(_OPENMP)
151 #pragma omp parallel for
152#endif /* _OPENMP */
Sheri Zhang1b14c752020-03-09 14:29:52 +0000153 for(int i = 0; i < in->num_elements(); ++i)
154 {
155 TIn result = (*in)[i];
156
157 if(bias != nullptr)
158 {
159 result += (*bias)[i % cols_in];
160 }
161
162 // Float multiplication
163 const float_t multiplier = (is_per_channel) ? result_real_multiplier[i % cols_in] : result_real_multiplier[0];
164
165 float_t result_f = static_cast<float_t>(result) * multiplier + static_cast<float_t>(result_offset);
Georgios Pinitasafc630f2020-03-30 14:09:27 +0100166 result = static_cast<TIn>(support::cpp11::round(result_f));
Sheri Zhang1b14c752020-03-09 14:29:52 +0000167
168 // Bounded ReLu
169 if(min != max)
170 {
171 result = std::max(min, std::min(max, result));
172 }
173
174 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
175 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
176 }
177}
Gian Marco6b77e912017-11-17 09:27:57 +0000178} // namespace
179
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000180template <typename T_out, typename T_in, typename T_in_1>
181SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
Pablo Tello299025a2017-09-29 11:30:12 +0100182{
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000183 static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
Gian Marcoe75a02b2017-11-08 12:24:09 +0000184
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000185 DataType dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100186 SimpleTensor<T_out> c(shape_c, dt);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000187
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100188 const int K = a.shape().x();
189 const int M = a.shape().y();
190 const int N = b.shape().x();
191 const int D = a.shape().z(); // Number of matrices in a batch
192
193 const int a_stride_z = K * M;
194 // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
195 const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0;
196 const int c_stride_z = N * M;
Gian Marcoe75a02b2017-11-08 12:24:09 +0000197
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000198 std::vector<T_out> acc;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100199 acc.resize(N);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000200
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100201 for(int depth = 0; depth < D; ++depth)
Pablo Tello299025a2017-09-29 11:30:12 +0100202 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100203 const int base_addr_a = depth * a_stride_z;
204 const int base_addr_b = depth * b_stride_z;
205 const int base_addr_c = depth * c_stride_z;
206
207 for(int i = 0; i < M; ++i)
Pablo Tello299025a2017-09-29 11:30:12 +0100208 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100209 for(int j = 0; j < N; ++j)
Pablo Tello299025a2017-09-29 11:30:12 +0100210 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100211 acc[j] = 0;
Pablo Tello299025a2017-09-29 11:30:12 +0100212 }
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100213 for(int k = 0; k < K; ++k)
214 {
215 const T_out tmp_a = a_offset + static_cast<T_out>(a[base_addr_a + k + i * K]);
216 for(int j = 0; j < N; ++j)
217 {
218 const T_out tmp_b = b_offset + static_cast<T_out>(b[base_addr_b + j + k * N]);
219 const T_out mult_as_int = tmp_a * tmp_b;
220 acc[j] += mult_as_int;
221 }
222 }
223 for(int j = 0; j < N; ++j)
224 {
225 c[base_addr_c + j + i * N] = acc[j];
226 }
Pablo Tello299025a2017-09-29 11:30:12 +0100227 }
228 }
229
230 return c;
231}
232
Pablo Tello181e6512017-11-15 13:28:27 +0000233// used to validate assembly kernels which don't know anything about offsets
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000234template <typename T1, typename T2, typename T3>
235SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
Pablo Tello181e6512017-11-15 13:28:27 +0000236{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000237 return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0);
Pablo Tello181e6512017-11-15 13:28:27 +0000238}
239
Manuel Bottini959c26d2019-12-02 16:22:35 +0000240template <typename TIn, typename TOut>
241SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
242 int32_t min, int32_t max)
Gian Marcoe75a02b2017-11-08 12:24:09 +0000243{
Manuel Bottini959c26d2019-12-02 16:22:35 +0000244 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marcoe75a02b2017-11-08 12:24:09 +0000245
Manuel Bottini959c26d2019-12-02 16:22:35 +0000246 quantize_down_scale<TIn, TOut>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marco6b77e912017-11-17 09:27:57 +0000247
248 return dst;
249}
250
Manuel Bottini959c26d2019-12-02 16:22:35 +0000251template <typename TIn, typename TOut>
252SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
253 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +0000254{
Manuel Bottini959c26d2019-12-02 16:22:35 +0000255 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco6b77e912017-11-17 09:27:57 +0000256
Manuel Bottini959c26d2019-12-02 16:22:35 +0000257 quantize_down_scale<TIn, TOut>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000258
259 return dst;
260}
261
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000262template <typename TIn, typename TOut>
263SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
264 int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000265{
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000266 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco58c57942017-11-28 09:10:03 +0000267
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000268 quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
Gian Marco58c57942017-11-28 09:10:03 +0000269
270 return dst;
271}
272
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000273template <typename TIn, typename TOut>
274SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
275 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000276{
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000277 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco58c57942017-11-28 09:10:03 +0000278
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000279 quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
Gian Marco58c57942017-11-28 09:10:03 +0000280
281 return dst;
282}
283
Sheri Zhang1b14c752020-03-09 14:29:52 +0000284template <typename TIn, typename TOut>
285SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias,
286 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
287{
288 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
289
290 quantize_down_scale_by_float<TIn, TOut>(&in, &bias, &dst, result_real_multiplier, result_offset, min, max);
291
292 return dst;
293}
294
295template <typename TIn, typename TOut>
296SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in,
297 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
298{
299 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
300
301 quantize_down_scale_by_float<TIn, TOut>(&in, nullptr, &dst, result_real_multiplier, result_offset, min, max);
302
303 return dst;
304}
305
306template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
307 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
308template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
309 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
310template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
311 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
312template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
313 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000314template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
315 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
316template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
317 std::vector<int32_t> result_fixedpoint_multiplier,
318 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
319template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
320 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
321template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
322 std::vector<int32_t> result_fixedpoint_multiplier,
323 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
324template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
325 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
326template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
327 std::vector<int32_t> result_fixedpoint_multiplier,
328 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
Manuel Bottini959c26d2019-12-02 16:22:35 +0000329template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
330 std::vector<int32_t> result_shift, int32_t min, int32_t max);
331template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
332 std::vector<int32_t> result_shift, int32_t min, int32_t max);
333template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
334 std::vector<int32_t> result_shift, int32_t min, int32_t max);
335template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
336 std::vector<int32_t> result_shift, int32_t min, int32_t max);
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100337template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
338template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000339template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
340template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
341template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
Pablo Tello299025a2017-09-29 11:30:12 +0100342} // namespace reference
343} // namespace validation
344} // namespace test
345} // namespace arm_compute