/*
 * Copyright (c) 2017-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "tests/validation/Helpers.h"
#include "tests/framework/Asserts.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <tuple>

namespace arm_compute
{
namespace test
{
namespace validation
{
template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm8(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<int8_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm8_signed(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint16_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm16(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<uint8_t>          dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm8(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<int8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<int8_t>           dst{ src.shape(), DataType::QASYMM8_SIGNED, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm8_signed(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<uint16_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<uint16_t>         dst{ src.shape(), DataType::QASYMM16, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm16(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<int16_t> convert_to_symmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<int16_t>          dst{ src.shape(), DataType::QSYMM16, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qsymm16(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_symmetric(const SimpleTensor<int16_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qsymm16(src[i], quantization_info);
    }
    return dst;
}

template <typename T>
void matrix_multiply(const SimpleTensor<T> &a, const SimpleTensor<T> &b, SimpleTensor<T> &out)
{
    ARM_COMPUTE_ERROR_ON(a.shape()[0] != b.shape()[1]);
    ARM_COMPUTE_ERROR_ON(a.shape()[1] != out.shape()[1]);
    ARM_COMPUTE_ERROR_ON(b.shape()[0] != out.shape()[0]);

    const int M = a.shape()[1]; // Rows
    const int N = b.shape()[0]; // Cols
    const int K = b.shape()[1];

#if defined(_OPENMP)
    #pragma omp parallel for collapse(2)
#endif /* _OPENMP */
    for(int y = 0; y < M; ++y)
    {
        for(int x = 0; x < N; ++x)
        {
            float acc = 0.0f;
            for(int k = 0; k < K; ++k)
            {
                acc += a[y * K + k] * b[x + k * N];
            }

            out[x + y * N] = acc;
        }
    }
}

template <typename T>
void transpose_matrix(const SimpleTensor<T> &in, SimpleTensor<T> &out)
{
    ARM_COMPUTE_ERROR_ON((in.shape()[0] != out.shape()[1]) || (in.shape()[1] != out.shape()[0]));

    const int width  = in.shape()[0];
    const int height = in.shape()[1];

#if defined(_OPENMP)
    #pragma omp parallel for collapse(2)
#endif /* _OPENMP */
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            const T val = in[x + y * width];

            out[x * height + y] = val;
        }
    }
}

template <typename T>
void get_tile(const SimpleTensor<T> &in, SimpleTensor<T> &tile, const Coordinates &coord)
{
    ARM_COMPUTE_ERROR_ON(tile.shape().num_dimensions() > 2);

    const int w_tile = tile.shape()[0];
    const int h_tile = tile.shape()[1];

    // Fill the tile with zeros
    std::fill(tile.data() + 0, (tile.data() + (w_tile * h_tile)), static_cast<T>(0));

    // Check whether coordinates in dimensions greater than 2 would cause out-of-bound reads
    for(size_t d = 2; d < Coordinates::num_max_dimensions; ++d)
    {
        if(coord[d] < 0 || coord[d] >= static_cast<int>(in.shape()[d]))
        {
            ARM_COMPUTE_ERROR("coord[d] < 0 || coord[d] >= in.shape()[d] with d >= 2");
        }
    }

    // Since we could have out-of-bound reads along the X and Y dimensions,
    // we start calculating the input address with x = 0 and y = 0
    Coordinates start_coord = coord;
    start_coord[0]          = 0;
    start_coord[1]          = 0;

    // Get input and roi pointers
    auto in_ptr  = static_cast<const T *>(in(start_coord));
    auto roi_ptr = static_cast<T *>(tile.data());

    const int x_in_start = std::max(0, coord[0]);
    const int y_in_start = std::max(0, coord[1]);
    const int x_in_end   = std::min(static_cast<int>(in.shape()[0]), coord[0] + w_tile);
    const int y_in_end   = std::min(static_cast<int>(in.shape()[1]), coord[1] + h_tile);

    // Number of elements to copy per row
    const int n = x_in_end - x_in_start;

    // Starting coordinates for the ROI
    const int x_tile_start = coord[0] > 0 ? 0 : std::abs(coord[0]);
    const int y_tile_start = coord[1] > 0 ? 0 : std::abs(coord[1]);

    // Update input pointer
    in_ptr += x_in_start;
    in_ptr += (y_in_start * in.shape()[0]);

    // Update ROI pointer
    roi_ptr += x_tile_start;
    roi_ptr += (y_tile_start * tile.shape()[0]);

    for(int y = y_in_start; y < y_in_end; ++y)
    {
        // Copy per row
        std::copy(in_ptr, in_ptr + n, roi_ptr);

        in_ptr += in.shape()[0];
        roi_ptr += tile.shape()[0];
    }
}

template <typename T>
void zeros(SimpleTensor<T> &in, const Coordinates &anchor, const TensorShape &shape)
{
    ARM_COMPUTE_ERROR_ON(anchor.num_dimensions() != shape.num_dimensions());
    ARM_COMPUTE_ERROR_ON(in.shape().num_dimensions() > 2);
    ARM_COMPUTE_ERROR_ON(shape.num_dimensions() > 2);

    // Check that the anchor and shape stay within the input bounds (no out-of-bound writes)
    for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
    {
        if(anchor[d] < 0 || ((anchor[d] + shape[d]) > in.shape()[d]))
        {
            ARM_COMPUTE_ERROR("anchor[d] < 0 || (anchor[d] + shape[d]) > in.shape()[d]");
        }
    }

    // Get input pointer
    auto in_ptr = static_cast<T *>(in(anchor[0] + anchor[1] * in.shape()[0]));

    const unsigned int n = in.shape()[0];

    for(unsigned int y = 0; y < shape[1]; ++y)
    {
        std::fill(in_ptr, in_ptr + shape[0], 0);
        in_ptr += n;
    }
}

std::pair<int, int> get_quantized_bounds(const QuantizationInfo &quant_info, float min, float max)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qasymm8(min, quant_info.uniform());
    const int max_bound = quantize_qasymm8(max, quant_info.uniform());
    return std::pair<int, int> { min_bound, max_bound };
}

std::pair<int, int> get_quantized_qasymm8_signed_bounds(const QuantizationInfo &quant_info, float min, float max)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qasymm8_signed(min, quant_info.uniform());
    const int max_bound = quantize_qasymm8_signed(max, quant_info.uniform());
    return std::pair<int, int> { min_bound, max_bound };
}

std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qsymm8_per_channel(min, quant_info, channel_id);
    const int max_bound = quantize_qsymm8_per_channel(max, quant_info, channel_id);
    return std::pair<int, int> { min_bound, max_bound };
}

void add_padding_x(std::initializer_list<ITensor *> tensors, const DataLayout &data_layout, bool only_right_pad)
{
    if(data_layout == DataLayout::NHWC)
    {
        constexpr unsigned int lower = 1U;
        constexpr unsigned int upper = 16U;

        std::uniform_int_distribution<unsigned int> distribution(lower, upper);
        size_t                                      seed_offset = 0;

        for(ITensor *tensor : tensors)
        {
            ARM_COMPUTE_ERROR_ON(!tensor->info()->is_resizable());

            std::mt19937 gen(library->seed() + seed_offset++);

            const unsigned int right = distribution(gen);
            const unsigned int left  = only_right_pad ? 0 : distribution(gen);

            tensor->info()->extend_padding(PaddingSize(0U, right, 0U, left));
        }
    }
}

QuantizationHint suggest_conv_dst_q_info_and_bias(const QuantizationInfo &in_q_info,
                                                  const QuantizationInfo &weight_q_info,
                                                  int32_t                 height,
                                                  int32_t                 width,
                                                  int32_t                 channels,
                                                  DataType                data_type,
                                                  float                   bias_fraction)
{
    /** Quantization Setup of convolution
     *
     * Just like any other multiply-accumulate, the convolution (2D) operation
     * multiplies and accumulates the input and weight tensors. This accumulation
     * takes place across three dimensions: height, width and channels, all of which
     * belong to the weight tensor.
     *
     * The formula for simple convolution can be written as:
     *      C = sum_h sum_w sum_c(I[h_offset + h, w_offset + w, c] * W[h, w, c])
     *
     * Here, h_offset and w_offset are the starting positions in the image. Effects
     * of padding are ignored. This accumulation reduces to something like
     *
     *      C = sum_m(I_index * W_hwc)
     * where m is height x width x channels.
     *
     * Non-unit strides and/or dilations do not change the probabilistic nature of
     * this sum because we always iterate over the full size of the weight tensor.
     *
     * Padding may affect this summation, but it's a boundary condition and so is
     * neglected for brevity.
     */

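    // Illustrative only (hypothetical tensor shapes, not part of the original comment): a 3x3
    // kernel over 64 input channels gives an accumulation depth of 3 * 3 * 64 = 576
    // multiply-accumulates per output element, so the call below would behave exactly like:
    //     suggest_mac_dst_q_info_and_bias(in_q_info, weight_q_info, 576, data_type, bias_fraction);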
    return suggest_mac_dst_q_info_and_bias(in_q_info, weight_q_info, height * width * channels, data_type, bias_fraction);
}

QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info,
                                                    const QuantizationInfo &rhs_q_info,
                                                    int32_t m, int32_t n, int32_t k, DataType data_type,
                                                    float bias_fraction)
{
    ARM_COMPUTE_UNUSED(m, n);

    /** Quantization Setup of matrix multiplication
     *
     * We have a matrix multiplication of the form C = A * B + D
     * where A is (m x k), B is (k x n) and C is therefore (m x n).
     * The bias, D, is (1 x n).
     *
     * If we have some distributional statistics of A, B and D, i.e. mean and variance,
     * we can estimate the mean and variance of a single value in the C matrix and pick
     * good scale and offset values for the output and have non-saturated tests.
     *
     * Each element in the output matrix can be calculated as follows:
     *      C_ij = sum_k(A_ik * B_kj) + D_j
     *
     * Note: All possible A_ik, B_kj, D_j random variables are assumed mutually independent.
     * Note: In quantized operators the bias is an integer, but its quantization scale is
     *       assumed to be equal to lhs_scale * rhs_scale and its offset to be 0.
     * Note: Since the bias is an integer that must be provided as an input, we need to pick
     *       reasonable values when adding it on top of the summation. This is where "bias_fraction"
     *       comes into play. Based on the fraction given, we also return a suggested bias range
     *       (min/max) that does not saturate the output.
     *
     * Because all random variables are mutually independent, any C_ij has the same statistics,
     * which is why we return a single destination quantization info object; and why we can
     * resort to the more general calculation explained in suggest_mac_dst_q_info_and_bias().
     *
     * From a probabilistic perspective, the above calculation reduces to
     *      c = sum_k (a_k * b_k) + d
     */

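    // Illustrative only (hypothetical sizes, not part of the original comment): for a
    // [32 x 256] * [256 x 64] multiplication the accumulation depth is k = 256, independent of
    // m and n (hence ARM_COMPUTE_UNUSED above), so the call below would behave exactly like:
    //     suggest_mac_dst_q_info_and_bias(lhs_q_info, rhs_q_info, 256, data_type, bias_fraction);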
    return suggest_mac_dst_q_info_and_bias(lhs_q_info, rhs_q_info, k, data_type, bias_fraction);
}

QuantizationHint suggest_mac_dst_q_info_and_bias(
    const QuantizationInfo &a_q_info, const QuantizationInfo &b_q_info, int32_t K, DataType data_type, float bias_fraction, int num_sd)
{
    QuantizationInfo c_q_info;

    ARM_COMPUTE_ASSERT(data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED);

    const int32_t t_max = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::max() : std::numeric_limits<int8_t>::max());
    const int32_t t_min = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::min() : std::numeric_limits<int8_t>::min());

    /** Quantization Setup of multiply-accumulate
     *
     * Expression (in float):
     *      C = sum_k ( A_k * B_k ) + D
     *
     * Lemma: An affine transformation (i.e. aX + b) of a discrete uniform random variable
     *        yields another discrete uniform random variable.
     *
     * Terminology:
     *      E[X]: Mean of the random variable X (sometimes referred to as mu_x)
     *      var(X): Variance of the random variable X (sometimes referred to as sigma^2_x)
     *      std(X): sqrt(var(X)), standard deviation of X
     *
     * 1) Calculate the mean:
     *      E[C] = sum_k( E[A_k] * E[B_k] ) + E[D] = K * mean_a * mean_b + mean_d
     *
     *      Since the elements of A and B are uniformly distributed random variables, we have
     *          mean_a = (max_a + min_a) / 2, mean_b = (max_b + min_b) / 2
     *      max_a/b and min_a/b can be calculated from scale_a/b and offset_a/b by substituting
     *      the data type minimum and maximum into the quantization equations.
     *
     *      We don't know mean_d because we have to choose it based on bias_fraction. If we call
     *      the integer summation M_int then, similarly to the above, we have:
     *
     *      E[C_int] = sum_k( E[A_k_int] * E[B_k_int] ) + E[D_int] = K * mean_a_int * mean_b_int + mean_d_int
     *                 \___________________________/
     *                           E[M_int]
     *
     *      We choose a bias mean proportional to the integer summation. This proportion is "bias_fraction".
     *      So, we have D_int = f * M_int (f: fraction), and
     *          E[D_int] = mean_d_int = f * E[M_int]
     *
     *      This also means, for the floating point value of D, the following:
     *          E[D] = mean_d = E[D_int] * a_scale * b_scale
     *
     * 2) Calculate the variance:
     *      var(C) = sum_k( var(A_k * B_k) ) + var(D)
     *             = sum_k ( E[A_k^2 * B_k^2] - E[A_k]^2 * E[B_k]^2 )
     *             = ...
     *             = K * (var_a * var_b + var_a * mean^2_b + var_b * mean^2_a) + var_d
     *
     *      Similarly, due to uniform random variable properties, we have
     *          var_a = (max_a - min_a)^2 / 12
     *          var_b = (max_b - min_b)^2 / 12
     *
     *      Again, we don't know var_d as we don't know the bias. As set out in the previous section, we have
     *          var(D_int) = var(f * M_int) = f^2 * var(M_int)
     *
     *      Using the same expression, we can find var(M_int):
     *      var(C_int) = sum_k( var(A_k_int * B_k_int) ) + var(D_int)
     *                 = sum_k ( E[A_k_int^2 * B_k_int^2] - E[A_k_int]^2 * E[B_k_int]^2 )
     *                 = ...
     *                 = K * (var_a_int * var_b_int + var_a_int * mean^2_b_int + var_b_int * mean^2_a_int) + var_d_int
     *                   \_______________________________________________________________________________/
     *                                                    var(M_int)
     *
     *      Now that we know the mean and variance of D_int, we can return a suitable bias range as
     *          [mean_d_int +/- num_sd * std_d_int]
     *
     *      This also means, for the floating point value of D, the following:
     *          var(D) = var_d = var(D_int) * a_scale^2 * b_scale^2
     *
     *      E[D] and var(D) calculated in steps (1) and (2) can be substituted into the E[C] and var(C) calculations.
     *
     * 3) Now we have an idea of what an average C looks like and how much deviation is present
     *    around it. The exact distribution of C is difficult to derive and depends on K.
     *    But, as K increases, due to the Central Limit Theorem, it looks more and more like a
     *    bell-shaped curve, approaching a normal distribution.
     *
     *    This is useful because, in a normal distribution, values within +/- 2 standard deviations
     *    of the mean constitute about 95% of the values. Therefore, a plausible range for us is:
     *          C_range = [C_min, C_max] = [mean_c - 2 * std_c, mean_c + 2 * std_c]
     *
     * 4)
     *    If we map this [C_min, C_max] to [0, 255] or [-128, 127] depending on the signedness of the
     *    data type, we can find a suitable scale and offset for the output. On average, it's expected
     *    that 5% of the output values will saturate and 95% will remain in the range.
     *
     *    The equations to be solved for offset_c and scale_c are:
     *          C_min = scale_c * (type_min - offset_c)
     *          C_max = scale_c * (type_max - offset_c)
     */

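    // For clarity (derived from the two equations above, nothing new): subtracting the first
    // equation from the second and then solving for offset_c gives the scale_out / offset_out
    // expressions used further below, where [C_min, C_max] = [mean_c - num_sd * std_c, mean_c + num_sd * std_c]:
    //     C_max - C_min = scale_c * (type_max - type_min)
    //       => scale_c  = (C_max - C_min) / (type_max - type_min) = (2 * num_sd * std_c) / 255   (8-bit types)
    //       => offset_c = type_min - C_min / scale_c = type_min - (mean_c - num_sd * std_c) / scale_c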
    const int32_t a_offset = a_q_info.uniform().offset;
    const float   a_scale  = a_q_info.uniform().scale;
    const int32_t b_offset = b_q_info.uniform().offset;
    const float   b_scale  = b_q_info.uniform().scale;

    // Integer value statistics. Valid for both Lhs/A and Rhs/B
    const float     mean_a_int = (t_max + t_min) / 2.f;
    constexpr float var_a_int  = (256 * 256 - 1) / 12.f; // Discrete uniform RV variance
    const float     mean_b_int = mean_a_int;             // A_int and B_int have the same stats
    constexpr float var_b_int  = var_a_int;

    // Lhs/A stats
    const float max_a  = (t_max - a_offset) * a_scale;
    const float min_a  = (t_min - a_offset) * a_scale;
    const float mean_a = (max_a + min_a) / 2;
    const float var_a  = (max_a - min_a) * (max_a - min_a) / 12;

    // Rhs/B stats
    const float max_b  = (t_max - b_offset) * b_scale;
    const float min_b  = (t_min - b_offset) * b_scale;
    const float mean_b = (max_b + min_b) / 2;
    const float var_b  = (max_b - min_b) * (max_b - min_b) / 12;

    // Integer multiplication output/M stats
    const float mean_m_int = K * mean_a_int * mean_b_int;
    const float var_m_int  = K * (var_a_int * var_b_int + var_a_int * mean_b_int * mean_b_int + var_b_int * mean_a_int * mean_a_int);
    const float std_m_int  = sqrt(var_m_int);

    // Bias/D both Int and Float statistics
    const float mean_d_int = bias_fraction * mean_m_int;
    const float std_d_int  = bias_fraction * std_m_int;
    const float mean_d     = a_scale * b_scale * mean_d_int;
    const float std_d      = a_scale * b_scale * std_d_int;
    const float var_d      = std_d * std_d;

    // Also calculate the suggested bias range
    const int32_t min_bias = mean_d_int - (num_sd * std_d_int);
    const int32_t max_bias = mean_d_int + (num_sd * std_d_int);

    // Output/C stats
    const float mean_out = K * mean_a * mean_b + mean_d;
    const float var_out  = K * (var_a * var_b + var_a * mean_b * mean_b + var_b * mean_a * mean_a) + var_d;
    const float std_out  = sqrt(var_out);

    // Output quantization setup
    const float   scale_out  = (2 * num_sd) * std_out / 255;
    const int32_t offset_out = static_cast<int32_t>(t_min - (mean_out - (num_sd * std_out)) / scale_out);

    c_q_info = QuantizationInfo(scale_out, offset_out);

    return { c_q_info, min_bias, max_bias };
}

template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<half> &in, SimpleTensor<half> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<int> &in, SimpleTensor<int> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<short> &in, SimpleTensor<short> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<char> &in, SimpleTensor<char> &roi, const Coordinates &coord);
template void zeros(SimpleTensor<float> &in, const Coordinates &anchor, const TensorShape &shape);
template void zeros(SimpleTensor<half> &in, const Coordinates &anchor, const TensorShape &shape);
template void transpose_matrix(const SimpleTensor<float> &in, SimpleTensor<float> &out);
template void transpose_matrix(const SimpleTensor<half> &in, SimpleTensor<half> &out);
template void transpose_matrix(const SimpleTensor<int> &in, SimpleTensor<int> &out);
template void transpose_matrix(const SimpleTensor<short> &in, SimpleTensor<short> &out);
template void transpose_matrix(const SimpleTensor<char> &in, SimpleTensor<char> &out);
template void transpose_matrix(const SimpleTensor<int8_t> &in, SimpleTensor<int8_t> &out);
template void transpose_matrix(const SimpleTensor<uint8_t> &in, SimpleTensor<uint8_t> &out);
template void matrix_multiply(const SimpleTensor<float> &a, const SimpleTensor<float> &b, SimpleTensor<float> &out);
template void matrix_multiply(const SimpleTensor<half> &a, const SimpleTensor<half> &b, SimpleTensor<half> &out);

} // namespace validation
} // namespace test
} // namespace arm_compute