/*
 * Copyright (c) 2017-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "tests/validation/Helpers.h"
#include "tests/framework/Asserts.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <tuple>

namespace arm_compute
{
namespace test
{
namespace validation
{
template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };
#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm8(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<int8_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm8_signed(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint16_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qasymm16(src[i], quantization_info);
    }
    return dst;
}

template <>
SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<uint8_t>          dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm8(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<int8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<int8_t>           dst{ src.shape(), DataType::QASYMM8_SIGNED, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm8_signed(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<uint16_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<uint16_t>         dst{ src.shape(), DataType::QASYMM16, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qasymm16(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<int16_t> convert_to_symmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
{
    SimpleTensor<int16_t>          dst{ src.shape(), DataType::QSYMM16, 1, quantization_info };
    const UniformQuantizationInfo &qinfo = quantization_info.uniform();

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = quantize_qsymm16(src[i], qinfo);
    }
    return dst;
}

template <>
SimpleTensor<float> convert_from_symmetric(const SimpleTensor<int16_t> &src)
{
    const UniformQuantizationInfo &quantization_info = src.quantization_info().uniform();
    SimpleTensor<float>            dst{ src.shape(), DataType::F32, 1, QuantizationInfo(), src.data_layout() };

#if defined(_OPENMP)
    #pragma omp parallel for
#endif /* _OPENMP */
    for(int i = 0; i < src.num_elements(); ++i)
    {
        dst[i] = dequantize_qsymm16(src[i], quantization_info);
    }
    return dst;
}

template <typename T>
void matrix_multiply(const SimpleTensor<T> &a, const SimpleTensor<T> &b, SimpleTensor<T> &out)
{
    ARM_COMPUTE_ERROR_ON(a.shape()[0] != b.shape()[1]);
    ARM_COMPUTE_ERROR_ON(a.shape()[1] != out.shape()[1]);
    ARM_COMPUTE_ERROR_ON(b.shape()[0] != out.shape()[0]);

    const int M = a.shape()[1]; // Rows
    const int N = b.shape()[0]; // Cols
    const int K = b.shape()[1];

#if defined(_OPENMP)
    #pragma omp parallel for collapse(2)
#endif /* _OPENMP */
    for(int y = 0; y < M; ++y)
    {
        for(int x = 0; x < N; ++x)
        {
            float acc = 0.0f;
            for(int k = 0; k < K; ++k)
            {
                acc += a[y * K + k] * b[x + k * N];
            }

            out[x + y * N] = acc;
        }
    }
}

template <typename T>
void transpose_matrix(const SimpleTensor<T> &in, SimpleTensor<T> &out)
{
    ARM_COMPUTE_ERROR_ON((in.shape()[0] != out.shape()[1]) || (in.shape()[1] != out.shape()[0]));

    const int width  = in.shape()[0];
    const int height = in.shape()[1];

#if defined(_OPENMP)
    #pragma omp parallel for collapse(2)
#endif /* _OPENMP */
    for(int y = 0; y < height; ++y)
    {
        for(int x = 0; x < width; ++x)
        {
            const T val = in[x + y * width];

            out[x * height + y] = val;
        }
    }
}

template <typename T>
void get_tile(const SimpleTensor<T> &in, SimpleTensor<T> &tile, const Coordinates &coord)
{
    ARM_COMPUTE_ERROR_ON(tile.shape().num_dimensions() > 2);

    const int w_tile = tile.shape()[0];
    const int h_tile = tile.shape()[1];

    // Fill the tile with zeros
    std::fill(tile.data() + 0, (tile.data() + (w_tile * h_tile)), static_cast<T>(0));

    // Check whether coordinates in dimensions greater than 2 would cause out-of-bound reads
    for(size_t d = 2; d < Coordinates::num_max_dimensions; ++d)
    {
        if(coord[d] < 0 || coord[d] >= static_cast<int>(in.shape()[d]))
        {
            ARM_COMPUTE_ERROR("coord[d] < 0 || coord[d] >= in.shape()[d] with d >= 2");
        }
    }

    // Since we could have out-of-bound reads along the X and Y dimensions,
    // we start calculating the input address with x = 0 and y = 0
    Coordinates start_coord = coord;
    start_coord[0]          = 0;
    start_coord[1]          = 0;

    // Get input and roi pointers
    auto in_ptr  = static_cast<const T *>(in(start_coord));
    auto roi_ptr = static_cast<T *>(tile.data());

    const int x_in_start = std::max(0, coord[0]);
    const int y_in_start = std::max(0, coord[1]);
    const int x_in_end   = std::min(static_cast<int>(in.shape()[0]), coord[0] + w_tile);
    const int y_in_end   = std::min(static_cast<int>(in.shape()[1]), coord[1] + h_tile);

    // Number of elements to copy per row
    const int n = x_in_end - x_in_start;

    // Starting coordinates for the ROI
    const int x_tile_start = coord[0] > 0 ? 0 : std::abs(coord[0]);
    const int y_tile_start = coord[1] > 0 ? 0 : std::abs(coord[1]);

    // Update input pointer
    in_ptr += x_in_start;
    in_ptr += (y_in_start * in.shape()[0]);

    // Update ROI pointer
    roi_ptr += x_tile_start;
    roi_ptr += (y_tile_start * tile.shape()[0]);

    for(int y = y_in_start; y < y_in_end; ++y)
    {
        // Copy per row
        std::copy(in_ptr, in_ptr + n, roi_ptr);

        in_ptr += in.shape()[0];
        roi_ptr += tile.shape()[0];
    }
}

template <typename T>
void zeros(SimpleTensor<T> &in, const Coordinates &anchor, const TensorShape &shape)
{
    ARM_COMPUTE_ERROR_ON(anchor.num_dimensions() != shape.num_dimensions());
    ARM_COMPUTE_ERROR_ON(in.shape().num_dimensions() > 2);
    ARM_COMPUTE_ERROR_ON(shape.num_dimensions() > 2);

    // Check that the anchor and shape stay within the input bounds (no out-of-bound writes)
    for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d)
    {
        if(anchor[d] < 0 || ((anchor[d] + shape[d]) > in.shape()[d]))
        {
            ARM_COMPUTE_ERROR("anchor[d] < 0 || (anchor[d] + shape[d]) > in.shape()[d]");
        }
    }

    // Get input pointer
    auto in_ptr = static_cast<T *>(in(anchor[0] + anchor[1] * in.shape()[0]));

    const unsigned int n = in.shape()[0];

    for(unsigned int y = 0; y < shape[1]; ++y)
    {
        std::fill(in_ptr, in_ptr + shape[0], 0);
        in_ptr += n;
    }
}

std::pair<int, int> get_quantized_bounds(const QuantizationInfo &quant_info, float min, float max)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qasymm8(min, quant_info.uniform());
    const int max_bound = quantize_qasymm8(max, quant_info.uniform());
    return std::pair<int, int> { min_bound, max_bound };
}

std::pair<int, int> get_quantized_qasymm8_signed_bounds(const QuantizationInfo &quant_info, float min, float max)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qasymm8_signed(min, quant_info.uniform());
    const int max_bound = quantize_qasymm8_signed(max, quant_info.uniform());
    return std::pair<int, int> { min_bound, max_bound };
}

std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id)
{
    ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower than or equal to max");

    const int min_bound = quantize_qsymm8_per_channel(min, quant_info, channel_id);
    const int max_bound = quantize_qsymm8_per_channel(max, quant_info, channel_id);
    return std::pair<int, int> { min_bound, max_bound };
}

void add_padding_x(std::initializer_list<ITensor *> tensors, const DataLayout &data_layout, bool only_right_pad)
{
    if(data_layout == DataLayout::NHWC)
    {
        constexpr unsigned int lower = 1U;
        constexpr unsigned int upper = 16U;

        std::uniform_int_distribution<unsigned int> distribution(lower, upper);
        size_t                                      seed_offset = 0;

        for(ITensor *tensor : tensors)
        {
            ARM_COMPUTE_ERROR_ON(!tensor->info()->is_resizable());

            std::mt19937 gen(library->seed() + seed_offset++);

            const unsigned int right = distribution(gen);
            const unsigned int left  = only_right_pad ? 0 : distribution(gen);

            tensor->info()->extend_padding(PaddingSize(0U, right, 0U, left));
        }
    }
}

QuantizationHint suggest_conv_dst_q_info_and_bias(const QuantizationInfo &in_q_info,
                                                  const QuantizationInfo &weight_q_info,
                                                  int32_t                 height,
                                                  int32_t                 width,
                                                  int32_t                 channels,
                                                  DataType                data_type,
                                                  float                   bias_fraction)
{
    /** Quantization Setup of convolution
     *
     * Just like any other multiply-accumulate, the convolution (2D) operation
     * multiplies and accumulates the input and weight tensors. This accumulation
     * takes place across three dimensions: height, width and channels, all of which
     * belong to the weight tensor.
     *
     * The formula for simple convolution can be written as:
     *      C = sum_h sum_w sum_c(I[h_offset + h, w_offset + w, c] * W[h, w, c])
     *
     * Here, h_offset and w_offset are the starting positions in the image. Effects
     * of padding are ignored. This accumulation reduces to something like
     *
     *      C = sum_m(I_index * W_hwc)
     * where m is height x width x channels.
     *
     * Non-unit strides and/or dilations do not change the probabilistic nature of
     * this sum because we always iterate over the full size of the weight tensor.
     *
     * Padding may affect this summation, but it's a boundary condition and so is
     * neglected for brevity.
     */

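    // Illustrative only (hypothetical tensor shapes, not part of the original comment): a 3x3
    // kernel over 64 input channels gives an accumulation depth of 3 * 3 * 64 = 576
    // multiply-accumulates per output element, so the call below would behave exactly like:
    //     suggest_mac_dst_q_info_and_bias(in_q_info, weight_q_info, 576, data_type, bias_fraction);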
    return suggest_mac_dst_q_info_and_bias(in_q_info, weight_q_info, height * width * channels, data_type, bias_fraction);
}

QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info,
                                                    const QuantizationInfo &rhs_q_info,
                                                    int32_t m, int32_t n, int32_t k, DataType data_type,
                                                    float bias_fraction)
{
    ARM_COMPUTE_UNUSED(m, n);

    /** Quantization Setup of matrix multiplication
     *
     * We have a matrix multiplication of the form C = A * B + D
     * where A is (m x k), B is (k x n) and C is therefore (m x n).
     * The bias, D, is (1 x n).
     *
     * If we have some distributional statistics of A, B and D, i.e. mean and variance,
     * we can estimate the mean and variance of a single value in the C matrix and pick
     * good scale and offset values for the output and have non-saturated tests.
     *
     * Each element in the output matrix can be calculated as follows:
     *      C_ij = sum_k(A_ik * B_kj) + D_j
     *
     * Note: All possible A_ik, B_kj, D_j random variables are assumed mutually independent.
     * Note: In quantized operators the bias is an integer, but its quantization scale is
     *       assumed to be equal to lhs_scale * rhs_scale and its offset to be 0.
     * Note: Since the bias is an integer that must be provided as an input, we need to pick
     *       reasonable values when adding it on top of the summation. This is where "bias_fraction"
     *       comes into play. Based on the fraction given, we also return a suggested bias range
     *       (min/max) that does not saturate the output.
     *
     * Because all random variables are mutually independent, any C_ij has the same statistics,
     * which is why we return a single destination quantization info object; and why we can
     * resort to the more general calculation explained in suggest_mac_dst_q_info_and_bias().
     *
     * From a probabilistic perspective, the above calculation reduces to
     *      c = sum_k (a_k * b_k) + d
     */

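    // Illustrative only (hypothetical sizes, not part of the original comment): for a
    // [32 x 256] * [256 x 64] multiplication the accumulation depth is k = 256, independent of
    // m and n (hence ARM_COMPUTE_UNUSED above), so the call below would behave exactly like:
    //     suggest_mac_dst_q_info_and_bias(lhs_q_info, rhs_q_info, 256, data_type, bias_fraction);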
    return suggest_mac_dst_q_info_and_bias(lhs_q_info, rhs_q_info, k, data_type, bias_fraction);
}

QuantizationHint suggest_mac_dst_q_info_and_bias(
    const QuantizationInfo &a_q_info, const QuantizationInfo &b_q_info, int32_t K, DataType data_type, float bias_fraction, int num_sd)
{
    QuantizationInfo c_q_info;

    ARM_COMPUTE_ASSERT(data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED);

    const int32_t t_max = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::max() : std::numeric_limits<int8_t>::max());
    const int32_t t_min = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::min() : std::numeric_limits<int8_t>::min());

    /** Quantization Setup of multiply-accumulate
     *
     * Expression (in float):
     *      C = sum_k ( A_k * B_k ) + D
     *
     * Lemma: An affine transformation (i.e. aX + b) of a discrete uniform random variable
     *        yields another discrete uniform random variable.
     *
     * Terminology:
     *      E[X]: Mean of the random variable X (sometimes referred to as mu_x)
     *      var(X): Variance of the random variable X (sometimes referred to as sigma^2_x)
     *      std(X): sqrt(var(X)), standard deviation of X
     *
     * 1) Calculate the mean:
     *      E[C] = sum_k( E[A_k] * E[B_k] ) + E[D] = K * mean_a * mean_b + mean_d
     *
     *      Since the elements of A and B are uniformly distributed random variables, we have
     *          mean_a = (max_a + min_a) / 2, mean_b = (max_b + min_b) / 2
     *      max_a/b and min_a/b can be calculated from scale_a/b and offset_a/b by substituting
     *      the data type minimum and maximum into the quantization equations.
     *
     *      We don't know mean_d because we have to choose it based on bias_fraction. If we call
     *      the integer summation M_int then, similarly to the above, we have:
     *
     *      E[C_int] = sum_k( E[A_k_int] * E[B_k_int] ) + E[D_int] = K * mean_a_int * mean_b_int + mean_d_int
     *                 \___________________________/
     *                           E[M_int]
     *
     *      We choose a bias mean proportional to the integer summation. This proportion is "bias_fraction".
     *      So, we have D_int = f * M_int (f: fraction), and
     *          E[D_int] = mean_d_int = f * E[M_int]
     *
     *      This also means, for the floating point value of D, the following:
     *          E[D] = mean_d = E[D_int] * a_scale * b_scale
     *
     * 2) Calculate the variance:
     *      var(C) = sum_k( var(A_k * B_k) ) + var(D)
     *             = sum_k ( E[A_k^2 * B_k^2] - E[A_k]^2 * E[B_k]^2 )
     *             = ...
     *             = K * (var_a * var_b + var_a * mean^2_b + var_b * mean^2_a) + var_d
     *
     *      Similarly, due to uniform random variable properties, we have
     *          var_a = (max_a - min_a)^2 / 12
     *          var_b = (max_b - min_b)^2 / 12
     *
     *      Again, we don't know var_d as we don't know the bias. As set out in the previous section, we have
     *          var(D_int) = var(f * M_int) = f^2 * var(M_int)
     *
     *      Using the same expression, we can find var(M_int):
     *      var(C_int) = sum_k( var(A_k_int * B_k_int) ) + var(D_int)
     *                 = sum_k ( E[A_k_int^2 * B_k_int^2] - E[A_k_int]^2 * E[B_k_int]^2 )
     *                 = ...
     *                 = K * (var_a_int * var_b_int + var_a_int * mean^2_b_int + var_b_int * mean^2_a_int) + var_d_int
     *                   \_______________________________________________________________________________/
     *                                                    var(M_int)
     *
     *      Now that we know the mean and variance of D_int, we can return a suitable bias range as
     *          [mean_d_int +/- num_sd * std_d_int]
     *
     *      This also means, for the floating point value of D, the following:
     *          var(D) = var_d = var(D_int) * a_scale^2 * b_scale^2
     *
     *      E[D] and var(D) calculated in steps (1) and (2) can be substituted into the E[C] and var(C) calculations.
     *
     * 3) Now we have an idea of what an average C looks like and how much deviation is present
     *    around it. The exact distribution of C is difficult to derive and depends on K.
     *    But, as K increases, due to the Central Limit Theorem, it looks more and more like a
     *    bell-shaped curve, approaching a normal distribution.
     *
     *    This is useful because, in a normal distribution, values within +/- 2 standard deviations
     *    of the mean constitute about 95% of the values. Therefore, a plausible range for us is:
     *          C_range = [C_min, C_max] = [mean_c - 2 * std_c, mean_c + 2 * std_c]
     *
     * 4)
     *    If we map this [C_min, C_max] to [0, 255] or [-128, 127] depending on the signedness of the
     *    data type, we can find a suitable scale and offset for the output. On average, it's expected
     *    that 5% of the output values will saturate and 95% will remain in the range.
     *
     *    The equations to be solved for offset_c and scale_c are:
     *          C_min = scale_c * (type_min - offset_c)
     *          C_max = scale_c * (type_max - offset_c)
     */

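    // For clarity (derived from the two equations above, nothing new): subtracting the first
    // equation from the second and then solving for offset_c gives the scale_out / offset_out
    // expressions used further below, where [C_min, C_max] = [mean_c - num_sd * std_c, mean_c + num_sd * std_c]:
    //     C_max - C_min = scale_c * (type_max - type_min)
    //       => scale_c  = (C_max - C_min) / (type_max - type_min) = (2 * num_sd * std_c) / 255   (8-bit types)
    //       => offset_c = type_min - C_min / scale_c = type_min - (mean_c - num_sd * std_c) / scale_c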
    const int32_t a_offset = a_q_info.uniform().offset;
    const float   a_scale  = a_q_info.uniform().scale;
    const int32_t b_offset = b_q_info.uniform().offset;
    const float   b_scale  = b_q_info.uniform().scale;

    // Integer value statistics. Valid for both Lhs/A and Rhs/B
    const float     mean_a_int = (t_max + t_min) / 2.f;
    constexpr float var_a_int  = (256 * 256 - 1) / 12.f; // Discrete uniform RV variance
    const float     mean_b_int = mean_a_int;             // A_int and B_int have the same stats
    constexpr float var_b_int  = var_a_int;

    // Lhs/A stats
    const float max_a  = (t_max - a_offset) * a_scale;
    const float min_a  = (t_min - a_offset) * a_scale;
    const float mean_a = (max_a + min_a) / 2;
    const float var_a  = (max_a - min_a) * (max_a - min_a) / 12;

    // Rhs/B stats
    const float max_b  = (t_max - b_offset) * b_scale;
    const float min_b  = (t_min - b_offset) * b_scale;
    const float mean_b = (max_b + min_b) / 2;
    const float var_b  = (max_b - min_b) * (max_b - min_b) / 12;

    // Integer multiplication output/M stats
    const float mean_m_int = K * mean_a_int * mean_b_int;
    const float var_m_int  = K * (var_a_int * var_b_int + var_a_int * mean_b_int * mean_b_int + var_b_int * mean_a_int * mean_a_int);
    const float std_m_int  = sqrt(var_m_int);

    // Bias/D both Int and Float statistics
    const float mean_d_int = bias_fraction * mean_m_int;
    const float std_d_int  = bias_fraction * std_m_int;
    const float mean_d     = a_scale * b_scale * mean_d_int;
    const float std_d      = a_scale * b_scale * std_d_int;
    const float var_d      = std_d * std_d;

    // Also calculate the suggested bias range
    const int32_t min_bias = mean_d_int - (num_sd * std_d_int);
    const int32_t max_bias = mean_d_int + (num_sd * std_d_int);

    // Output/C stats
    const float mean_out = K * mean_a * mean_b + mean_d;
    const float var_out  = K * (var_a * var_b + var_a * mean_b * mean_b + var_b * mean_a * mean_a) + var_d;
    const float std_out  = sqrt(var_out);

    // Output quantization setup
    const float   scale_out  = (2 * num_sd) * std_out / 255;
    const int32_t offset_out = static_cast<int32_t>(t_min - (mean_out - (num_sd * std_out)) / scale_out);

    c_q_info = QuantizationInfo(scale_out, offset_out);

    return { c_q_info, min_bias, max_bias };
}

template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<half> &in, SimpleTensor<half> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<int> &in, SimpleTensor<int> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<short> &in, SimpleTensor<short> &roi, const Coordinates &coord);
template void get_tile(const SimpleTensor<char> &in, SimpleTensor<char> &roi, const Coordinates &coord);
template void zeros(SimpleTensor<float> &in, const Coordinates &anchor, const TensorShape &shape);
template void zeros(SimpleTensor<half> &in, const Coordinates &anchor, const TensorShape &shape);
template void transpose_matrix(const SimpleTensor<float> &in, SimpleTensor<float> &out);
template void transpose_matrix(const SimpleTensor<half> &in, SimpleTensor<half> &out);
template void transpose_matrix(const SimpleTensor<int> &in, SimpleTensor<int> &out);
template void transpose_matrix(const SimpleTensor<short> &in, SimpleTensor<short> &out);
template void transpose_matrix(const SimpleTensor<char> &in, SimpleTensor<char> &out);
template void transpose_matrix(const SimpleTensor<int8_t> &in, SimpleTensor<int8_t> &out);
template void transpose_matrix(const SimpleTensor<uint8_t> &in, SimpleTensor<uint8_t> &out);
template void matrix_multiply(const SimpleTensor<float> &a, const SimpleTensor<float> &b, SimpleTensor<float> &out);
template void matrix_multiply(const SimpleTensor<half> &a, const SimpleTensor<half> &b, SimpleTensor<half> &out);

} // namespace validation
} // namespace test
} // namespace arm_compute