Blame - src/cpu/kernels/CpuQuantizeKernel.cpp - ml/ComputeLibrary

blob: d2ac6cf8acaed9b7f693ddbe652e7655c1bd94b3 [file] [log] [blame]

Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	1	/*
Mohammed Suhail Munshi	0a48c4c	2024-01-30 18:25:51 +0000	[diff] [blame]	2	* Copyright (c) 2017-2022, 2024 Arm Limited.
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Georgios Pinitas	7891a73	2021-08-20 21:39:25 +0100	[diff] [blame]	24	#include "src/cpu/kernels/CpuQuantizeKernel.h"
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	25
				26	#include "arm_compute/core/Error.h"
				27	#include "arm_compute/core/Helpers.h"
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	28	#include "arm_compute/core/Utils.h"
				29	#include "arm_compute/core/Validate.h"
				30	#include "arm_compute/core/Window.h"
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	31
				32	#include "src/core/CPP/Validate.h"
				33	#include "src/core/helpers/AutoConfiguration.h"
				34	#include "src/core/helpers/WindowHelpers.h"
Georgios Pinitas	ddb93bb	2020-10-02 16:38:59 +0100	[diff] [blame]	35	#include "src/core/NEON/NEAsymm.h"
				36	#include "src/core/NEON/NEMath.h"
				37	#include "src/core/NEON/wrapper/wrapper.h"
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	38
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	39	#include <arm_neon.h>
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	40	#include <map>
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	41
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	42	namespace arm_compute
				43	{
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	44	namespace cpu
				45	{
				46	namespace kernels
				47	{
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	48	namespace
				49	{
Sang-Hoon Park	fb6aaeb	2019-11-27 15:26:44 +0000	[diff] [blame]	50	constexpr auto window_step = 16;
					// window_step: number of elements consumed per iteration of the vectorised x-loops in this file,
					// and the lane count given to neon_vector_t in the vector_type alias.
				51
					// Checks that a (src, dst) tensor-info pair is acceptable to this kernel.
					// Each ARM_COMPUTE_RETURN_* macro is expected to return early with an error Status when its
					// condition is hit (the macro definitions are not visible here); otherwise Status{} is returned.
					// NOTE(review): src and dst are used with -> below, so they are presumably `const ITensorInfo *`;
					// the `*` looks lost in this blame-page rendering - confirm against the repository.
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	52	Status validate_arguments(const ITensorInfo src, const ITensorInfo dst)
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	53	{
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	54	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
				55	ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
					// Source data types accepted: QASYMM8, QASYMM8_SIGNED, F16, F32 (the `1` is presumably the channel count).
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	56	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
				57	DataType::F16, DataType::F32);
					// A destination whose tensor shape has a total size of zero is treated as an error.
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	58	ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
					// Destination data types accepted: QSYMM8, QASYMM8, QASYMM8_SIGNED, QASYMM16.
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	59	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QSYMM8, DataType::QASYMM8,
				60	DataType::QASYMM8_SIGNED, DataType::QASYMM16);
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	61	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	62
				63	return Status{};
				64	}
				65
					// Generic loader: reads one 16-element vector of T from input_ptr with wrapper::vloadq and
					// converts it to a float32x4x4_t through convert_to_float32x4x4.
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	66	template <typename T>
				67	inline float32x4x4_t load_value(const T *input_ptr)
				68	{
					// Vector type holding 16 lanes of T; used to pick the matching conversion overload.
				69	using Tx16_t = typename wrapper::traits::neon_vector<T, 16>::type;
				70	return arm_compute::convert_to_float32x4x4<Tx16_t>(wrapper::vloadq(input_ptr));
				71	}
				72
					// float specialisation: no conversion step; the four result registers are loaded directly
					// from element offsets 0, 4, 8 and 12 of input_ptr.
				73	template <>
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	74	inline float32x4x4_t load_value(const float *input_ptr)
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	75	{
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	76	return {wrapper::vloadq(input_ptr), wrapper::vloadq(input_ptr + 4), wrapper::vloadq(input_ptr + 8),
				77	wrapper::vloadq(input_ptr + 12)};
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	78	}
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	79	#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
					// float16_t specialisation, compiled only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined:
					// loads from element offsets 0, 4, 8 and 12 with wrapper::vload and converts each load to
					// single precision with vcvt_f32_f16.
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	80	template <>
				81	inline float32x4x4_t load_value(const float16_t *input_ptr)
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	82	{
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	83	return {vcvt_f32_f16(wrapper::vload(input_ptr)), vcvt_f32_f16(wrapper::vload(input_ptr + 4)),
				84	vcvt_f32_f16(wrapper::vload(input_ptr + 8)), vcvt_f32_f16(wrapper::vload(input_ptr + 12))};
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	85	}
				86
				87	#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Sang-Hoon Park	fb6aaeb	2019-11-27 15:26:44 +0000	[diff] [blame]	88
					// Alias for the NEON vector type that holds window_step lanes of element_type.
				89	template <typename element_type>
				90	using vector_type = wrapper::traits::neon_vector_t<element_type, window_step>;
				91
					// Quantises the 16 floats held in qv to a vector of quantized_type using the uniform
					// quantisation info qi. Only declared here; the two explicit specialisations below supply
					// the implementations, so any other quantized_type has no definition in this file.
				92	template <typename quantized_type>
				93	vector_type<quantized_type> vquantize_qasymm8(const float32x4x4_t &qv, const UniformQuantizationInfo &qi);
				94
					// uint8_t output: forwards to vquantize.
				95	template <>
				96	vector_type<uint8_t> vquantize_qasymm8<uint8_t>(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
				97	{
				98	return vquantize(qv, qi);
				99	}
				100
					// int8_t output: forwards to vquantize_signed.
				101	template <>
				102	vector_type<int8_t> vquantize_qasymm8<int8_t>(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
				103	{
				104	return vquantize_signed(qv, qi);
				105	}
				106
					// recombine_8_16: narrows two int16x8_t halves to 8 bits each and joins them into a single
					// 16-lane vector (lower first, then upper). The overload is chosen by the signedness of TOut
					// through the defaulted enable_if template parameter.
					// Signed TOut: narrows with wrapper::vqmovn (presumably the NEON saturating narrow - confirm in wrapper.h).
Mohammed Suhail Munshi	0a48c4c	2024-01-30 18:25:51 +0000	[diff] [blame]	107	template <typename TOut, typename = typename std::enable_if<std::is_signed<TOut>::value, bool>::type>
				108	inline int8x16_t recombine_8_16(int16x8_t lower, int16x8_t upper)
				109	{
				110	return wrapper::vcombine(wrapper::vqmovn(lower), wrapper::vqmovn(upper));
				111	}
				112
					// Unsigned TOut: narrows signed 16-bit input to an unsigned result with wrapper::vqmovun
					// (presumably the NEON saturating signed-to-unsigned narrow - confirm in wrapper.h).
				113	template <typename TOut, typename = typename std::enable_if<std::is_unsigned<TOut>::value, bool>::type>
				114	inline uint8x16_t recombine_8_16(int16x8_t lower, int16x8_t upper)
				115	{
				116	return wrapper::vcombine(wrapper::vqmovun(lower), wrapper::vqmovun(upper));
				117	}
				118
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	119	} // namespace
				120
					// Configures the kernel for a (src, dst) pair: validates the arguments, selects the member
					// function stored in _func, and sets the execution window.
					// The function is chosen by building the key "op_[OFFSET_ONLY_[CONVERT_]]<src type>_<dst type>"
					// and looking it up in quant_map.
					// NOTE(review): src and dst are dereferenced with -> below, so they are presumably pointers whose
					// `*` was lost in this blame-page rendering - confirm against the repository.
Georgios Pinitas	ef516e8	2021-04-30 14:46:05 +0100	[diff] [blame]	121	void CpuQuantizeKernel::configure(const ITensorInfo src, ITensorInfo dst)
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	122	{
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	123	ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
				124	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	125
					// Key -> member-function table; a function-local static, so it is built once.
					// NOTE(review): validate_arguments accepts QSYMM8 as a destination for every source type, but
					// the only QSYMM8 entry here is op_F32_QSYMM8; other sources reach the error branch below - confirm intended.
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	126	static const std::map<std::string, QuantizeFunctionExecutorPtr> quant_map = {
				127	{"op_QASYMM8_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<uint8_t, uint8_t>},
				128	{"op_QASYMM8_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<uint8_t, int8_t>},
				129	{"op_QASYMM8_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<uint8_t>},
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	130
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	131	{"op_QASYMM8_SIGNED_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<int8_t, uint8_t>},
				132	{"op_QASYMM8_SIGNED_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<int8_t, int8_t>},
				133	{"op_QASYMM8_SIGNED_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<int8_t>},
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	134
Mohammed Suhail Munshi	0a48c4c	2024-01-30 18:25:51 +0000	[diff] [blame]	135	// Functions for offset only requantization
				136	{"op_OFFSET_ONLY_QASYMM8_QASYMM8", &CpuQuantizeKernel::run_requantize_offset_only<uint8_t, uint8_t>},
				137	{"op_OFFSET_ONLY_QASYMM8_QASYMM8_SIGNED", &CpuQuantizeKernel::run_requantize_offset_only<uint8_t, int8_t>},
				138	{"op_OFFSET_ONLY_QASYMM8_SIGNED_QASYMM8", &CpuQuantizeKernel::run_requantize_offset_only<int8_t, uint8_t>},
				139	{"op_OFFSET_ONLY_QASYMM8_SIGNED_QASYMM8_SIGNED",
				140	&CpuQuantizeKernel::run_requantize_offset_only<int8_t, int8_t>},
				141
				142	// Functions for offset uint8 to int8 and vice versa quantization (no scale changes)
				143	{"op_OFFSET_ONLY_CONVERT_QASYMM8_SIGNED_QASYMM8",
				144	&CpuQuantizeKernel::run_requantize_offset_only_convert<int8_t, uint8_t>},
				145	{"op_OFFSET_ONLY_CONVERT_QASYMM8_QASYMM8_SIGNED",
				146	&CpuQuantizeKernel::run_requantize_offset_only_convert<uint8_t, int8_t>},
				147
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	148	{"op_F32_QSYMM8", &CpuQuantizeKernel::run_quantize_qsymm8<float, int8_t>},
Pablo Marquez Tello	a5d61bf	2022-03-17 12:52:02 +0000	[diff] [blame]	149
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	150	{"op_F32_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<float, uint8_t>},
				151	{"op_F32_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<float, int8_t>},
				152	{"op_F32_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<float>},
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	153
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	154	#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	155	{"op_F16_QASYMM8", &CpuQuantizeKernel::run_quantize_qasymm8<float16_t, uint8_t>},
				156	{"op_F16_QASYMM8_SIGNED", &CpuQuantizeKernel::run_quantize_qasymm8<float16_t, int8_t>},
				157	{"op_F16_QASYMM16", &CpuQuantizeKernel::run_quantize_qasymm16<float16_t>},
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	158	#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
				159	};
				160
					// The lookup key is assembled incrementally, starting from the common "op_" prefix.
				161	std::string function_to_call("op_");
Mohammed Suhail Munshi	0a48c4c	2024-01-30 18:25:51 +0000	[diff] [blame]	162
				163	// For offset only functions - must be 8-bit and have identical scale values.
				164	if (src->quantization_info().scale() == dst->quantization_info().scale() &&
				165	(is_data_type_quantized_asymmetric_char(src->data_type()) &&
				166	is_data_type_quantized_asymmetric_char(dst->data_type())))
				167	{
				168	function_to_call += "OFFSET_ONLY_";
				169	// For optimized datatype conversion 8-bit re-quantization offset only functions.
				170	// These must have an offset of exactly 128 to match requirements - has specific circumstances to match use case.
				171	auto uqinfo =
				172	compute_requantization_scale_offset(src->quantization_info().uniform(), dst->quantization_info().uniform());
				173	const auto src_dt = src->data_type();
					// CONVERT_ is appended only when the types differ and the computed offset is +128 for a
					// QASYMM8_SIGNED source or -128 for a QASYMM8 source.
					// NOTE(review): the `\|\|` on the next line looks like an escaped `||` left by page extraction - confirm.
				174	if (src->data_type() != dst->data_type() && ((src_dt == DataType::QASYMM8_SIGNED && uqinfo.offset == 128) \|\|
				175	(src_dt == DataType::QASYMM8 && uqinfo.offset == -128)))
				176	{
				177	function_to_call += "CONVERT_";
				178	}
				179	}
				180
				181	// Specify datatype for function
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	182	function_to_call += string_from_data_type(src->data_type()) + "_";
				183	function_to_call += string_from_data_type(dst->data_type());
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	184
				185	auto it = quant_map.find(function_to_call);
				186
					// No table entry for the assembled key: report the error.
					// NOTE(review): `it->second` is read right after this branch, which relies on
					// ARM_COMPUTE_ERROR not returning - confirm.
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	187	if (it == quant_map.end())
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	188	{
				189	ARM_COMPUTE_ERROR("Unsupported combination of input and output data types");
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	190	}
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	191	_func = it->second;
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	192
Mohammed Suhail Munshi	0a48c4c	2024-01-30 18:25:51 +0000	[diff] [blame]	193	// Calculate window. Squash if possible.
				194	Window win;
					// The helper returns a pair; the second element is stored in _split_dimension.
				195	std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src);
				196
				197	ICpuKernel::configure(win);
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	198	}
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	199
					// Validation entry point: forwards to validate_arguments and propagates its error through
					// ARM_COMPUTE_RETURN_ON_ERROR; returns an empty Status{} otherwise.
Georgios Pinitas	ef516e8	2021-04-30 14:46:05 +0100	[diff] [blame]	200	Status CpuQuantizeKernel::validate(const ITensorInfo src, const ITensorInfo dst)
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	201	{
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	202	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
Alex Gilday	60954c6	2018-03-05 16:22:48 +0000	[diff] [blame]	203	return Status{};
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	204	}
				205
					// Quantises src (elements of TIn) into dst (elements of TOut) over `window`.
					// In configure() this is registered only as <float, int8_t> under the key "op_F32_QSYMM8".
					// The x dimension is walked manually: a vector loop in steps of window_step, then a scalar tail.
Sang-Hoon Park	fb6aaeb	2019-11-27 15:26:44 +0000	[diff] [blame]	206	template <typename TIn, typename TOut>
Pablo Marquez Tello	a5d61bf	2022-03-17 12:52:02 +0000	[diff] [blame]	207	void CpuQuantizeKernel::run_quantize_qsymm8(const ITensor src, ITensor dst, const Window &window)
				208	{
				209	const auto window_start_x = static_cast<int>(window.x().start());
				210	const auto window_end_x = static_cast<int>(window.x().end());
				211
					// uqinfo starts as dst's uniform info and is then replaced, unconditionally, by the result of
					// compute_requantization_scale_offset(src info, dst info).
				212	const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform();
				213	UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform();
Mohammed Suhail Munshi	0a48c4c	2024-01-30 18:25:51 +0000	[diff] [blame]	214	uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo);
				215
Pablo Marquez Tello	a5d61bf	2022-03-17 12:52:02 +0000	[diff] [blame]	216	// Collapse window and reset first dimension to handle tail calculations manually
				217	Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
				218	win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
				219
				220	Iterator input(src, win_collapsed);
				221	Iterator output(dst, win_collapsed);
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	222	execute_window_loop(
				223	win_collapsed,
				224	[&](const Coordinates &)
Pablo Marquez Tello	a5d61bf	2022-03-17 12:52:02 +0000	[diff] [blame]	225	{
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	226	auto input_ptr = reinterpret_cast<const TIn *>(input.ptr());
				227	auto output_ptr = reinterpret_cast<TOut *>(output.ptr());
				228	int x = window_start_x;
					// Vector path: window_step elements per iteration, quantised with the recomputed uqinfo.
				229	for (; x <= (window_end_x - window_step); x += window_step)
				230	{
				231	wrapper::vstore(&output_ptr[x], vquantize_qasymm8<TOut>(load_value(&input_ptr[x]), uqinfo));
				232	}
				233	// Compute left-over elements
					// NOTE(review): the tail calls quantize_qsymm8 with dst's own quantization_info, while the
					// vector path above calls vquantize_qasymm8 with uqinfo - confirm both give the same values.
				234	for (; x < window_end_x; ++x)
				235	{
				236	output_ptr[x] = quantize_qsymm8(input_ptr[x], dst->info()->quantization_info());
				237	}
				238	},
				239	input, output);
Pablo Marquez Tello	a5d61bf	2022-03-17 12:52:02 +0000	[diff] [blame]	240	}
				241
					// Offset-only requantisation between the two 8-bit asymmetric types: adds uqinfo.offset to
					// every element with no clamping step in either the vector or the scalar path.
					// configure() selects this only when src and dst types differ and the computed offset is
					// +128 (QASYMM8_SIGNED source) or -128 (QASYMM8 source).
				242	template <typename TIn, typename TOut>
Mohammed Suhail Munshi	0a48c4c	2024-01-30 18:25:51 +0000	[diff] [blame]	243	void CpuQuantizeKernel::run_requantize_offset_only_convert(const ITensor src, ITensor dst, const Window &window)
				244	{
				245	const auto window_start_x = static_cast<int>(window.x().start());
				246	const auto window_end_x = static_cast<int>(window.x().end());
				247
				248	// Calculate output offset difference.
				249	const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform();
				250	UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform();
				251	uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo);
				252
				253	// Collapse window and reset first dimension to handle tail calculations manually
				254	Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
				255
				256	win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
				257
				258	// Duplicate offset in signed vector format
					// The offset is narrowed to int8_t before being broadcast to all 16 lanes.
				259	const int8x16_t offset = wrapper::vdup_n(static_cast<int8_t>(uqinfo.offset), wrapper::traits::vector_128_tag{});
				260
				261	Iterator input(src, win_collapsed);
				262	Iterator output(dst, win_collapsed);
				263	execute_window_loop(
				264	win_collapsed,
				265	[&](const Coordinates &)
				266	{
				267	auto input_ptr = reinterpret_cast<const TIn *>(input.ptr());
				268	auto output_ptr = reinterpret_cast<TOut *>(output.ptr());
				269	int x = window_start_x;
					// Vector path: load 16 elements, treat them as int8x16_t, add the offset with vaddq_s8,
					// then reinterpret the sum as the output vector type before storing.
				270	for (; x <= (window_end_x - window_step); x += window_step)
				271	{
				272	const wrapper::traits::neon_vector_t<TIn, window_step> qv =
				273	wrapper::vloadq(input_ptr + x); // load 128 bit vector of 8 bit datatype
				274
				275	// Signed addition.
				276	auto res = vaddq_s8(reinterpret_cast<int8x16_t>(qv), offset);
				277
				278	// Output is dependent on datatype.
				279	wrapper::vstore(&output_ptr[x],
				280	reinterpret_cast<wrapper::traits::neon_vector_t<TOut, window_step>>(res));
				281	}
				282	// Compute left-over elements
					// Scalar tail: the sum is formed in int32_t and then cast straight to TOut.
				283	for (; x < window_end_x; ++x)
				284	{
				285	auto result = uqinfo.offset + static_cast<int32_t>(input_ptr[x]);
				286	output_ptr[x] = static_cast<TOut>(result);
				287	}
				288	},
				289	input, output);
				290	}
				291
					// Offset-only requantisation for 8-bit types, with the result limited to the output range.
					// Vector path: widen to 16 bits, add the offset with wrapper::vqadd, narrow back through
					// recombine_8_16<TOut>. Scalar tail: add in int32_t and clamp to [low_bound, upper_bound].
				292	template <typename TIn, typename TOut>
				293	void CpuQuantizeKernel::run_requantize_offset_only(const ITensor src, ITensor dst, const Window &window)
				294	{
				295	const auto window_start_x = static_cast<int>(window.x().start());
				296	const auto window_end_x = static_cast<int>(window.x().end());
				297
				298	const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform();
				299	UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform();
				300	uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo);
				301
				302	// Collapse window and reset first dimension to handle tail calculations manually
				303	Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
				304	win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
				305
				306	// Duplicate offset in signed vector format
				307	const int16x8_t offset = wrapper::vdup_n(static_cast<int16_t>(uqinfo.offset), wrapper::traits::vector_128_tag{});
				308
					// Clamp limits for the scalar tail: 0..255 when dst is QASYMM8, otherwise -128..127.
				309	const int32_t low_bound = (dst->info()->data_type() == DataType::QASYMM8) ? 0 : -128;
				310	const int32_t upper_bound = (dst->info()->data_type() == DataType::QASYMM8) ? 255 : 127;
				311
				312	Iterator input(src, win_collapsed);
				313	Iterator output(dst, win_collapsed);
				314	execute_window_loop(
				315	win_collapsed,
				316	[&](const Coordinates &)
				317	{
				318	auto input_ptr = reinterpret_cast<const TIn *>(input.ptr());
					// NOTE(review): output_ptr is indexed and stored through below, so this line presumably read
					// `TOut *output_ptr = reinterpret_cast<TOut *>(output.ptr());` before the page was
					// extracted - confirm against the repository.
				319	TOut output_ptr = reinterpret_cast<TOut >(output.ptr());
				320
				321	int x = window_start_x;
				322	for (; x <= (window_end_x - window_step); x += window_step)
				323	{
				324	const auto qv = wrapper::vloadq(input_ptr + x); // load 128 bit vector of 8 bit datatype
					// Each half is widened with wrapper::vmovl and the result is reinterpreted as int16x8_t
					// whatever the signedness of TIn.
				325	int16x8_t lower = reinterpret_cast<int16x8_t>(wrapper::vmovl(wrapper::vgetlow(qv)));
				326	int16x8_t upper = reinterpret_cast<int16x8_t>(wrapper::vmovl(wrapper::vgethigh(qv)));
				327
				328	// Signed addition.
				329	lower = wrapper::vqadd(lower, offset);
				330	upper = wrapper::vqadd(upper, offset);
				331
				332	// Output is dependent on datatype.
				333	auto res = recombine_8_16<TOut>(lower, upper);
				334	wrapper::vstore(&output_ptr[x], res);
				335	}
				336	// Compute left-over elements
				337	for (; x < window_end_x; ++x)
				338	{
				339	// Add offset and clamp result to within the range of the output datatype.
				340	int32_t result = uqinfo.offset + static_cast<int32_t>(input_ptr[x]);
				341	result = utility::clamp<int32_t>(result, low_bound, upper_bound);
				342
				343	// Cast result to output datatype.
				344	output_ptr[x] = static_cast<TOut>(result);
				345	}
				346	},
				347	input, output);
				348	}
				349
					// Quantises src (elements of TIn) into an 8-bit dst (elements of TOut) over `window`.
					// When src is itself asymmetric-quantised, the parameters are replaced by the result of
					// compute_requantization_scale_offset; otherwise dst's uniform info is used as is.
				350	template <typename TIn, typename TOut>
Georgios Pinitas	ef516e8	2021-04-30 14:46:05 +0100	[diff] [blame]	351	void CpuQuantizeKernel::run_quantize_qasymm8(const ITensor src, ITensor dst, const Window &window)
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	352	{
Sang-Hoon Park	fb6aaeb	2019-11-27 15:26:44 +0000	[diff] [blame]	353	const auto window_start_x = static_cast<int>(window.x().start());
				354	const auto window_end_x = static_cast<int>(window.x().end());
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	355
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	356	const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform();
				357	UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform();
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	358	if (is_data_type_quantized_asymmetric(src->info()->data_type()))
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	359	{
				360	uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo);
				361	}
					// Rounding policy passed to the scalar tail: TO_NEAREST_EVEN on aarch64, TO_ZERO elsewhere.
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	362	#ifdef __aarch64__
				363	constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
				364	#else //__aarch64__
				365	constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
				366	#endif //__aarch64__
				367
				368	// Collapse window and reset first dimension to handle tail calculations manually
				369	Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
				370	win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
				371
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	372	Iterator input(src, win_collapsed);
				373	Iterator output(dst, win_collapsed);
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	374	execute_window_loop(
				375	win_collapsed,
				376	[&](const Coordinates &)
				377	{
				378	auto input_ptr = reinterpret_cast<const TIn *>(input.ptr());
				379	auto output_ptr = reinterpret_cast<TOut *>(output.ptr());
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	380
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	381	int x = window_start_x;
					// Vector path: window_step elements per iteration through load_value and vquantize_qasymm8<TOut>.
				382	for (; x <= (window_end_x - window_step); x += window_step)
				383	{
				384	wrapper::vstore(&output_ptr[x], vquantize_qasymm8<TOut>(load_value(&input_ptr[x]), uqinfo));
				385	}
				386	// Compute left-over elements
				387	for (; x < window_end_x; ++x)
				388	{
				389	output_ptr[x] = Qasymm8QuantizationHelper<TOut>::quantize(input_ptr[x], uqinfo, rounding_policy);
				390	}
				391	},
				392	input, output);
John Kesapides	adfb273	2019-03-04 16:29:22 +0000	[diff] [blame]	393	}
				394
					// Quantises src (elements of T) into a dst written as uint16_t over `window`.
					// When src is itself asymmetric-quantised, the parameters are replaced by the result of
					// compute_requantization_scale_offset; otherwise dst's uniform info is used as is.
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	395	template <typename T>
Georgios Pinitas	ef516e8	2021-04-30 14:46:05 +0100	[diff] [blame]	396	void CpuQuantizeKernel::run_quantize_qasymm16(const ITensor src, ITensor dst, const Window &window)
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	397	{
Sang-Hoon Park	fb6aaeb	2019-11-27 15:26:44 +0000	[diff] [blame]	398	const auto window_start_x = static_cast<int>(window.x().start());
				399	const auto window_end_x = static_cast<int>(window.x().end());
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	400
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	401	const UniformQuantizationInfo uqinfo_in = src->info()->quantization_info().uniform();
				402	UniformQuantizationInfo uqinfo = dst->info()->quantization_info().uniform();
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	403	if (is_data_type_quantized_asymmetric(src->info()->data_type()))
Manuel Bottini	4370cff	2020-02-07 16:31:59 +0000	[diff] [blame]	404	{
				405	uqinfo = compute_requantization_scale_offset(uqinfo_in, uqinfo);
				406	}
					// Rounding policy passed to the scalar tail: TO_NEAREST_EVEN on aarch64, TO_ZERO elsewhere.
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	407	#ifdef __aarch64__
				408	constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
				409	#else //__aarch64__
				410	constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
				411	#endif //__aarch64__
				412
				413	// Collapse window and reset first dimension to handle tail calculations manually
				414	Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
				415	win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
				416
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	417	Iterator input(src, win_collapsed);
				418	Iterator output(dst, win_collapsed);
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	419	execute_window_loop(
				420	win_collapsed,
				421	[&](const Coordinates &)
				422	{
				423	auto input_ptr = reinterpret_cast<const T *>(input.ptr());
				424	auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	425
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	426	int x = window_start_x;
					// Vector path: 16 inputs become two uint16x8 halves, stored at offsets x and x + 8.
				427	for (; x <= (window_end_x - window_step); x += window_step)
				428	{
				429	uint16x8x2_t tmp = vquantize_qasymm16(load_value(&input_ptr[x]), uqinfo);
				430	vst1q_u16(&output_ptr[x], tmp.val[0]);
				431	vst1q_u16(&output_ptr[x + 8], tmp.val[1]);
				432	}
				433	// Compute left-over elements
				434	for (; x < window_end_x; ++x)
				435	{
				436	output_ptr[x] = quantize_qasymm16(input_ptr[x], uqinfo, rounding_policy);
				437	}
				438	},
				439	input, output);
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	440	}
				441
					// Execution entry point: fetches the ACL_SRC and ACL_DST tensors from the pack and invokes
					// the member function stored in _func over `window`. The `info` argument is not used.
Georgios Pinitas	ef516e8	2021-04-30 14:46:05 +0100	[diff] [blame]	442	void CpuQuantizeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	443	{
Moritz Pflanzer	c186b57	2017-09-07 09:48:04 +0100	[diff] [blame]	444	ARM_COMPUTE_UNUSED(info);
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	445	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	446	ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
					// _func is assigned in configure(); a null value here is flagged as an error.
Michele Di Giorgio	d64a46c	2019-10-01 12:25:49 +0100	[diff] [blame]	447	ARM_COMPUTE_ERROR_ON(_func == nullptr);
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	448
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	449	const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
				450	auto dst = tensors.get_tensor(TensorType::ACL_DST);
					// Pointer-to-member call on this kernel instance.
				451	(this->*_func)(src, dst, window);
Michele Di Giorgio	4e09b38	2017-07-05 18:20:02 +0100	[diff] [blame]	452	}
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	453
					// Returns the fixed name string of this kernel.
Georgios Pinitas	ef516e8	2021-04-30 14:46:05 +0100	[diff] [blame]	454	const char *CpuQuantizeKernel::name() const
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	455	{
Georgios Pinitas	ef516e8	2021-04-30 14:46:05 +0100	[diff] [blame]	456	return "CpuQuantizeKernel";
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	457	}
Mohammed Suhail Munshi	0a48c4c	2024-01-30 18:25:51 +0000	[diff] [blame]	458
Manuel Bottini	0ded4c4	2021-03-09 14:15:27 +0000	[diff] [blame]	459	} // namespace kernels
				460	} // namespace cpu
Pablo Marquez Tello	a5d61bf	2022-03-17 12:52:02 +0000	[diff] [blame]	461	} // namespace arm_compute