Blame - src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp - ml/armnn

blob: 56337cfdf422d605978045661c854a99d8fae6c6 [file] [log] [blame]

Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	1	//
				2	// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#include <armnn/Exceptions.hpp>
				7
				8	#pragma once
				9
				10	inline void CreateRescaleTosaOperator(const std::string& inputName,
				11	const std::string& outputName,
				12	DType output_type,
				13	const std::vector<int32_t>& shape,
John Mcloughlin	ceb4428	2024-04-23 16:47:04 +0100	[diff] [blame^]	14	const std::vector<int32_t>& multipliers,
				15	const std::vector<int32_t>& shifts,
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	16	int32_t input_zp,
				17	int32_t output_zp,
				18	bool double_round,
				19	bool scale32,
John Mcloughlin	ceb4428	2024-04-23 16:47:04 +0100	[diff] [blame^]	20	bool per_channel,
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	21	TosaSerializationOperator** op,
				22	TosaSerializationTensor** tensor)
				23	{
				24	if (!op)
				25	{
				26	throw armnn::Exception("CreateRescaleTosaOperator: nullptr op");
				27	}
				28
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	29	TosaRescaleAttribute attribute(input_zp,
				30	output_zp,
				31	multipliers,
				32	shifts,
				33	scale32,
				34	double_round,
John Mcloughlin	ceb4428	2024-04-23 16:47:04 +0100	[diff] [blame^]	35	per_channel,
Teresa Charlin	571a4f7	2024-03-26 11:18:42 +0000	[diff] [blame]	36	false, // input_unsigned
				37	false); // output_unsigned
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	38
				39	// op
				40	*op = new TosaSerializationOperator(Op_RESCALE, Attribute_RescaleAttribute, &attribute, {inputName}, {outputName});
				41	if (!(*op))
				42	{
				43	throw armnn::Exception("CreateRescaleTosaOperator: failed to created operator");
				44	}
				45	if (tensor != nullptr)
				46	{
				47	// tensor
				48	*tensor = new TosaSerializationTensor(outputName, shape, output_type, {});
				49	if (! (*tensor))
				50	{
				51	throw armnn::Exception("CreateRescaleTosaOperator: failed to created tensor");
				52	}
				53	}
				54	}
				55
				56	inline void CreateRescaleTosaOperator(const std::string& inputName,
				57	const std::string& outputName,
				58	DType output_type,
				59	const std::vector<int32_t>& shape,
John Mcloughlin	ceb4428	2024-04-23 16:47:04 +0100	[diff] [blame^]	60	int32_t scale_multiplier,
				61	int32_t scale_shift,
				62	int32_t input_zp,
				63	int32_t output_zp,
				64	bool double_round,
				65	bool scale32,
				66	bool per_channel,
				67	TosaSerializationOperator** op,
				68	TosaSerializationTensor** tensor)
				69	{
				70	const std::vector<int32_t> multipliers{scale_multiplier};
				71	const std::vector<int32_t> shifts{scale_shift};
				72	CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multipliers, shifts,
				73	input_zp, output_zp, double_round, scale32, per_channel, op, tensor);
				74	}
				75
				76	/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
				77	/// From a scale value, generates multiplier and shift values where
				78	/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
				79	/// multiplier = mantissa*2^shift for 32-bit scaling.
				80	static void ComputeMultiplierAndShiftTosaScale32(double scale,
				81	int32_t &multiplier,
				82	int32_t &shift)
				83	{
				84	const double mantissa = std::frexp(scale, &shift);
				85	auto shiftedM = std::round(mantissa * (int64_t(1) << 31));
				86
				87	// Can't be greater than 1.0.
				88	if (!(shiftedM <= (int64_t(1) << 31)))
				89	{
				90	throw armnn::Exception("Shifted mantissa exceeds 32 signed bits");
				91	}
				92
				93	if (shiftedM == (int64_t(1) << 31))
				94	{
				95	shiftedM /= 2;
				96	shift++;
				97	}
				98
				99	// TOSA expects right shift to be positive, and embed (1 << 31) into right
				100	// shift bits.
				101	shift = (-shift) + 31;
				102
				103	if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
				104	{
				105	throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
				106	}
				107
				108	multiplier = static_cast<int32_t>(shiftedM);
				109
				110	// Shifting tops out at 62 bits. Right shift to make 62 bits the max.
				111	// The limit of 62 on shift allows the shift to be decomposed as
				112	// two right shifts of 31.
				113	if (shift > 62)
				114	{
				115	// Shifting the multiplier by more than 32-bits is unnecessary.
				116	multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
				117	shift = 62;
				118	}
				119	}
				120
				121	/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
				122	/// From a scale value, generates multiplier and shift values where
				123	/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
				124	/// multiplier = mantissa*2^shift for 16-bit scaling.
				125	static void ComputeMultiplierAndShiftTosaScale16(double scale,
				126	int32_t &multiplier,
				127	int32_t &shift)
				128	{
				129	const double mantissa = std::frexp(scale, &shift);
				130	auto shiftedM = std::round(mantissa * (int64_t(1) << 15));
				131
				132	// Can't be greater than 1.0.
				133	if (!(shiftedM <= (int64_t(1) << 15)))
				134	{
				135	throw armnn::Exception("Shifted mantissa exceeds 16 signed bits");
				136	}
				137
				138	if (shiftedM == (int64_t(1) << 15))
				139	{
				140	shiftedM /= 2;
				141	shift++;
				142	}
				143
				144	// TOSA expects right shift to be positive and embed (1 << 15) into right
				145	// shift bits.
				146	shift = (-shift) + 15;
				147
				148	if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
				149	{
				150	throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
				151	}
				152
				153	multiplier = static_cast<int32_t>(shiftedM);
				154
				155	// Shifting tops out at 62 bits. Right shift to make 62 bits the max.
				156	// The limit of 62 on shift allows the shift to be decomposed as
				157	// two right shifts of 31.
				158	if (shift > 62)
				159	{
				160	// Shifting the multiplier by more than 31-bits is unnecessary.
				161	multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
				162	shift = 62;
				163	}
				164	}
				165
				166	inline void CreateRescaleTosaOperator(const std::string& inputName,
				167	const std::string& outputName,
				168	DType output_type,
				169	const std::vector<int32_t>& shape,
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	170	double scale,
				171	int32_t input_zp,
				172	int32_t output_zp,
				173	bool double_round,
				174	bool scale32,
				175	TosaSerializationOperator** op,
				176	TosaSerializationTensor** tensor)
				177	{
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	178	int32_t multiplier;
				179	int32_t shift;
John Mcloughlin	ceb4428	2024-04-23 16:47:04 +0100	[diff] [blame^]	180
				181	if (scale32)
				182	{
				183	ComputeMultiplierAndShiftTosaScale32(scale, multiplier, shift);
				184	}
				185	else
				186	{
				187	ComputeMultiplierAndShiftTosaScale16(scale, multiplier, shift);
				188	}
				189
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	190	CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multiplier, shift,
John Mcloughlin	ceb4428	2024-04-23 16:47:04 +0100	[diff] [blame^]	191	input_zp, output_zp, double_round, scale32, false, op, tensor);
				192	}
				193
				194	inline void CreateRescaleTosaOperatorPerChannel(const std::string& inputName,
				195	const std::string& outputName,
				196	DType output_type,
				197	const std::vector<int32_t>& shape,
				198	int32_t input_zp,
				199	int32_t output_zp,
				200	bool double_round,
				201	bool scale32,
				202	double input_scale,
				203	double output_scale,
				204	const std::vector<float>& weight_scales,
				205	TosaSerializationOperator** op,
				206	TosaSerializationTensor** tensor)
				207	{
				208	std::vector<int32_t> op_tensor_multipliers;
				209	std::vector<int32_t> op_tensor_shifts;
				210	op_tensor_multipliers.reserve(weight_scales.size());
				211	op_tensor_shifts.reserve(weight_scales.size());
				212
				213	for (const float& weight_scale : weight_scales)
				214	{
				215	double op_tensor_scale = (input_scale * weight_scale) / output_scale;
				216	int32_t multiplier;
				217	int32_t shift;
				218
				219	if (scale32)
				220	{
				221	ComputeMultiplierAndShiftTosaScale32(op_tensor_scale, multiplier, shift);
				222	}
				223	else
				224	{
				225	ComputeMultiplierAndShiftTosaScale16(op_tensor_scale, multiplier, shift);
				226	}
				227
				228	op_tensor_multipliers.push_back(multiplier);
				229	op_tensor_shifts.push_back(shift);
				230	}
				231
				232	CreateRescaleTosaOperator(inputName, outputName, output_type, shape, op_tensor_multipliers, op_tensor_shifts,
				233	input_zp, output_zp, double_round, scale32, true, op, tensor);
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	234	}
				235
				236	inline void CreateFromInt32RescaleTosaOperator(const std::string& inputName,
				237	const std::string& outputName,
John Mcloughlin	ceb4428	2024-04-23 16:47:04 +0100	[diff] [blame^]	238	DType output_type,
				239	const std::vector<int32_t>& shape,
				240	double output_scale,
				241	int32_t output_zp,
				242	TosaSerializationOperator** op,
				243	TosaSerializationTensor** tensor)
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	244	{
John Mcloughlin	ceb4428	2024-04-23 16:47:04 +0100	[diff] [blame^]	245	CreateRescaleTosaOperator(inputName, outputName, output_type, shape, output_scale,
				246	0, output_zp, true, true, op, tensor);
Tracy Narine	10403ec	2023-11-28 11:55:08 +0000	[diff] [blame]	247	}