Blame - src/core/NEON/kernels/convolution/common/qsymm8.cpp - ml/ComputeLibrary

blob: e50263acaa824d20ff2763835c2d9dd14bbd8734 [file] [log] [blame]

Giuseppe Rossini	f01201a	2019-11-06 14:57:49 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2019 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24
				25	#include <algorithm>
				26	#include <cassert>
				27	#include <cstdint>
				28	#include <cmath>
				29	#include <limits>
				30
				31	#include "qsymm8.hpp"
				32
				33	namespace qsymm8 {
				34	#if(__ANDROID__ \|\| BARE_METAL)
				35	template <typename T> T round(T val) { return ::round(val); }
				36	template <typename T> T exp2(T val) { return ::exp2(val); }
				37	template <typename T> T log2(T val) { return ::log2(val); }
				38	#else /* (__ANDROID__ \|\| BARE_METAL) */
				39	template <typename T> T round(T val) { return std::round(val); }
				40	template <typename T> T exp2(T val) { return std::exp2(val); }
				41	template <typename T> T log2(T val) { return std::log2(val); }
				42	#endif /* (__ANDROID__ \|\| BARE_METAL) */
				43
				44	// Symmetric quantization
				45	int8_t QSymm8Params::quantize(float value) const
				46	{
				47	const float transformed = value / scale;
				48	return static_cast<int8_t>(round(std::max(-128.0f, std::min(127.0f, transformed))));
				49	}
				50
				51	float QSymm8Params::dequantize(const int8_t value) const
				52	{
				53	return scale * (static_cast<float>(value));
				54	}
				55
				56	QSymm8RescaleParams QSymm8RescaleParams::make_rescale_params(
				57	const QSymm8Params& weight_quant,
				58	const QSymm8Params& input_quant,
				59	const QSymm8Params& output_quant
				60	)
				61	{
				62	// Based on the gemmlowp approach: https://github.com/google/gemmlowp/blob/master/doc/quantization_example.cc
				63	const float rescale = weight_quant.scale * input_quant.scale / output_quant.scale;
				64	const float shiftf = round(log2(0.5f / rescale));
				65	const float multf = exp2(31.0f + shiftf)*rescale;
				66
				67	int64_t shift = static_cast<int64_t>(shiftf);
				68	int64_t mult = static_cast<int64_t>(multf);
				69
				70	if (mult == (1ll << 31))
				71	{
				72	mult /= 2;
				73	shift--;
				74	}
				75
				76	assert(shift >= 0);
				77	assert(mult <= std::numeric_limits<int32_t>::max());
				78
				79	return QSymm8RescaleParams(
				80	static_cast<int32_t>(shift),
				81	static_cast<int32_t>(mult),
				82	rescale
				83	);
				84	}
				85
				86	QSymm8RescaleParams::QSymm8RescaleParams(int32_t shift, int32_t multi, float rescale)
				87	: shift(shift), multiplier(multi), rescale(rescale)
				88	{
				89	}
				90
				91	// Symmetric per-channel quantization
				92	int8_t QSymm8PerChannelParams::quantize(float value, float scale) const
				93	{
				94	const float transformed = value / scale;
				95	return static_cast<int8_t>(round(std::max(-128.0f, std::min(127.0f, transformed))));
				96	}
				97
				98	float QSymm8PerChannelParams::dequantize(const int8_t value, float scale) const
				99	{
				100	return scale * (static_cast<float>(value));
				101	}
				102
				103	QSymm8PerChannelRescaleParams QSymm8PerChannelRescaleParams::make_rescale_params(
				104	const QSymm8PerChannelParams& weight_quant,
				105	const QSymm8PerChannelParams& input_quant,
				106	const QSymm8PerChannelParams& output_quant
				107	)
				108	{
				109	std::vector<int32_t> shifts;
				110	std::vector<int32_t> mults;
				111	std::vector<float> rescales;
				112
				113	for(size_t s = 0; s< input_quant.scales.size(); s++)
				114	{
				115	// Based on the gemmlowp approach: https://github.com/google/gemmlowp/blob/master/doc/quantization_example.cc
				116	const float rescale = weight_quant.scales[s] * input_quant.scales[s] / output_quant.scales[s];
				117	const float shiftf = round(log2(0.5f / rescale));
				118	const float multf = exp2(31.0f + shiftf)*rescale;
				119
				120	int64_t shift = static_cast<int64_t>(shiftf);
				121	int64_t mult = static_cast<int64_t>(multf);
				122
				123	if (mult == (1ll << 31))
				124	{
				125	mult /= 2;
				126	shift--;
				127	}
				128
				129	assert(shift >= 0);
				130	assert(mult <= std::numeric_limits<int32_t>::max());
				131
				132	shifts.push_back(static_cast<int32_t>(shift));
				133	mults.push_back(static_cast<int32_t>(mult));
				134	rescales.push_back(rescale);
				135	}
				136
				137	return QSymm8PerChannelRescaleParams(shifts, mults, rescales);
				138
				139	}
				140
				141	QSymm8PerChannelRescaleParams QSymm8PerChannelRescaleParams::make_rescale_params(
				142	const QSymm8PerChannelParams& weight_quant,
				143	const qasymm8::QAsymm8Params& input_quant,
				144	const qasymm8::QAsymm8Params& output_quant
				145	)
				146	{
				147	std::vector<int32_t> shifts;
				148	std::vector<int32_t> mults;
				149	std::vector<float> rescales;
				150
				151	for(size_t s = 0; s< weight_quant.scales.size(); s++)
				152	{
				153	// Based on the gemmlowp approach: https://github.com/google/gemmlowp/blob/master/doc/quantization_example.cc
				154	const float rescale = weight_quant.scales[s] * input_quant.scale / output_quant.scale;
				155	const float shiftf = round(log2(0.5f / rescale));
				156	const float multf = exp2(31.0f + shiftf)*rescale;
				157
				158	int64_t shift = static_cast<int64_t>(shiftf);
				159	int64_t mult = static_cast<int64_t>(multf);
				160
				161	if (mult == (1ll << 31))
				162	{
				163	mult /= 2;
				164	shift--;
				165	}
				166
				167	assert(shift >= 0);
				168	assert(mult <= std::numeric_limits<int32_t>::max());
				169
				170	shifts.push_back(static_cast<int32_t>(shift));
				171	mults.push_back(static_cast<int32_t>(mult));
				172	rescales.push_back(rescale);
				173	}
				174
				175	return QSymm8PerChannelRescaleParams(shifts, mults, rescales);
				176
				177	}
				178
				179	QSymm8PerChannelRescaleParams::QSymm8PerChannelRescaleParams(std::vector<int32_t>& shifts, std::vector<int32_t>& multipliers, std::vector<float>& rescales)
				180	: shifts(shifts), multipliers(multipliers), rescales(rescales)
				181	{
				182	}
				183
				184
				185	} // namespace qsymm8