Blame - arm_compute/core/FixedPoint.inl - ml/ComputeLibrary

blob: 5ea0f6c825ba74896f4e154757b01935d665cce1 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Michalis Spyrou	bbd9fb9	2017-06-22 12:57:51 +0100	[diff] [blame]	24	#include "arm_compute/core/Error.h"
				25
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	26	#include <cmath>
				27	#include <limits>
				28
				29	namespace
				30	{
				31	template <typename TpIn, typename TpSat>
				32	inline TpSat saturate_convert(TpIn a)
				33	{
				34	if(a > std::numeric_limits<TpSat>::max())
				35	{
				36	a = std::numeric_limits<TpSat>::max();
				37	}
				38	if(a < std::numeric_limits<TpSat>::min())
				39	{
				40	a = std::numeric_limits<TpSat>::min();
				41	}
				42	return static_cast<TpSat>(a);
				43	}
				44	} // namespace
				45
				46	namespace arm_compute
				47	{
				48	inline qint8_t sqshl_qs8(qint8_t a, int shift)
				49	{
				50	qint16_t tmp = static_cast<qint16_t>(a) << shift;
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	51
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	52	// Saturate the result in case of overflow and cast to qint8_t
				53	return saturate_convert<qint16_t, qint8_t>(tmp);
				54	}
				55
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	56	inline qint16_t sqshl_qs16(qint16_t a, int shift)
				57	{
				58	qint32_t tmp = static_cast<qint32_t>(a) << shift;
				59
				60	// Saturate the result in case of overflow and cast to qint16_t
				61	return saturate_convert<qint32_t, qint16_t>(tmp);
				62	}
				63
Michalis Spyrou	bbd9fb9	2017-06-22 12:57:51 +0100	[diff] [blame]	64	inline qint8_t sshr_qs8(qint8_t a, int shift)
				65	{
				66	ARM_COMPUTE_ERROR_ON_MSG(shift == 0, "Shift should not be zero");
				67	const qint8_t round_val = 1 << (shift - 1);
				68	return sqadd_qs8(a, round_val) >> shift;
				69	}
				70
				71	inline qint16_t sshr_qs16(qint16_t a, int shift)
				72	{
				73	ARM_COMPUTE_ERROR_ON_MSG(shift == 0, "Shift should not be zero");
				74	const qint16_t round_val = 1 << (shift - 1);
				75	return sqadd_qs16(a, round_val) >> shift;
				76	}
				77
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	78	inline qint8_t sabs_qs8(qint8_t a)
				79	{
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	80	return (a < 0) ? (a == std::numeric_limits<int8_t>::min()) ? std::numeric_limits<int8_t>::max() : -a : a;
				81	}
				82
				83	inline qint16_t sabs_qs16(qint16_t a)
				84	{
				85	return (a < 0) ? (a == std::numeric_limits<int16_t>::min()) ? std::numeric_limits<int16_t>::max() : -a : a;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	86	}
				87
				88	inline qint8_t sadd_qs8(qint8_t a, qint8_t b)
				89	{
				90	return a + b;
				91	}
				92
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	93	inline qint16_t sadd_qs16(qint16_t a, qint16_t b)
				94	{
				95	return a + b;
				96	}
				97
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	98	inline qint8_t sqadd_qs8(qint8_t a, qint8_t b)
				99	{
				100	// We need to store the temporary result in qint16_t otherwise we cannot evaluate the overflow
				101	qint16_t tmp = (static_cast<qint16_t>(a) + static_cast<qint16_t>(b));
				102
				103	// Saturate the result in case of overflow and cast to qint8_t
				104	return saturate_convert<qint16_t, qint8_t>(tmp);
				105	}
				106
				107	inline qint16_t sqadd_qs16(qint16_t a, qint16_t b)
				108	{
Georgios Pinitas	9247c92	2017-06-28 18:29:47 +0100	[diff] [blame]	109	// We need to store the temporary result in qint32_t otherwise we cannot evaluate the overflow
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	110	qint32_t tmp = (static_cast<qint32_t>(a) + static_cast<qint32_t>(b));
				111
				112	// Saturate the result in case of overflow and cast to qint16_t
				113	return saturate_convert<qint32_t, qint16_t>(tmp);
				114	}
				115
Georgios Pinitas	9247c92	2017-06-28 18:29:47 +0100	[diff] [blame]	116	inline qint32_t sqadd_qs32(qint32_t a, qint32_t b)
				117	{
				118	// We need to store the temporary result in qint64_t otherwise we cannot evaluate the overflow
				119	qint64_t tmp = (static_cast<qint64_t>(a) + static_cast<qint64_t>(b));
				120
				121	// Saturate the result in case of overflow and cast to qint32_t
				122	return saturate_convert<qint64_t, qint32_t>(tmp);
				123	}
				124
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	125	inline qint8_t ssub_qs8(qint8_t a, qint8_t b)
				126	{
				127	return a - b;
				128	}
				129
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	130	inline qint16_t ssub_qs16(qint16_t a, qint16_t b)
				131	{
				132	return a - b;
				133	}
				134
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	135	inline qint8_t sqsub_qs8(qint8_t a, qint8_t b)
				136	{
				137	// We need to store the temporary result in uint16_t otherwise we cannot evaluate the overflow
				138	qint16_t tmp = static_cast<qint16_t>(a) - static_cast<qint16_t>(b);
				139
				140	// Saturate the result in case of overflow and cast to qint8_t
				141	return saturate_convert<qint16_t, qint8_t>(tmp);
				142	}
				143
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	144	inline qint16_t sqsub_qs16(qint16_t a, qint16_t b)
				145	{
				146	// We need to store the temporary result in qint32_t otherwise we cannot evaluate the overflow
				147	qint32_t tmp = static_cast<qint32_t>(a) - static_cast<qint32_t>(b);
				148
				149	// Saturate the result in case of overflow and cast to qint16_t
				150	return saturate_convert<qint32_t, qint16_t>(tmp);
				151	}
				152
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	153	inline qint8_t smul_qs8(qint8_t a, qint8_t b, int fixed_point_position)
				154	{
				155	const qint16_t round_up_const = (1 << (fixed_point_position - 1));
				156
				157	qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b);
				158
				159	// Rounding up
				160	tmp += round_up_const;
				161
				162	return static_cast<qint8_t>(tmp >> fixed_point_position);
				163	}
				164
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	165	inline qint16_t smul_qs16(qint16_t a, qint16_t b, int fixed_point_position)
				166	{
				167	const qint32_t round_up_const = (1 << (fixed_point_position - 1));
				168
				169	qint32_t tmp = static_cast<qint32_t>(a) * static_cast<qint32_t>(b);
				170
				171	// Rounding up
				172	tmp += round_up_const;
				173
				174	return static_cast<qint16_t>(tmp >> fixed_point_position);
				175	}
				176
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	177	inline qint8_t sqmul_qs8(qint8_t a, qint8_t b, int fixed_point_position)
				178	{
				179	const qint16_t round_up_const = (1 << (fixed_point_position - 1));
				180
				181	qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b);
				182
				183	// Rounding up
				184	tmp += round_up_const;
				185
				186	return saturate_convert<qint16_t, qint8_t>(tmp >> fixed_point_position);
				187	}
				188
				189	inline qint16_t sqmul_qs16(qint16_t a, qint16_t b, int fixed_point_position)
				190	{
				191	const qint32_t round_up_const = (1 << (fixed_point_position - 1));
				192
				193	qint32_t tmp = static_cast<qint32_t>(a) * static_cast<qint32_t>(b);
				194
				195	// Rounding up
				196	tmp += round_up_const;
				197
				198	return saturate_convert<qint32_t, qint16_t>(tmp >> fixed_point_position);
				199	}
				200
				201	inline qint16_t sqmull_qs8(qint8_t a, qint8_t b, int fixed_point_position)
				202	{
				203	const qint16_t round_up_const = (1 << (fixed_point_position - 1));
				204
				205	qint16_t tmp = static_cast<qint16_t>(a) * static_cast<qint16_t>(b);
				206
				207	// Rounding up
				208	tmp += round_up_const;
				209
				210	return tmp >> fixed_point_position;
				211	}
				212
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	213	inline qint32_t sqmull_qs16(qint16_t a, qint16_t b, int fixed_point_position)
				214	{
				215	const qint32_t round_up_const = (1 << (fixed_point_position - 1));
				216
				217	qint32_t tmp = static_cast<qint32_t>(a) * static_cast<qint32_t>(b);
				218
				219	// Rounding up
				220	tmp += round_up_const;
				221
				222	return tmp >> fixed_point_position;
				223	}
				224
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	225	inline qint8_t sinvsqrt_qs8(qint8_t a, int fixed_point_position)
				226	{
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	227	const qint8_t shift = 8 - (fixed_point_position + (__builtin_clz(a) - 24));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	228
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	229	const qint8_t const_three = (3 << fixed_point_position);
				230	qint8_t temp = shift < 0 ? (a << -shift) : (a >> shift);
				231	qint8_t x2 = temp;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	232
				233	// We need three iterations to find the result
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	234	for(int i = 0; i < 3; ++i)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	235	{
				236	qint8_t three_minus_dx = ssub_qs8(const_three, smul_qs8(temp, smul_qs8(x2, x2, fixed_point_position), fixed_point_position));
				237	x2 = (smul_qs8(x2, three_minus_dx, fixed_point_position) >> 1);
				238	}
				239
				240	temp = shift < 0 ? (x2 << (-shift >> 1)) : (x2 >> (shift >> 1));
				241
				242	return temp;
				243	}
				244
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	245	inline qint16_t sinvsqrt_qs16(qint16_t a, int fixed_point_position)
				246	{
				247	const qint16_t shift = 16 - (fixed_point_position + (__builtin_clz(a) - 16));
				248
				249	const qint16_t const_three = (3 << fixed_point_position);
				250	qint16_t temp = shift < 0 ? (a << -shift) : (a >> shift);
				251	qint16_t x2 = temp;
				252
				253	// We need three iterations to find the result
				254	for(int i = 0; i < 3; ++i)
				255	{
				256	qint16_t three_minus_dx = ssub_qs16(const_three, smul_qs16(temp, smul_qs16(x2, x2, fixed_point_position), fixed_point_position));
				257	x2 = smul_qs16(x2, three_minus_dx, fixed_point_position) >> 1;
				258	}
				259
				260	temp = shift < 0 ? (x2 << ((-shift) >> 1)) : (x2 >> (shift >> 1));
				261
				262	return temp;
				263	}
				264
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	265	inline qint8_t sdiv_qs8(qint8_t a, qint8_t b, int fixed_point_position)
				266	{
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	267	const qint16_t temp = a << fixed_point_position;
				268	return static_cast<qint8_t>(temp / b);
				269	}
				270
				271	inline qint16_t sdiv_qs16(qint16_t a, qint16_t b, int fixed_point_position)
				272	{
				273	const qint32_t temp = a << fixed_point_position;
				274	return static_cast<qint16_t>(temp / b);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	275	}
				276
				277	inline qint8_t sqexp_qs8(qint8_t a, int fixed_point_position)
				278	{
				279	// Constants
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	280	const qint8_t const_one = (1 << fixed_point_position);
				281	const qint8_t ln2 = ((0x58 >> (6 - fixed_point_position)) + 1) >> 1;
				282	const qint8_t inv_ln2 = (((0x38 >> (6 - fixed_point_position)) + 1) >> 1) \| const_one;
				283	const qint8_t A = ((0x7F >> (6 - fixed_point_position)) + 1) >> 1;
				284	const qint8_t B = ((0x3F >> (6 - fixed_point_position)) + 1) >> 1;
				285	const qint8_t C = ((0x16 >> (6 - fixed_point_position)) + 1) >> 1;
				286	const qint8_t D = ((0x05 >> (6 - fixed_point_position)) + 1) >> 1;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	287
				288	// Polynomial expansion
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	289	const int dec_a = (sqmul_qs8(a, inv_ln2, fixed_point_position) >> fixed_point_position);
				290	const qint8_t alpha = sabs_qs8(sqsub_qs8(a, sqmul_qs8(ln2, sqshl_qs8(dec_a, fixed_point_position), fixed_point_position)));
				291	qint8_t sum = sqadd_qs8(sqmul_qs8(alpha, D, fixed_point_position), C);
				292	sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), B);
				293	sum = sqadd_qs8(sqmul_qs8(alpha, sum, fixed_point_position), A);
				294	sum = sqmul_qs8(alpha, sum, fixed_point_position);
				295	sum = sqadd_qs8(sum, const_one);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	296
				297	return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs8(sum, dec_a);
				298	}
				299
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	300	inline qint16_t sqexp_qs16(qint16_t a, int fixed_point_position)
				301	{
				302	// Constants
				303	const qint16_t const_one = (1 << fixed_point_position);
				304	const qint16_t ln2 = ((0x58B9 >> (14 - fixed_point_position)) + 1) >> 1;
				305	const qint16_t inv_ln2 = (((0x38AA >> (14 - fixed_point_position)) + 1) >> 1) \| const_one;
				306	const qint16_t A = ((0x7FBA >> (14 - fixed_point_position)) + 1) >> 1;
				307	const qint16_t B = ((0x3FE9 >> (14 - fixed_point_position)) + 1) >> 1;
				308	const qint16_t C = ((0x1693 >> (14 - fixed_point_position)) + 1) >> 1;
				309	const qint16_t D = ((0x0592 >> (14 - fixed_point_position)) + 1) >> 1;
				310
				311	// Polynomial expansion
				312	const int dec_a = (sqmul_qs16(a, inv_ln2, fixed_point_position) >> fixed_point_position);
				313	const qint16_t alpha = sabs_qs16(sqsub_qs16(a, sqmul_qs16(ln2, sqshl_qs16(dec_a, fixed_point_position), fixed_point_position)));
				314	qint16_t sum = sqadd_qs16(sqmul_qs16(alpha, D, fixed_point_position), C);
				315	sum = sqadd_qs16(sqmul_qs16(alpha, sum, fixed_point_position), B);
				316	sum = sqadd_qs16(sqmul_qs16(alpha, sum, fixed_point_position), A);
				317	sum = sqmul_qs16(alpha, sum, fixed_point_position);
				318	sum = sqadd_qs16(sum, const_one);
				319
				320	return (dec_a < 0) ? (sum >> -dec_a) : sqshl_qs16(sum, dec_a);
				321	}
				322
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	323	inline qint8_t slog_qs8(qint8_t a, int fixed_point_position)
				324	{
				325	// Constants
				326	qint8_t const_one = (1 << fixed_point_position);
				327	qint8_t ln2 = (0x58 >> (7 - fixed_point_position));
				328	qint8_t A = (0x5C >> (7 - fixed_point_position - 1));
				329	qint8_t B = -(0x56 >> (7 - fixed_point_position));
				330	qint8_t C = (0x29 >> (7 - fixed_point_position));
				331	qint8_t D = -(0x0A >> (7 - fixed_point_position));
				332
				333	if((const_one == a) \|\| (a < 0))
				334	{
				335	return 0;
				336	}
				337	else if(a < const_one)
				338	{
				339	return -slog_qs8(sdiv_qs8(const_one, a, fixed_point_position), fixed_point_position);
				340	}
				341
				342	// Remove even powers of 2
				343	qint8_t shift_val = 31 - __builtin_clz(a >> fixed_point_position);
				344	a >>= shift_val;
				345	a = ssub_qs8(a, const_one);
				346
				347	// Polynomial expansion
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	348	qint8_t sum = sqadd_qs8(sqmul_qs8(a, D, fixed_point_position), C);
				349	sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), B);
				350	sum = sqadd_qs8(sqmul_qs8(a, sum, fixed_point_position), A);
				351	sum = sqmul_qs8(a, sum, fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	352
				353	return smul_qs8(sadd_qs8(sum, shift_val << fixed_point_position), ln2, fixed_point_position);
				354	}
				355
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	356	inline qint16_t slog_qs16(qint16_t a, int fixed_point_position)
				357	{
				358	// Constants
				359	qint16_t const_one = (1 << fixed_point_position);
				360	qint16_t ln2 = (0x58B9 >> (7 - fixed_point_position));
				361	qint16_t A = (0x5C0F >> (7 - fixed_point_position - 1));
				362	qint16_t B = -(0x56AE >> (7 - fixed_point_position));
				363	qint16_t C = (0x2933 >> (7 - fixed_point_position));
				364	qint16_t D = -(0x0AA7 >> (7 - fixed_point_position));
				365
				366	if((const_one == a) \|\| (a < 0))
				367	{
				368	return 0;
				369	}
				370	else if(a < const_one)
				371	{
				372	return -slog_qs16(sdiv_qs16(const_one, a, fixed_point_position), fixed_point_position);
				373	}
				374
				375	// Remove even powers of 2
				376	qint16_t shift_val = 31 - __builtin_clz(a >> fixed_point_position);
				377	a >>= shift_val;
				378	a = ssub_qs16(a, const_one);
				379
				380	// Polynomial expansion
				381	qint16_t sum = sqadd_qs16(sqmul_qs16(a, D, fixed_point_position), C);
				382	sum = sqadd_qs16(sqmul_qs16(a, sum, fixed_point_position), B);
				383	sum = sqadd_qs16(sqmul_qs16(a, sum, fixed_point_position), A);
				384	sum = sqmul_qs16(a, sum, fixed_point_position);
				385
				386	return smul_qs16(sadd_qs16(sum, shift_val << fixed_point_position), ln2, fixed_point_position);
				387	}
				388
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	389	inline float scvt_f32_qs8(qint8_t a, int fixed_point_position)
				390	{
				391	return static_cast<float>(a) / (1 << fixed_point_position);
				392	}
				393
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	394	inline qint8_t sqcvt_qs8_f32(float a, int fixed_point_position)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	395	{
				396	// round_nearest_integer(a * 2^(fixed_point_position))
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	397	return saturate_convert<float, qint8_t>(a * (1 << fixed_point_position) + ((a >= 0) ? 0.5 : -0.5));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	398	}
				399
				400	inline float scvt_f32_qs16(qint16_t a, int fixed_point_position)
				401	{
				402	return static_cast<float>(a) / (1 << fixed_point_position);
				403	}
				404
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	405	inline qint16_t sqcvt_qs16_f32(float a, int fixed_point_position)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	406	{
				407	// round_nearest_integer(a * 2^(fixed_point_position))
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	408	return saturate_convert<float, qint16_t>(a * (1 << fixed_point_position) + ((a >= 0) ? 0.5 : -0.5));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	409	}
				410
				411	inline qint8_t sqmovn_qs16(qint16_t a)
				412	{
				413	// Saturate the result in case of overflow and cast to qint8_t
				414	return saturate_convert<qint16_t, qint8_t>(a);
				415	}
Georgios Pinitas	9247c92	2017-06-28 18:29:47 +0100	[diff] [blame]	416
				417	inline qint16_t sqmovn_qs32(qint32_t a)
				418	{
				419	// Saturate the result in case of overflow and cast to qint16_t
				420	return saturate_convert<qint32_t, qint16_t>(a);
				421	}
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	422	}