Blame - arm_compute/core/NEON/NEFixedPoint.h - ml/ComputeLibrary

blob: e3eb5d463890899a3e661a2376fab3c8fe9083c8 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
				25	#define __ARM_COMPUTE_NEFIXEDPOINT_H__
				26
				27	#include "arm_compute/core/FixedPoint.h"
				28
				29	#include <arm_neon.h>
				30
				31	namespace arm_compute
				32	{
				33	using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
				34	using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
				35	using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
				36	using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
				37	using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
				38	using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
				39	using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
				40	using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
				41	using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
				42	using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
				43	using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
				44	using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
				45	using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
				46	using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
				47	using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
				48	using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	49	using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	50
				51	/** Get the lower half of a 16 elements vector
				52	*
				53	* @param[in] a vector of 16 elements
				54	*
				55	* @return 8 bit fixed point vector (8 elements)
				56	*/
				57	qint8x8_t vget_low_qs8(qint8x16_t a);
				58
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	59	/** Get the lower half of an 8 elements vector
				60	*
				61	* @param[in] a vector of 8 elements
				62	*
				63	* @return 16 bit fixed point vector (4 elements)
				64	*/
				65	qint16x4_t vget_low_qs16(qint16x8_t a);
				66
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	67	/** Get the higher half of a 16 elements vector
				68	*
				69	* @param[in] a vector of 16 elements
				70	*
				71	* @return 8 bit fixed point vector (8 elements)
				72	*/
				73	qint8x8_t vget_high_qs8(qint8x16_t a);
				74
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	75	/** Get the higher half of an 8 elements vector
				76	*
				77	* @param[in] a vector of 8 elements
				78	*
				79	* @return 16 bit fixed point vector (4 elements)
				80	*/
				81	qint16x4_t vget_high_qs16(qint16x8_t a);
				82
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	83	/** Load a single 8 bit fixed point vector from memory (8 elements)
				84	*
				85	* @param[in] addr Memory address of the 8 bit fixed point vector to load
				86	*
				87	* @return 8 bit fixed point vector (8 elements)
				88	*/
				89	qint8x8_t vld1_qs8(const qint8_t *addr);
				90
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	91	/** Load a single 16 bit fixed point vector from memory (4 elements)
				92	*
				93	* @param[in] addr Memory address of the 16 bit fixed point vector to load
				94	*
				95	* @return 16 bit fixed point vector (4 elements)
				96	*/
				97	qint16x4_t vld1_qs16(const qint16_t *addr);
				98
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	99	/** Load a single 8 bit fixed point vector from memory (16 elements)
				100	*
				101	* @param[in] addr Memory address of the 8 bit fixed point vector to load
				102	*
				103	* @return 8 bit fixed point vector (16 elements)
				104	*/
				105	qint8x16_t vld1q_qs8(const qint8_t *addr);
				106
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	107	/** Load a single 16 bit fixed point vector from memory (8 elements)
				108	*
				109	* @param[in] addr Memory address of the 16 bit fixed point vector to load
				110	*
				111	* @return 16 bit fixed point vector (8 elements)
				112	*/
				113	qint16x8_t vld1q_qs16(const qint16_t *addr);
				114
				115	/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
				116	*
				117	* @param[in] addr Memory address of the 8 bit fixed point scalar value to load
				118	*
				119	* @return 8 bit fixed point vector (8 elements)
				120	*/
				121	qint8x8_t vld1_dup_qs8(const qint8_t *addr);
				122
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	123	/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
				124	*
				125	* @param[in] addr Memory address of the 16 bit fixed point scalar value to load
				126	*
				127	* @return 16 bit fixed point vector (4 elements)
				128	*/
				129	qint16x4_t vld1_dup_qs16(const qint16_t *addr);
				130
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	131	/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
				132	*
				133	* @param[in] addr Memory address of the 8 bit fixed point scalar value to load
				134	*
				135	* @return 8 bit fixed point vector (16 elements)
				136	*/
				137	qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
				138
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	139	/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
				140	*
				141	* @param[in] addr Memory address of the 16 bit fixed point scalar value to load
				142	*
				143	* @return 16 bit fixed point vector (8 elements)
				144	*/
				145	qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
				146
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	147	/** Store a single 8 bit fixed point vector to memory (8 elements)
				148	*
				149	* @param[out] addr Memory address where the 8 bit fixed point vector should be stored
				150	* @param[in] b 8 bit fixed point vector to store
				151	*
				152	*/
				153	void vst1_qs8(qint8_t *addr, qint8x8_t b);
				154
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	155	/** Store a single 16 bit fixed point vector to memory (4 elements)
				156	*
				157	* @param[out] addr Memory address where the 16 bit fixed point vector should be stored
				158	* @param[in] b 16 bit fixed point vector to store
				159	*
				160	*/
				161	void vst1_qs16(qint16_t *addr, qint16x4_t b);
				162
				163	/** Store a single 8 bit fixed point vector to memory (16 elements)
				164	*
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	165	* @param[out] addr Memory address where the 8 bit fixed point vector should be stored
				166	* @param[in] b 8 bit fixed point vector to store
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	167	*
				168	*/
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	169	void vst1q_qs8(qint8_t *addr, qint8x16_t b);
				170
				171	/** Store a single 16 bit fixed point vector to memory (8 elements)
				172	*
				173	* @param[out] addr Memory address where the 16 bit fixed point vector should be stored
				174	* @param[in] b 16 bit fixed point vector to store
				175	*
				176	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	177	void vst1q_qs16(qint16_t *addr, qint16x8_t b);
				178
				179	/** 16 bit fixed point vector saturating narrow (8 elements)
				180	*
				181	* @param[in] a 16 bit fixed point vector to convert
				182	*
				183	* @return 8 bit fixed point vector
				184	*/
				185	qint8x8_t vqmovn_q16(qint16x8_t a);
				186
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	187	/** 32 bit fixed point vector saturating narrow (4 elements)
				188	*
				189	* @param[in] a 32 bit fixed point vector to convert
				190	*
				191	* @return 16 bit fixed point vector
				192	*/
				193	qint16x4_t vqmovn_q32(qint32x4_t a);
				194
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	195	/** 8 bit fixed point vector duplicate (8 elements)
				196	*
				197	* @param[in] a 8 bit fixed point to duplicate
				198	*
				199	* @return The result of the vector duplication
				200	*/
				201	qint8x8_t vdup_n_qs8(qint8_t a);
				202
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	203	/** 16 bit fixed point vector duplicate (4 elements)
				204	*
				205	* @param[in] a 16 bit fixed point to duplicate
				206	*
				207	* @return The result of the vector duplication
				208	*/
				209	qint16x4_t vdup_n_qs16(qint16_t a);
				210
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	211	/** 8 bit fixed point vector duplicate (16 elements)
				212	*
				213	* @param[in] a 8 bit fixed point to duplicate
				214	*
				215	* @return The result of the vector duplication
				216	*/
				217	qint8x16_t vdupq_n_qs8(qint8_t a);
				218
				219	/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
				220	*
				221	* @param[in] a Floating point value to convert to 8 bit fixed point and duplicate
				222	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				223	*
				224	* @return The result of the vector duplication
				225	*/
				226	qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
				227
				228	/** 16 bit fixed point vector duplicate (8 elements)
				229	*
				230	* @param[in] a 16 bit fixed point to duplicate
				231	*
				232	* @return The result of the vector duplication
				233	*/
				234	qint16x8_t vdupq_n_qs16(qint16x8_t a);
				235
				236	/** Absolute value of 8 bit fixed point vector (8 elements)
				237	*
				238	* @param[in] a 8 bit fixed point input vector
				239	*
				240	* @return The result of the 8 bit fixed point vector absolute value
				241	*/
				242	qint8x8_t vabs_qs8(qint8x8_t a);
				243
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	244	/** Absolute value of 16 bit fixed point vector (4 elements)
				245	*
				246	* @param[in] a 16 bit fixed point input vector
				247	*
				248	* @return The result of the 16 bit fixed point vector absolute value
				249	*/
				250	qint16x4_t vabs_qs16(qint16x4_t a);
				251
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	252	/** Absolute value of 8 bit fixed point vector (16 elements)
				253	*
				254	* @param[in] a 8 bit fixed point input vector
				255	*
				256	* @return The result of the 8 bit fixed point vector absolute value
				257	*/
				258	qint8x16_t vabsq_qs8(qint8x16_t a);
				259
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	260	/** Absolute value of 16 bit fixed point vector (8 elements)
				261	*
				262	* @param[in] a 16 bit fixed point input vector
				263	*
				264	* @return The result of the 16 bit fixed point vector absolute value
				265	*/
				266	qint16x8_t vabsq_qs16(qint16x8_t a);
				267
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	268	/** Saturating absolute value of 8 bit fixed point vector (8 elements)
				269	*
				270	* @param[in] a 8 bit fixed point input vector
				271	*
				272	* @return The result of the 8 bit fixed point vector absolute value
				273	*/
				274	qint8x8_t vqabs_qs8(qint8x8_t a);
				275
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	276	/** Saturating absolute value of 16 bit fixed point vector (4 elements)
				277	*
				278	* @param[in] a 16 bit fixed point input vector
				279	*
				280	* @return The result of the 16 bit fixed point vector absolute value
				281	*/
				282	qint16x4_t vqabs_qs16(qint16x4_t a);
				283
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	284	/** Saturating absolute value of 8 bit fixed point vector (16 elements)
				285	*
				286	* @param[in] a 8 bit fixed point input vector
				287	*
				288	* @return The result of the 8 bit fixed point vector absolute value
				289	*/
				290	qint8x16_t vqabsq_qs8(qint8x16_t a);
				291
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	292	/** Saturating absolute value of 16 bit fixed point vector (8 elements)
				293	*
				294	* @param[in] a 16 bit fixed point input vector
				295	*
				296	* @return The result of the 16 bit fixed point vector absolute value
				297	*/
				298	qint16x8_t vqabsq_qs16(qint16x8_t a);
				299
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	300	/** 8 bit fixed point vector max (8 elements)
				301	*
				302	* @param[in] a First 8 bit fixed point input vector
				303	* @param[in] b Second 8 bit fixed point input vector
				304	*
				305	* @return The result of the 8 bit fixed point vector max operation
				306	*/
				307	qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
				308
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	309	/** 16 bit fixed point vector max (4 elements)
				310	*
				311	* @param[in] a First 16 bit fixed point input vector
				312	* @param[in] b Second 16 bit fixed point input vector
				313	*
				314	* @return The result of the 16 bit fixed point vector max operation
				315	*/
				316	qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
				317
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	318	/** 8 bit fixed point vector max (16 elements)
				319	*
				320	* @param[in] a First 8 bit fixed point input vector
				321	* @param[in] b Second 8 bit fixed point input vector
				322	*
				323	* @return The result of the 8 bit fixed point vector max operation
				324	*/
				325	qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
				326
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	327	/** 16 bit fixed point vector max (8 elements)
				328	*
				329	* @param[in] a First 16 bit fixed point input vector
				330	* @param[in] b Second 16 bit fixed point input vector
				331	*
				332	* @return The result of the 16 bit fixed point vector max operation
				333	*/
				334	qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
				335
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	336	/** 8 bit fixed point vector pairwise max (8 elements)
				337	*
				338	* @param[in] a First 8 bit fixed point input vector
				339	* @param[in] b Second 8 bit fixed point input vector
				340	*
				341	* @return The result of the 8 bit fixed point vector pairwise max operation
				342	*/
				343	qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
				344
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	345	/** 16 bit fixed point vector pairwise max (4 elements)
				346	*
				347	* @param[in] a First 16 bit fixed point input vector
				348	* @param[in] b Second 16 bit fixed point input vector
				349	*
				350	* @return The result of the 16 bit fixed point vector pairwise max operation
				351	*/
				352	qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
				353
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	354	/** 8 bit fixed point vector min (8 elements)
				355	*
				356	* @param[in] a First 8 bit fixed point input vector
				357	* @param[in] b Second 8 bit fixed point input vector
				358	*
				359	* @return The result of the 8 bit fixed point vector min operation
				360	*/
				361	qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
				362
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	363	/** 16 bit fixed point vector min (4 elements)
				364	*
				365	* @param[in] a First 16 bit fixed point input vector
				366	* @param[in] b Second 16 bit fixed point input vector
				367	*
				368	* @return The result of the 16 bit fixed point vector min operation
				369	*/
				370	qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
				371
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	372	/** 8 bit fixed point vector min (16 elements)
				373	*
				374	* @param[in] a First 8 bit fixed point input vector
				375	* @param[in] b Second 8 bit fixed point input vector
				376	*
				377	* @return The result of the 8 bit fixed point vector min operation
				378	*/
				379	qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
				380
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	381	/** 16 bit fixed point vector min (8 elements)
				382	*
				383	* @param[in] a First 16 bit fixed point input vector
				384	* @param[in] b Second 16 bit fixed point input vector
				385	*
				386	* @return The result of the 16 bit fixed point vector min operation
				387	*/
				388	qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
				389
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	390	/** 8 bit fixed point vector pairwise min (8 elements)
				391	*
				392	* @param[in] a First 8 bit fixed point input vector
				393	* @param[in] b Second 8 bit fixed point input vector
				394	*
				395	* @return The result of the 8 bit fixed point vector pairwise min operation
				396	*/
				397	qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
				398
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	399	/** 16 bit fixed point vector pairwise min (4 elements)
				400	*
				401	* @param[in] a First 16 bit fixed point input vector
				402	* @param[in] b Second 16 bit fixed point input vector
				403	*
				404	* @return The result of the 16 bit fixed point vector pairwise min operation
				405	*/
				406	qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
				407
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	408	/** 8 bit fixed point vector add (8 elements)
				409	*
				410	* @param[in] a First 8 bit fixed point input vector
				411	* @param[in] b Second 8 bit fixed point input vector
				412	*
				413	* @return The result of the 8 bit fixed point vector addition
				414	*/
				415	qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
				416
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	417	/** 16 bit fixed point vector add (4 elements)
				418	*
				419	* @param[in] a First 16 bit fixed point input vector
				420	* @param[in] b Second 16 bit fixed point input vector
				421	*
				422	* @return The result of the 16 bit fixed point vector addition
				423	*/
				424	qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
				425
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	426	/** 8 bit fixed point vector add (16 elements)
				427	*
				428	* @param[in] a First 8 bit fixed point input vector
				429	* @param[in] b Second 8 bit fixed point input vector
				430	*
				431	* @return The result of the 8 bit fixed point vector addition
				432	*/
				433	qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
				434
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	435	/** 16 bit fixed point vector add (8 elements)
				436	*
				437	* @param[in] a First 16 bit fixed point input vector
				438	* @param[in] b Second 16 bit fixed point input vector
				439	*
				440	* @return The result of the 16 bit fixed point vector addition
				441	*/
				442	qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
				443
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	444	/** 8 bit fixed point vector saturating add (8 elements)
				445	*
				446	* @param[in] a First 8 bit fixed point input vector
				447	* @param[in] b Second 8 bit fixed point input vector
				448	*
				449	* @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
				450	*/
				451	qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
				452
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	453	/** 16 bit fixed point vector saturating add (4 elements)
				454	*
				455	* @param[in] a First 16 bit fixed point input vector
				456	* @param[in] b Second 16 bit fixed point input vector
				457	*
				458	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				459	*/
				460	qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
				461
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	462	/** 8 bit fixed point vector saturating add (16 elements)
				463	*
				464	* @param[in] a First 8 bit fixed point input vector
				465	* @param[in] b Second 8 bit fixed point input vector
				466	*
				467	* @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
				468	*/
				469	qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
				470
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	471	/** 16 bit fixed point vector saturating add (8 elements)
				472	*
				473	* @param[in] a First 16 bit fixed point input vector
				474	* @param[in] b Second 16 bit fixed point input vector
				475	*
				476	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				477	*/
				478	qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
				479
				480	/** 8 bit fixed point vector pairwise add long (8 elements)
				481	*
				482	* @param[in] a 8 bit fixed point input vector
				483	*
				484	* @return The 16 bit vector (4 elements) holding the sums of adjacent pairs of elements of @p a
				485	*/
				486	int16x4_t vpaddl_qs8(qint8x8_t a);
				487
				488	/** 8 bit fixed point vector subtraction (8 elements)
				489	*
				490	* @param[in] a First 8 bit fixed point input vector
				491	* @param[in] b Second 8 bit fixed point input vector
				492	*
				493	* @return The result of the 8 bit fixed point vector subtraction
				494	*/
				495	qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
				496
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	497	/** 16 bit fixed point vector subtraction (4 elements)
				498	*
				499	* @param[in] a First 16 bit fixed point input vector
				500	* @param[in] b Second 16 bit fixed point input vector
				501	*
				502	* @return The result of the 16 bit fixed point vector subtraction
				503	*/
				504	qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
				505
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	506	/** 8 bit fixed point vector subtraction (16 elements)
				507	*
				508	* @param[in] a First 8 bit fixed point input vector
				509	* @param[in] b Second 8 bit fixed point input vector
				510	*
				511	* @return The result of the 8 bit fixed point vector subtraction
				512	*/
				513	qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
				514
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	515	/** 16 bit fixed point vector subtraction (8 elements)
				516	*
				517	* @param[in] a First 16 bit fixed point input vector
				518	* @param[in] b Second 16 bit fixed point input vector
				519	*
				520	* @return The result of the 16 bit fixed point vector subtraction
				521	*/
				522	qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
				523
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	524	/** 8 bit fixed point vector saturating subtraction (8 elements)
				525	*
				526	* @param[in] a First 8 bit fixed point input vector
				527	* @param[in] b Second 8 bit fixed point input vector
				528	*
				529	* @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
				530	*/
				531	qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
				532
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	533	/** 16 bit fixed point vector saturating subtraction (4 elements)
				534	*
				535	* @param[in] a First 16 bit fixed point input vector
				536	* @param[in] b Second 16 bit fixed point input vector
				537	*
				538	* @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
				539	*/
				540	qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
				541
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	542	/** 8 bit fixed point vector saturating subtraction (16 elements)
				543	*
				544	* @param[in] a First 8 bit fixed point input vector
				545	* @param[in] b Second 8 bit fixed point input vector
				546	*
				547	* @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
				548	*/
				549	qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
				550
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	551	/** 16 bit fixed point vector saturating subtraction (8 elements)
				552	*
				553	* @param[in] a First 16 bit fixed point input vector
				554	* @param[in] b Second 16 bit fixed point input vector
				555	*
				556	* @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
				557	*/
				558	qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
				559
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	560	/** 8 bit fixed point vector multiply (8 elements)
				561	*
				562	* @param[in] a First 8 bit fixed point input vector
				563	* @param[in] b Second 8 bit fixed point input vector
				564	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				565	*
				566	* @return The result of the 8 bit fixed point vector multiplication.
				567	*/
				568	qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				569
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	570	/** 16 bit fixed point vector multiply (4 elements)
				571	*
				572	* @param[in] a First 16 bit fixed point input vector
				573	* @param[in] b Second 16 bit fixed point input vector
				574	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				575	*
				576	* @return The result of the 16 bit fixed point vector multiplication.
				577	*/
				578	qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				579
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	580	/** 8 bit fixed point vector multiply (16 elements)
				581	*
				582	* @param[in] a First 8 bit fixed point input vector
				583	* @param[in] b Second 8 bit fixed point input vector
				584	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				585	*
				586	* @return The result of the 8 bit fixed point vector multiplication.
				587	*/
				588	qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				589
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	590	/** 16 bit fixed point vector multiply (8 elements)
				591	*
				592	* @param[in] a First 16 bit fixed point input vector
				593	* @param[in] b Second 16 bit fixed point input vector
				594	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				595	*
				596	* @return The result of the 16 bit fixed point vector multiplication.
				597	*/
				598	qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
				599
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	600	/** 8 bit fixed point vector saturating multiply (8 elements)
				601	*
				602	* @param[in] a First 8 bit fixed point input vector
				603	* @param[in] b Second 8 bit fixed point input vector
				604	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				605	*
				606	* @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
				607	*/
				608	qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				609
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	610	/** 16 bit fixed point vector saturating multiply (4 elements)
				611	*
				612	* @param[in] a First 16 bit fixed point input vector
				613	* @param[in] b Second 16 bit fixed point input vector
				614	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				615	*
				616	* @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
				617	*/
				618	qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				619
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	620	/** 8 bit fixed point vector saturating multiply (16 elements)
				621	*
				622	* @param[in] a First 8 bit fixed point input vector
				623	* @param[in] b Second 8 bit fixed point input vector
				624	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				625	*
				626	* @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
				627	*/
				628	qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				629
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	630	/** 16 bit fixed point vector saturating multiply (8 elements)
				631	*
				632	* @param[in] a First 16 bit fixed point input vector
				633	* @param[in] b Second 16 bit fixed point input vector
				634	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				635	*
				636	* @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
				637	*/
				638	qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
				639
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	640	/** 8 bit fixed point vector long multiply (8 elements)
				641	*
				642	* @param[in] a First 8 bit fixed point input vector
				643	* @param[in] b Second 8 bit fixed point input vector
				644	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				645	*
				646	* @return The result of the 8 bit fixed point long vector multiplication.
				647	*/
				648	qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				649
				650	/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
				651	*
				652	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				653	* @param[in] b Second 8 bit fixed point input vector
				654	* @param[in] c Third 8 bit fixed point input vector
				655	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				656	*
				657	* @return The result of the 8 bit fixed point vector multiply-accumulate
				658	*/
				659	qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				660
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	661	/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				662	*
				663	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				664	* @param[in] b Second 16 bit fixed point input vector
				665	* @param[in] c Third 16 bit fixed point input vector
				666	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				667	*
				668	* @return The result of the 16 bit fixed point vector multiply-accumulate
				669	*/
				670	qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				671
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	672	/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				673	*
				674	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				675	* @param[in] b Second 8 bit fixed point input vector
				676	* @param[in] c Third 8 bit fixed point input vector
				677	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				678	*
				679	* @return The result of the 8 bit fixed point vector multiply-accumulate
				680	*/
				681	qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
				682
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	683	/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				684	*
				685	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				686	* @param[in] b Second 16 bit fixed point input vector
				687	* @param[in] c Third 16 bit fixed point input vector
				688	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				689	*
				690	* @return The result of the 16 bit fixed point vector multiply-accumulate
				691	*/
				692	qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
				693
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	694	/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				695	*
				696	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				697	* @param[in] b Second 8 bit fixed point input vector
				698	* @param[in] c Third 8 bit fixed point input vector
				699	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				700	*
				701	* @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				702	*/
				703	qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				704
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	705	/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				706	*
				707	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				708	* @param[in] b Second 16 bit fixed point input vector
				709	* @param[in] c Third 16 bit fixed point input vector
				710	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				711	*
				712	* @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				713	*/
				714	qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				715
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	716	/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				717	*
				718	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				719	* @param[in] b Second 8 bit fixed point input vector
				720	* @param[in] c Third 8 bit fixed point input vector
				721	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				722	*
				723	* @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				724	*/
				725	qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
				726
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	727	/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				728	*
				729	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				730	* @param[in] b Second 16 bit fixed point input vector
				731	* @param[in] c Third 16 bit fixed point input vector
				732	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				733	*
				734	* @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				735	*/
				736	qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
				737
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	738	/** 8 bit fixed point vector multiply-accumulate long (8 elements).
				739	* This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
				740	*
				741	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				742	* @param[in] b Second 8 bit fixed point input vector
				743	* @param[in] c Third 8 bit fixed point input vector
				744	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				745	*
				746	* @return The result of the 8 bit fixed point vector multiply-accumulate long
				747	*/
				748	qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				749
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	750	/** 16 bit fixed point vector multiply-accumulate long (4 elements).
				751	* This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
				752	*
				753	* @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
				754	* @param[in] b Second 16 bit fixed point input vector
				755	* @param[in] c Third 16 bit fixed point input vector
				756	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				757	*
				758	* @return The result of the 16 bit fixed point vector multiply-accumulate long
				759	*/
				760	qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				761
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	762	/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
				763	* This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
				764	*
				765	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				766	* @param[in] b Second 8 bit fixed point input vector
				767	* @param[in] c Third 8 bit fixed point input vector
				768	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				769	*
				770	* @return The result of the 8 bit fixed point vector multiply-accumulate long
				771	*/
				772	qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				773
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	774	/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
				775	* This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
				776	*
				777	* @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
				778	* @param[in] b Second 16 bit fixed point input vector
				779	* @param[in] c Third 16 bit fixed point input vector
				780	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				781	*
				782	* @return The result of the 16 bit fixed point vector multiply-accumulate long
				783	*/
				784	qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				785
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	786	/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements
				787	*
				788	* @param[in] a Float input vector
				789	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				790	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame^]	791	* @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	792	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame^]	793	qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	794
				795	/** Convert a float vector with 4 elements to 16 bit fixed point vector with 4 elements
				796	*
				797	* @param[in] a Float input vector
				798	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				799	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame^]	800	* @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	801	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame^]	802	qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	803
				804	/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements
				805	*
				806	* @param[in] a Float input vector
				807	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				808	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame^]	809	* @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	810	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame^]	811	qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	812
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	813	/** Convert a float vector with 4x2 elements to 16 bit fixed point vector with 8 elements
				814	*
				815	* @param[in] a Float input vector
				816	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				817	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame^]	818	* @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	819	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame^]	820	qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	821
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	822	/** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
				823	*
				824	* @param[in] a 8 bit fixed point input vector
				825	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				826	*
				827	* @return The result of the conversion 8 bit fixed point -> float32x4x2
				828	*/
				829	float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
				830
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	831	/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
				832	*
				833	* @param[in] a 16 bit fixed point input vector
				834	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				835	*
				836	* @return The result of the conversion 16 bit fixed point -> float32x4
				837	*/
				838	float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
				839
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	840	/** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
				841	*
				842	* @param[in] a 8 bit fixed point input vector
				843	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				844	*
				845	* @return The result of the conversion 8 bit fixed point -> float32x4x4
				846	*/
				847	float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
				848
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	849	/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements
				850	*
				851	* @param[in] a 16 bit fixed point input vector
				852	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				853	*
				854	* @return The result of the conversion 16 bit fixed point -> float32x4x2
				855	*/
				856	float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
				857
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	858	/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements)
				859	*
				860	* @param[in] a 8bit fixed point input vector
				861	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				862	*
				863	* @return The result of the 8bit reciprocal (1/a).
				864	*/
				865	qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
				866
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	867	/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (4 elements)
				868	*
				869	* @param[in] a 16 bit fixed point input vector
				870	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				871	*
				872	* @return The result of the 16 bit reciprocal (1/a).
				873	*/
				874	qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
				875
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	876	/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements)
				877	*
				878	* @param[in] a 8bit fixed point input vector
				879	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				880	*
				881	* @return The result of the 8bit reciprocal (1/a).
				882	*/
				883	qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
				884
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	885	/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (8 elements)
				886	*
				887	* @param[in] a 16 bit fixed point input vector
				888	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				889	*
				890	* @return The result of the 16 bit reciprocal (1/a).
				891	*/
				892	qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
				893
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	894	/** Division fixed point 8bit (8 elements)
				895	*
				896	* @param[in] a First 8bit fixed point input vector
				897	* @param[in] b Second 8bit fixed point input vector
				898	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				899	*
				900	* @return The quotient and remainder number in fixed point format.
				901	*/
				902	qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				903
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	904	/** Division fixed point 16 bit (4 elements)
				905	*
				906	* @param[in] a First 16 bit fixed point input vector
				907	* @param[in] b Second 16 bit fixed point input vector
				908	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				909	*
				910	* @return The quotient and remainder number in fixed point format.
				911	*/
				912	qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				913
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	914	/** Division fixed point 8bit (16 elements)
				915	*
				916	* @param[in] a First 8bit fixed point input vector
				917	* @param[in] b Second 8bit fixed point input vector
				918	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				919	*
				920	* @return The quotient and remainder number in 8bit fixed point format.
				921	*/
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	922	qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				923
				924	/** Division fixed point 16 bit (8 elements)
				925	*
				926	* @param[in] a First 16 bit fixed point input vector
				927	* @param[in] b Second 16 bit fixed point input vector
				928	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				929	*
				930	* @return The quotient and remainder number in 16 bit fixed point format.
				931	*/
				932	qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	933
				934	/** Perform a 4th degree polynomial approximation. (8 elements)
				935	*
				936	* @param[in] a 8bit fixed point input vector
				937	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				938	*
				939	* @return The result of the 8bit taylor approximation.
				940	*/
				941	template <bool islog>
				942	qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
				943
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	944	/** Perform a 4th degree polynomial approximation. (4 elements)
				945	*
				946	* @param[in] a 16 bit fixed point input vector
				947	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				948	*
				949	* @return The result of the 16 bit taylor approximation.
				950	*/
				951	template <bool islog>
				952	qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
				953
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	954	/** Perform a 4th degree polynomial approximation. (16 elements)
				955	*
				956	* @param[in] a 8bit fixed point input vector
				957	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				958	*
				959	* @return The result of the 8bit taylor approximation.
				960	*/
				961	template <bool islog>
				962	qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
				963
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	964	/** Perform a 4th degree polynomial approximation. (8 elements)
				965	*
				966	* @param[in] a 16 bit fixed point input vector
				967	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				968	*
				969	* @return The result of the 16 bit taylor approximation.
				970	*/
				971	template <bool islog>
				972	qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
				973
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	974	/** Calculate saturating exponential fixed point 8bit (8 elements)
				975	*
				976	* @param[in] a 8bit fixed point input vector
				977	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				978	*
				979	* @return The result of the 8bit saturating exponential
				980	*/
				981	qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
				982
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	983	/** Calculate saturating exponential fixed point 16 bit (4 elements)
				984	*
				985	* @param[in] a 16 bit fixed point input vector
				986	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				987	*
				988	* @return The result of the 16 bit saturating exponential
				989	*/
				990	qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
				991
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	992	/** Calculate saturating exponential fixed point 8bit (16 elements)
				993	*
				994	* @param[in] a 8bit fixed point input vector
				995	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				996	*
				997	* @return The result of the 8bit saturating exponential
				998	*/
				999	qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
				1000
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1001	/** Calculate saturating exponential fixed point 16 bit (8 elements)
				1002	*
				1003	* @param[in] a 16 bit fixed point input vector
				1004	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1005	*
				1006	* @return The result of the 16 bit saturating exponential
				1007	*/
				1008	qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
				1009
				1010	/** Calculate logarithm fixed point 8 bit (8 elements)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1011	*
				1012	* @param[in] a 8bit fixed point input vector
				1013	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1014	*
				1015	* @return The result of the 8bit logarithm.
				1016	*/
				1017	qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
				1018
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1019	/** Calculate logarithm fixed point 16 bit (4 elements)
				1020	*
				1021	* @param[in] a 16 bit fixed point input vector
				1022	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1023	*
				1024	* @return The result of the 16 bit logarithm.
				1025	*/
				1026	qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
				1027
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1028	/** Calculate logarithm fixed point 8bit (16 elements)
				1029	*
				1030	* @param[in] a 8bit fixed point input vector
				1031	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1032	*
				1033	* @return The result of the 8bit logarithm.
				1034	*/
				1035	qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
				1036
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1037	/** Calculate logarithm fixed point 16 bit (8 elements)
				1038	*
				1039	* @param[in] a 16 bit fixed point input vector
				1040	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1041	*
				1042	* @return The result of the 16 bit logarithm.
				1043	*/
				1044	qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
				1045
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1046	/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
				1047	*
				1048	* @param[in] a 8bit fixed point input vector
				1049	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1050	*
				1051	* @return The result of the 8bit inverse sqrt.
				1052	*/
				1053	qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
				1054
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1055	/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
				1056	*
				1057	* @param[in] a 16 bit fixed point input vector
				1058	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1059	*
				1060	* @return The result of the 16 bit inverse sqrt.
				1061	*/
				1062	qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
				1063
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1064	/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
				1065	*
				1066	* @param[in] a 8bit fixed point input vector
				1067	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1068	*
				1069	* @return The result of the 8bit inverse sqrt.
				1070	*/
				1071	qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
				1072
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1073	/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
				1074	*
				1075	* @param[in] a 16 bit fixed point input vector
				1076	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1077	*
				1078	* @return The result of the 16 bit inverse sqrt.
				1079	*/
				1080	qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
				1081
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1082	/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
				1083	*
				1084	* @param[in] a 8bit fixed point input vector
				1085	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1086	*
				1087	* @return The result of the 8bit inverse sqrt.
				1088	*/
				1089	qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
				1090
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1091	/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
				1092	*
				1093	* @param[in] a 16 bit fixed point input vector
				1094	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1095	*
				1096	* @return The result of the 16 bit inverse sqrt.
				1097	*/
				1098	qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
				1099
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1100	/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
				1101	*
				1102	* @param[in] a 8bit fixed point input vector
				1103	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1104	*
				1105	* @return The result of the 8bit inverse sqrt.
				1106	*/
				1107	qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
				1108
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1109	/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
				1110	*
				1111	* @param[in] a 16 bit fixed point input vector
				1112	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1113	*
				1114	* @return The result of the 16 bit inverse sqrt.
				1115	*/
				1116	qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
				1117
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1118	/** Calculate hyperbolic tangent for fixed point 8bit (8 elements)
				1119	*
				1120	* @param[in] a 8bit fixed point input vector
				1121	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1122	*
				1123	* @return The calculated Hyperbolic Tangent.
				1124	*/
				1125	qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position);
				1126
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1127	/** Calculate hyperbolic tangent for fixed point 16 bit (4 elements)
				1128	*
				1129	* @param[in] a 16 bit fixed point input vector
				1130	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1131	*
				1132	* @return The calculated Hyperbolic Tangent.
				1133	*/
				1134	qint16x4_t vtanh_qs16(qint16x4_t a, int fixed_point_position);
				1135
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1136	/** Calculate hyperbolic tangent for fixed point 8bit (16 elements)
				1137	*
				1138	* @param[in] a 8bit fixed point input vector
				1139	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1140	*
				1141	* @return The calculated Hyperbolic Tangent.
				1142	*/
				1143	qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position);
				1144
				1145	/** Calculate saturating n power for fixed point 8bit (16 elements).
				1146	*
				1147	* pow(a,b) = e^(b*log(a))
				1148	*
				1149	* @param[in] a 8bit fixed point input vector
				1150	* @param[in] b 8bit fixed point power vector
				1151	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1152	*
				1153	* @return The result of the 8bit power. NOTE(review): the brief says 16 elements and @p b is qint8x16_t, but @p a and the return type are declared as qint8x8_t (8 elements) - confirm the intended widths against the definition
				1154	*/
				1155	qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
Michele Di Giorgio	8af2dd6	2017-06-19 15:19:29 +0100	[diff] [blame]	1156
				1157	/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements
				1158	*
				1159	* @param[in] a Float input vector
				1160	* @param[in] b Float input vector
				1161	*
				1162	* @return The lane-by-lane maximum -> float32x4x2
				1163	*/
				1164	float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1165
				1166	/** Calculate hyperbolic tangent for fixed point 16 bit (8 elements)
				1167	*
				1168	* @param[in] a 16 bit fixed point input vector
				1169	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1170	*
				1171	* @return The calculated Hyperbolic Tangent.
				1172	*/
				1173	qint16x8_t vtanhq_qs16(qint16x8_t a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1174	}
				1175	#include "arm_compute/core/NEON/NEFixedPoint.inl"
				1176	#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */