Blame - arm_compute/core/NEON/NEFixedPoint.h - ml/ComputeLibrary

blob: 09579f9120e28f0bbbdf656d6dc8cab8097f7cd9 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
				25	#define __ARM_COMPUTE_NEFIXEDPOINT_H__
				26
				27	#include "arm_compute/core/FixedPoint.h"
				28
				29	#include <arm_neon.h>
				30
				31	namespace arm_compute
				32	{
				33	using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
				34	using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
				35	using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
				36	using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
				37	using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
				38	using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
				39	using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
				40	using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
				41	using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
				42	using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
				43	using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
				44	using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
				45	using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
				46	using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
				47	using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
				48	using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
Georgios Pinitas	9247c92	2017-06-28 18:29:47 +0100	[diff] [blame]	49	using qint32x2_t = int32x2_t; /**< 32 bit fixed point vector with 2 elements */
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	50	using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	51
				52	/** Get the lower half of a 16 elements vector
				53	*
				54	* @param[in] a vector of 16 elements
				55	*
				56	* @return 8 bit fixed point vector (8 elements)
				57	*/
				58	qint8x8_t vget_low_qs8(qint8x16_t a);
				59
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	60	/** Get the lower half of an 8 elements vector
				61	*
				62	* @param[in] a vector of 8 elements
				63	*
				64	* @return 16 bit fixed point vector (4 elements)
				65	*/
				66	qint16x4_t vget_low_qs16(qint16x8_t a);
				67
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	68	/** Get the higher half of a 16 elements vector
				69	*
				70	* @param[in] a vector of 16 elements
				71	*
				72	* @return 8 bit fixed point vector (8 elements)
				73	*/
				74	qint8x8_t vget_high_qs8(qint8x16_t a);
				75
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	76	/** Get the higher half of an 8 elements vector
				77	*
				78	* @param[in] a vector of 8 elements
				79	*
				80	* @return 16 bit fixed point vector (4 elements)
				81	*/
				82	qint16x4_t vget_high_qs16(qint16x8_t a);
				83
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	84	/** Load a single 8 bit fixed point vector from memory (8 elements)
				85	*
				86	* @param[in] addr Memory address of the 8 bit fixed point vector to load
				87	*
				88	* @return 8 bit fixed point vector (8 elements)
				89	*/
				90	qint8x8_t vld1_qs8(const qint8_t *addr);
				91
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	92	/** Load a single 16 bit fixed point vector from memory (4 elements)
				93	*
				94	* @param[in] addr Memory address of the 16 bit fixed point vector to load
				95	*
				96	* @return 16 bit fixed point vector (4 elements)
				97	*/
				98	qint16x4_t vld1_qs16(const qint16_t *addr);
				99
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	100	/** Load a single 8 bit fixed point vector from memory (16 elements)
				101	*
				102	* @param[in] addr Memory address of the 8 bit fixed point vector to load
				103	*
				104	* @return 8 bit fixed point vector (16 elements)
				105	*/
				106	qint8x16_t vld1q_qs8(const qint8_t *addr);
				107
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	108	/** Load a single 16 bit fixed point vector from memory (8 elements)
				109	*
				110	* @param[in] addr Memory address of the 16 bit fixed point vector to load
				111	*
				112	* @return 16 bit fixed point vector (8 elements)
				113	*/
				114	qint16x8_t vld1q_qs16(const qint16_t *addr);
				115
				116	/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
				117	*
				118	* @param[in] addr Memory address of the 8 bit fixed point scalar value to load
				119	*
				120	* @return 8 bit fixed point vector (8 elements)
				121	*/
				122	qint8x8_t vld1_dup_qs8(const qint8_t *addr);
				123
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	124	/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
				125	*
				126	* @param[in] addr Memory address of the 16 bit fixed point scalar value to load
				127	*
				128	* @return 16 bit fixed point vector (4 elements)
				129	*/
				130	qint16x4_t vld1_dup_qs16(const qint16_t *addr);
				131
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	132	/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
				133	*
				134	* @param[in] addr Memory address of the 8 bit fixed point scalar value to load
				135	*
				136	* @return 8 bit fixed point vector (16 elements)
				137	*/
				138	qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
				139
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	140	/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
				141	*
				142	* @param[in] addr Memory address of the 16 bit fixed point scalar value to load
				143	*
				144	* @return 16 bit fixed point vector (8 elements)
				145	*/
				146	qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
				147
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	148	/** Store a single 8 bit fixed point vector to memory (8 elements)
				149	*
				150	* @param[in] addr Memory address where the 8 bit fixed point vector should be stored
				151	* @param[in] b 8 bit fixed point vector to store
				152	*
				153	*/
				154	void vst1_qs8(qint8_t *addr, qint8x8_t b);
				155
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	156	/** Store a single 16 bit fixed point vector to memory (4 elements)
				157	*
				158	* @param[in] addr Memory address where the 16 bit fixed point vector should be stored
				159	* @param[in] b 16 bit fixed point vector to store
				160	*
				161	*/
				162	void vst1_qs16(qint16_t *addr, qint16x4_t b);
				163
				164	/** Store a single 8 bit fixed point vector to memory (16 elements)
				165	*
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	166	* @param[in] addr Memory address where the 8 bit fixed point vector should be stored
				167	* @param[in] b 8 bit fixed point vector to store
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	168	*
				169	*/
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	170	void vst1q_qs8(qint8_t *addr, qint8x16_t b);
				171
				172	/** Store a single 16 bit fixed point vector to memory (8 elements)
				173	*
				174	* @param[in] addr Memory address where the 16 bit fixed point vector should be stored
				175	* @param[in] b 16 bit fixed point vector to store
				176	*
				177	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	178	void vst1q_qs16(qint16_t *addr, qint16x8_t b);
				179
Georgios Pinitas	ccc65d4	2017-06-27 17:39:11 +0100	[diff] [blame]	180	/** Store two 16 bit fixed point vectors to memory (8x2 elements)
				181	*
				182	* @param[in] addr Memory address where the 16 bit fixed point vectors should be stored
				183	* @param[in] b 16 bit fixed point vectors to store
				184	*
				185	*/
				186	void vst2q_qs16(qint16_t *addr, qint16x8x2_t b);
				187
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	188	/** 16 bit fixed point vector saturating narrow (8 elements)
				189	*
				190	* @param[in] a 16 bit fixed point vector to convert
				191	*
				192	* @return 8 bit fixed point vector
				193	*/
				194	qint8x8_t vqmovn_q16(qint16x8_t a);
				195
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	196	/** 32 bit fixed point vector saturating narrow (4 elements)
				197	*
				198	* @param[in] a 32 bit fixed point vector to convert
				199	*
				200	* @return 16 bit fixed point vector
				201	*/
				202	qint16x4_t vqmovn_q32(qint32x4_t a);
				203
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	204	/** 8 bit fixed point vector duplicate (8 elements)
				205	*
				206	* @param[in] a 8 bit fixed point to duplicate
				207	*
				208	* @return The result of the vector duplication
				209	*/
				210	qint8x8_t vdup_n_qs8(qint8_t a);
				211
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	212	/** 16 bit fixed point vector duplicate (4 elements)
				213	*
				214	* @param[in] a 16 bit fixed point to duplicate
				215	*
				216	* @return The result of the vector duplication
				217	*/
				218	qint16x4_t vdup_n_qs16(qint16_t a);
				219
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	220	/** 8 bit fixed point vector duplicate (16 elements)
				221	*
				222	* @param[in] a 8 bit fixed point to duplicate
				223	*
				224	* @return The result of the vector duplication
				225	*/
				226	qint8x16_t vdupq_n_qs8(qint8_t a);
				227
				228	/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
				229	*
				230	* @param[in] a Floating point value to duplicate and convert to 8 bit fixed point
				231	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				232	*
				233	* @return The result of the vector duplication
				234	*/
				235	qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
				236
				237	/** 16 bit fixed point vector duplicate (8 elements)
				238	*
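				239	* @param[in] a 16 bit fixed point to duplicate (NOTE(review): this prototype takes a qint16x8_t vector, whereas vdup_n_qs16 and vdupq_n_qs8 take a scalar; confirm the parameter type against the definition)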
				240	*
				241	* @return The result of the vector duplication
				242	*/
				243	qint16x8_t vdupq_n_qs16(qint16x8_t a);
				244
				245	/** Absolute value of 8 bit fixed point vector (8 elements)
				246	*
				247	* @param[in] a 8 bit fixed point input vector
				248	*
				249	* @return The result of the 8 bit fixed point vector absolute value
				250	*/
				251	qint8x8_t vabs_qs8(qint8x8_t a);
				252
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	253	/** Absolute value of 16 bit fixed point vector (4 elements)
				254	*
				255	* @param[in] a 16 bit fixed point input vector
				256	*
				257	* @return The result of the 16 bit fixed point vector absolute value
				258	*/
				259	qint16x4_t vabs_qs16(qint16x4_t a);
				260
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	261	/** Absolute value of 8 bit fixed point vector (16 elements)
				262	*
				263	* @param[in] a 8 bit fixed point input vector
				264	*
				265	* @return The result of the 8 bit fixed point vector absolute value
				266	*/
				267	qint8x16_t vabsq_qs8(qint8x16_t a);
				268
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	269	/** Absolute value of 16 bit fixed point vector (8 elements)
				270	*
				271	* @param[in] a 16 bit fixed point input vector
				272	*
				273	* @return The result of the 16 bit fixed point vector absolute value
				274	*/
				275	qint16x8_t vabsq_qs16(qint16x8_t a);
				276
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	277	/** Saturating absolute value of 8 bit fixed point vector (8 elements)
				278	*
				279	* @param[in] a 8 bit fixed point input vector
				280	*
				281	* @return The result of the 8 bit fixed point vector absolute value
				282	*/
				283	qint8x8_t vqabs_qs8(qint8x8_t a);
				284
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	285	/** Saturating absolute value of 16 bit fixed point vector (4 elements)
				286	*
				287	* @param[in] a 16 bit fixed point input vector
				288	*
				289	* @return The result of the 16 bit fixed point vector absolute value
				290	*/
				291	qint16x4_t vqabs_qs16(qint16x4_t a);
				292
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	293	/** Saturating absolute value of 8 bit fixed point vector (16 elements)
				294	*
				295	* @param[in] a 8 bit fixed point input vector
				296	*
				297	* @return The result of the 8 bit fixed point vector absolute value
				298	*/
				299	qint8x16_t vqabsq_qs8(qint8x16_t a);
				300
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	301	/** Saturating absolute value of 16 bit fixed point vector (8 elements)
				302	*
				303	* @param[in] a 16 bit fixed point input vector
				304	*
				305	* @return The result of the 16 bit fixed point vector absolute value
				306	*/
				307	qint16x8_t vqabsq_qs16(qint16x8_t a);
				308
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	309	/** 8 bit fixed point vector max (8 elements)
				310	*
				311	* @param[in] a First 8 bit fixed point input vector
				312	* @param[in] b Second 8 bit fixed point input vector
				313	*
				314	* @return The result of the 8 bit fixed point vector max operation
				315	*/
				316	qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
				317
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	318	/** 16 bit fixed point vector max (4 elements)
				319	*
				320	* @param[in] a First 16 bit fixed point input vector
				321	* @param[in] b Second 16 bit fixed point input vector
				322	*
				323	* @return The result of the 16 bit fixed point vector max operation
				324	*/
				325	qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
				326
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	327	/** 8 bit fixed point vector max (16 elements)
				328	*
				329	* @param[in] a First 8 bit fixed point input vector
				330	* @param[in] b Second 8 bit fixed point input vector
				331	*
				332	* @return The result of the 8 bit fixed point vector max operation
				333	*/
				334	qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
				335
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	336	/** 16 bit fixed point vector max (8 elements)
				337	*
				338	* @param[in] a First 16 bit fixed point input vector
				339	* @param[in] b Second 16 bit fixed point input vector
				340	*
				341	* @return The result of the 16 bit fixed point vector max operation
				342	*/
				343	qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
				344
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	345	/** 8 bit fixed point vector pairwise max (8 elements)
				346	*
				347	* @param[in] a First 8 bit fixed point input vector
				348	* @param[in] b Second 8 bit fixed point input vector
				349	*
				350	* @return The result of the 8 bit fixed point vector pairwise max operation
				351	*/
				352	qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
				353
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	354	/** 16 bit fixed point vector pairwise max (4 elements)
				355	*
				356	* @param[in] a First 16 bit fixed point input vector
				357	* @param[in] b Second 16 bit fixed point input vector
				358	*
				359	* @return The result of the 16 bit fixed point vector pairwise max operation
				360	*/
				361	qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
				362
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	363	/** 8 bit fixed point vector min (8 elements)
				364	*
				365	* @param[in] a First 8 bit fixed point input vector
				366	* @param[in] b Second 8 bit fixed point input vector
				367	*
				368	* @return The result of the 8 bit fixed point vector min operation
				369	*/
				370	qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
				371
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	372	/** 16 bit fixed point vector min (4 elements)
				373	*
				374	* @param[in] a First 16 bit fixed point input vector
				375	* @param[in] b Second 16 bit fixed point input vector
				376	*
				377	* @return The result of the 16 bit fixed point vector min operation
				378	*/
				379	qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
				380
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	381	/** 8 bit fixed point vector min (16 elements)
				382	*
				383	* @param[in] a First 8 bit fixed point input vector
				384	* @param[in] b Second 8 bit fixed point input vector
				385	*
				386	* @return The result of the 8 bit fixed point vector min operation
				387	*/
				388	qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
				389
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	390	/** 16 bit fixed point vector min (8 elements)
				391	*
				392	* @param[in] a First 16 bit fixed point input vector
				393	* @param[in] b Second 16 bit fixed point input vector
				394	*
				395	* @return The result of the 16 bit fixed point vector min operation
				396	*/
				397	qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
				398
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	399	/** 8 bit fixed point vector pairwise min (8 elements)
				400	*
				401	* @param[in] a First 8 bit fixed point input vector
				402	* @param[in] b Second 8 bit fixed point input vector
				403	*
				404	* @return The result of the 8 bit fixed point vector pairwise min operation
				405	*/
				406	qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
				407
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	408	/** 16 bit fixed point vector pairwise min (4 elements)
				409	*
				410	* @param[in] a First 16 bit fixed point input vector
				411	* @param[in] b Second 16 bit fixed point input vector
				412	*
				413	* @return The result of the 16 bit fixed point vector pairwise min operation
				414	*/
				415	qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
				416
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	417	/** 8 bit fixed point vector add (8 elements)
				418	*
				419	* @param[in] a First 8 bit fixed point input vector
				420	* @param[in] b Second 8 bit fixed point input vector
				421	*
				422	* @return The result of the 8 bit fixed point vector addition
				423	*/
				424	qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
				425
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	426	/** 16 bit fixed point vector add (4 elements)
				427	*
				428	* @param[in] a First 16 bit fixed point input vector
				429	* @param[in] b Second 16 bit fixed point input vector
				430	*
				431	* @return The result of the 16 bit fixed point vector addition
				432	*/
				433	qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
				434
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	435	/** 8 bit fixed point vector add (16 elements)
				436	*
				437	* @param[in] a First 8 bit fixed point input vector
				438	* @param[in] b Second 8 bit fixed point input vector
				439	*
				440	* @return The result of the 8 bit fixed point vector addition
				441	*/
				442	qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
				443
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	444	/** 16 bit fixed point vector add (8 elements)
				445	*
				446	* @param[in] a First 16 bit fixed point input vector
				447	* @param[in] b Second 16 bit fixed point input vector
				448	*
				449	* @return The result of the 16 bit fixed point vector addition
				450	*/
				451	qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
				452
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	453	/** 8 bit fixed point vector saturating add (8 elements)
				454	*
				455	* @param[in] a First 8 bit fixed point input vector
				456	* @param[in] b Second 8 bit fixed point input vector
				457	*
				458	* @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
				459	*/
				460	qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
				461
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	462	/** 16 bit fixed point vector saturating add (4 elements)
				463	*
				464	* @param[in] a First 16 bit fixed point input vector
				465	* @param[in] b Second 16 bit fixed point input vector
				466	*
				467	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				468	*/
				469	qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
				470
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	471	/** 8 bit fixed point vector saturating add (16 elements)
				472	*
				473	* @param[in] a First 8 bit fixed point input vector
				474	* @param[in] b Second 8 bit fixed point input vector
				475	*
				476	* @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
				477	*/
				478	qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
				479
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	480	/** 16 bit fixed point vector saturating add (8 elements)
				481	*
				482	* @param[in] a First 16 bit fixed point input vector
				483	* @param[in] b Second 16 bit fixed point input vector
				484	*
				485	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				486	*/
				487	qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
				488
				489	/** 8 bit fixed point vector saturating pairwise add (8 elements)
				490	*
				491	* @param[in] a 8 bit fixed point input vector
				492	*
				493	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				494	*/
				495	int16x4_t vpaddl_qs8(qint8x8_t a);
				496
				497	/** 8 bit fixed point vector subtraction (8 elements)
				498	*
				499	* @param[in] a First 8 bit fixed point input vector
				500	* @param[in] b Second 8 bit fixed point input vector
				501	*
				502	* @return The result of the 8 bit fixed point vector subtraction
				503	*/
				504	qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
				505
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	506	/** 16 bit fixed point vector subtraction (4 elements)
				507	*
				508	* @param[in] a First 16 bit fixed point input vector
				509	* @param[in] b Second 16 bit fixed point input vector
				510	*
				511	* @return The result of the 16 bit fixed point vector subtraction
				512	*/
				513	qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
				514
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	515	/** 8 bit fixed point vector subtraction (16 elements)
				516	*
				517	* @param[in] a First 8 bit fixed point input vector
				518	* @param[in] b Second 8 bit fixed point input vector
				519	*
				520	* @return The result of the 8 bit fixed point vector subtraction
				521	*/
				522	qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
				523
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	524	/** 16 bit fixed point vector subtraction (8 elements)
				525	*
				526	* @param[in] a First 16 bit fixed point input vector
				527	* @param[in] b Second 16 bit fixed point input vector
				528	*
				529	* @return The result of the 16 bit fixed point vector subtraction
				530	*/
				531	qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
				532
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	533	/** 8 bit fixed point vector saturating subtraction (8 elements)
				534	*
				535	* @param[in] a First 8 bit fixed point input vector
				536	* @param[in] b Second 8 bit fixed point input vector
				537	*
				538	* @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
				539	*/
				540	qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
				541
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	542	/** 16 bit fixed point vector saturating subtraction (4 elements)
				543	*
				544	* @param[in] a First 16 bit fixed point input vector
				545	* @param[in] b Second 16 bit fixed point input vector
				546	*
				547	* @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
				548	*/
				549	qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
				550
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	551	/** 8 bit fixed point vector saturating subtraction (16 elements)
				552	*
				553	* @param[in] a First 8 bit fixed point input vector
				554	* @param[in] b Second 8 bit fixed point input vector
				555	*
				556	* @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
				557	*/
				558	qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
				559
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	560	/** 16 bit fixed point vector saturating subtraction (8 elements)
				561	*
				562	* @param[in] a First 16 bit fixed point input vector
				563	* @param[in] b Second 16 bit fixed point input vector
				564	*
				565	* @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
				566	*/
				567	qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
				568
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	569	/** 8 bit fixed point vector multiply (8 elements)
				570	*
				571	* @param[in] a First 8 bit fixed point input vector
				572	* @param[in] b Second 8 bit fixed point input vector
				573	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				574	*
				575	* @return The result of the 8 bit fixed point vector multiplication.
				576	*/
				577	qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				578
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	579	/** 16 bit fixed point vector multiply (4 elements)
				580	*
				581	* @param[in] a First 16 bit fixed point input vector
				582	* @param[in] b Second 16 bit fixed point input vector
				583	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				584	*
				585	* @return The result of the 16 bit fixed point vector multiplication.
				586	*/
				587	qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				588
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	589	/** 8 bit fixed point vector multiply (16 elements)
				590	*
				591	* @param[in] a First 8 bit fixed point input vector
				592	* @param[in] b Second 8 bit fixed point input vector
				593	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				594	*
				595	* @return The result of the 8 bit fixed point vector multiplication.
				596	*/
				597	qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				598
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	599	/** 16 bit fixed point vector multiply (8 elements)
				600	*
				601	* @param[in] a First 16 bit fixed point input vector
				602	* @param[in] b Second 16 bit fixed point input vector
				603	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				604	*
				605	* @return The result of the 16 bit fixed point vector multiplication.
				606	*/
				607	qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
				608
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	609	/** 8 bit fixed point vector saturating multiply (8 elements)
				610	*
				611	* @param[in] a First 8 bit fixed point input vector
				612	* @param[in] b Second 8 bit fixed point input vector
				613	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				614	*
				615	* @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
				616	*/
				617	qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				618
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	619	/** 16 bit fixed point vector saturating multiply (4 elements)
				620	*
				621	* @param[in] a First 16 bit fixed point input vector
				622	* @param[in] b Second 16 bit fixed point input vector
				623	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				624	*
				625	* @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
				626	*/
				627	qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				628
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	629	/** 8 bit fixed point vector saturating multiply (16 elements)
				630	*
				631	* @param[in] a First 8 bit fixed point input vector
				632	* @param[in] b Second 8 bit fixed point input vector
				633	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				634	*
				635	* @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
				636	*/
				637	qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				638
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	639	/** 16 bit fixed point vector saturating multiply (8 elements)
				640	*
				641	* @param[in] a First 16 bit fixed point input vector
				642	* @param[in] b Second 16 bit fixed point input vector
				643	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				644	*
				645	* @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
				646	*/
				647	qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
				648
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	649	/** 8 bit fixed point vector long multiply (8 elements)
				650	*
				651	* @param[in] a First 8 bit fixed point input vector
				652	* @param[in] b Second 8 bit fixed point input vector
				653	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				654	*
				655	* @return The result of the 8 bit fixed point long vector multiplication.
				656	*/
				657	qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				658
				659	/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				660	*
				661	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				662	* @param[in] b Second 8 bit fixed point input vector
				663	* @param[in] c Third 8 bit fixed point input vector
				664	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				665	*
				666	* @return The result of the 8 bit fixed point vector multiply-accumulate
				667	*/
				668	qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				669
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	670	/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				671	*
				672	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				673	* @param[in] b Second 16 bit fixed point input vector
				674	* @param[in] c Third 16 bit fixed point input vector
				675	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				676	*
				677	* @return The result of the 16 bit fixed point vector multiply-accumulate
				678	*/
				679	qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				680
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	681	/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				682	*
				683	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				684	* @param[in] b Second 8 bit fixed point input vector
				685	* @param[in] c Third 8 bit fixed point input vector
				686	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				687	*
				688	* @return The result of the 8 bit fixed point vector multiply-accumulate
				689	*/
				690	qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
				691
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	692	/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				693	*
				694	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				695	* @param[in] b Second 16 bit fixed point input vector
				696	* @param[in] c Third 16 bit fixed point input vector
				697	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				698	*
				699	* @return The result of the 16 bit fixed point vector multiply-accumulate
				700	*/
				701	qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
				702
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	703	/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				704	*
				705	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				706	* @param[in] b Second 8 bit fixed point input vector
				707	* @param[in] c Third 8 bit fixed point input vector
				708	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				709	*
				710	* @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				711	*/
				712	qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				713
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	714	/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				715	*
				716	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				717	* @param[in] b Second 16 bit fixed point input vector
				718	* @param[in] c Third 16 bit fixed point input vector
				719	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				720	*
				721	* @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				722	*/
				723	qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				724
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	725	/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				726	*
				727	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				728	* @param[in] b Second 8 bit fixed point input vector
				729	* @param[in] c Third 8 bit fixed point input vector
				730	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				731	*
				732	* @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				733	*/
				734	qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
				735
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	736	/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				737	*
				738	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				739	* @param[in] b Second 16 bit fixed point input vector
				740	* @param[in] c Third 16 bit fixed point input vector
				741	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				742	*
				743	* @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				744	*/
				745	qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
				746
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	747	/** 8 bit fixed point vector multiply-accumulate long (8 elements).
				748	* This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
				749	*
				750	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				751	* @param[in] b Second 8 bit fixed point input vector
				752	* @param[in] c Third 8 bit fixed point input vector
				753	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				754	*
				755	* @return The result of the 8 bit fixed point vector multiply-accumulate long
				756	*/
				757	qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				758
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	759	/** 16 bit fixed point vector multiply-accumulate long (4 elements).
				760	* This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
				761	*
				762	* @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
				763	* @param[in] b Second 16 bit fixed point input vector
				764	* @param[in] c Third 16 bit fixed point input vector
				765	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				766	*
				767	* @return The result of the 16 bit fixed point vector multiply-accumulate long
				768	*/
				769	qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				770
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	771	/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
				772	* This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
				773	*
				774	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				775	* @param[in] b Second 8 bit fixed point input vector
				776	* @param[in] c Third 8 bit fixed point input vector
				777	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				778	*
				779	* @return The result of the 8 bit fixed point vector multiply-accumulate long
				780	*/
				781	qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				782
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	783	/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
				784	* This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
				785	*
				786	* @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
				787	* @param[in] b Second 16 bit fixed point input vector
				788	* @param[in] c Third 16 bit fixed point input vector
				789	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				790	*
				791	* @return The result of the 16 bit fixed point vector multiply-accumulate long
				792	*/
				793	qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				794
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	795	/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements
				796	*
				797	* @param[in] a Float input vector
				798	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				799	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	800	* @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	801	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	802	qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	803
				804	/** Convert a float vector with 4 elements to 16 bit fixed point vector with 4 elements
				805	*
				806	* @param[in] a Float input vector
				807	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				808	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	809	* @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	810	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	811	qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	812
				813	/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements
				814	*
				815	* @param[in] a Float input vector
				816	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				817	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	818	* @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	819	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	820	qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	821
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	822	/** Convert a float vector with 4x2 elements to 16 bit fixed point vector with 8 elements
				823	*
				824	* @param[in] a Float input vector
				825	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				826	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	827	* @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	828	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	829	qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	830
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	831	/** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
				832	*
				833	* @param[in] a 8 bit fixed point input vector
				834	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				835	*
				836	* @return The result of the conversion 8 bit fixed point -> float32x4x2
				837	*/
				838	float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
				839
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	840	/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
				841	*
				842	* @param[in] a 16 bit fixed point input vector
				843	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				844	*
				845	* @return The result of the conversion 16 bit fixed point -> float32x4
				846	*/
				847	float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
				848
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	849	/** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
				850	*
				851	* @param[in] a 8 bit fixed point input vector
				852	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				853	*
				854	* @return The result of the conversion 8 bit fixed point -> float32x4x4
				855	*/
				856	float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
				857
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	858	/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements
				859	*
				860	* @param[in] a 16 bit fixed point input vector
				861	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				862	*
				863	* @return The result of the conversion 16 bit fixed point -> float32x4x2
				864	*/
				865	float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
				866
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	867	/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements)
				868	*
				869	* @param[in] a 8bit fixed point input vector
				870	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				871	*
				872	* @return The result of the 8bit reciprocal (1/a).
				873	*/
				874	qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
				875
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	876	/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (4 elements)
				877	*
				878	* @param[in] a 16 bit fixed point input vector
				879	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				880	*
				881	* @return The result of the 16 bit reciprocal (1/a).
				882	*/
				883	qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
				884
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	885	/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements)
				886	*
				887	* @param[in] a 8bit fixed point input vector
				888	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				889	*
				890	* @return The result of the 8bit reciprocal (1/a).
				891	*/
				892	qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
				893
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	894	/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (8 elements)
				895	*
				896	* @param[in] a 16 bit fixed point input vector
				897	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				898	*
				899	* @return The result of the 16 bit reciprocal (1/a).
				900	*/
				901	qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
				902
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	903	/** Division fixed point 8bit (8 elements)
				904	*
				905	* @param[in] a First 8bit fixed point input vector
				906	* @param[in] b Second 8bit fixed point input vector
				907	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				908	*
				909	* @return The quotient and remainder number in fixed point format.
				910	*/
				911	qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position);
				912
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	913	/** Division fixed point 16 bit (4 elements)
				914	*
				915	* @param[in] a First 16 bit fixed point input vector
				916	* @param[in] b Second 16 bit fixed point input vector
				917	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				918	*
				919	* @return The quotient and remainder number in fixed point format.
				920	*/
				921	qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				922
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	923	/** Division fixed point 8bit (16 elements)
				924	*
				925	* @param[in] a First 8bit fixed point input vector
				926	* @param[in] b Second 8bit fixed point input vector
				927	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				928	*
				929	* @return The quotient and remainder number in 8bit fixed point format.
				930	*/
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	931	qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				932
				933	/** Division fixed point 16 bit (8 elements)
				934	*
				935	* @param[in] a First 16 bit fixed point input vector
				936	* @param[in] b Second 16 bit fixed point input vector
				937	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				938	*
				939	* @return The quotient and remainder number in 16 bit fixed point format.
				940	*/
				941	qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	942
				943	/** Perform a 4th degree polynomial approximation. (8 elements)
				944	*
				945	* @param[in] a 8bit fixed point input vector
				946	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				947	*
				948	* @return The result of the 8bit taylor approximation.
				949	*/
				950	template <bool islog>
				951	qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
				952
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	953	/** Perform a 4th degree polynomial approximation. (4 elements)
				954	*
				955	* @param[in] a 16 bit fixed point input vector
				956	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				957	*
				958	* @return The result of the 16 bit taylor approximation.
				959	*/
				960	template <bool islog>
				961	qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
				962
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	963	/** Perform a 4th degree polynomial approximation. (16 elements)
				964	*
				965	* @param[in] a 8bit fixed point input vector
				966	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				967	*
				968	* @return The result of the 8bit taylor approximation.
				969	*/
				970	template <bool islog>
				971	qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
				972
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	973	/** Perform a 4th degree polynomial approximation. (8 elements)
				974	*
				975	* @param[in] a 16 bit fixed point input vector
				976	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				977	*
				978	* @return The result of the 16 bit taylor approximation.
				979	*/
				980	template <bool islog>
				981	qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
				982
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	983	/** Calculate saturating exponential fixed point 8bit (8 elements)
				984	*
				985	* @param[in] a 8bit fixed point input vector
				986	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				987	*
				988	* @return The result of the 8bit saturating exponential
				989	*/
				990	qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
				991
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	992	/** Calculate saturating exponential fixed point 16 bit (4 elements)
				993	*
				994	* @param[in] a 16 bit fixed point input vector
				995	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				996	*
				997	* @return The result of the 16 bit saturating exponential
				998	*/
				999	qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
				1000
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1001	/** Calculate saturating exponential fixed point 8bit (16 elements)
				1002	*
				1003	* @param[in] a 8bit fixed point input vector
				1004	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1005	*
				1006	* @return The result of the 8bit saturating exponential
				1007	*/
				1008	qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
				1009
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1010	/** Calculate saturating exponential fixed point 16 bit (8 elements)
				1011	*
				1012	* @param[in] a 16 bit fixed point input vector
				1013	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1014	*
				1015	* @return The result of the 16 bit saturating exponential
				1016	*/
				1017	qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
				1018
				1019	/** Calculate logarithm fixed point 8 bit (8 elements)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1020	*
				1021	* @param[in] a 8bit fixed point input vector
				1022	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1023	*
				1024	* @return The result of the 8bit logarithm.
				1025	*/
				1026	qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
				1027
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1028	/** Calculate logarithm fixed point 16 bit (4 elements)
				1029	*
				1030	* @param[in] a 16 bit fixed point input vector
				1031	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1032	*
				1033	* @return The result of the 16 bit logarithm.
				1034	*/
				1035	qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
				1036
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1037	/** Calculate logarithm fixed point 8bit (16 elements)
				1038	*
				1039	* @param[in] a 8bit fixed point input vector
				1040	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1041	*
				1042	* @return The result of the 8bit logarithm.
				1043	*/
				1044	qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
				1045
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1046	/** Calculate logarithm fixed point 16 bit (8 elements)
				1047	*
				1048	* @param[in] a 16 bit fixed point input vector
				1049	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1050	*
				1051	* @return The result of the 16 bit logarithm.
				1052	*/
				1053	qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
				1054
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1055	/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
				1056	*
				1057	* @param[in] a 8bit fixed point input vector
				1058	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1059	*
				1060	* @return The result of the 8bit inverse sqrt.
				1061	*/
				1062	qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
				1063
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1064	/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
				1065	*
				1066	* @param[in] a 16 bit fixed point input vector
				1067	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1068	*
				1069	* @return The result of the 16 bit inverse sqrt.
				1070	*/
				1071	qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
				1072
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1073	/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
				1074	*
				1075	* @param[in] a 8bit fixed point input vector
				1076	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1077	*
				1078	* @return The result of the 8bit inverse sqrt.
				1079	*/
				1080	qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
				1081
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1082	/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
				1083	*
				1084	* @param[in] a 16 bit fixed point input vector
				1085	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1086	*
				1087	* @return The result of the 16 bit inverse sqrt.
				1088	*/
				1089	qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
				1090
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1091	/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
				1092	*
				1093	* @param[in] a 8bit fixed point input vector
				1094	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1095	*
				1096	* @return The result of the 8bit inverse sqrt.
				1097	*/
				1098	qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
				1099
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1100	/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
				1101	*
				1102	* @param[in] a 16 bit fixed point input vector
				1103	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1104	*
				1105	* @return The result of the 16 bit inverse sqrt.
				1106	*/
				1107	qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
				1108
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1109	/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
				1110	*
				1111	* @param[in] a 8bit fixed point input vector
				1112	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1113	*
				1114	* @return The result of the 8bit inverse sqrt.
				1115	*/
				1116	qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
				1117
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1118	/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
				1119	*
				1120	* @param[in] a 16 bit fixed point input vector
				1121	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1122	*
				1123	* @return The result of the 16 bit inverse sqrt.
				1124	*/
				1125	qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
				1126
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1127	/** Calculate hyperbolic tangent for fixed point 8bit (8 elements)
				1128	*
				1129	* @param[in] a 8bit fixed point input vector
				1130	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1131	*
				1132	* @return The calculated Hyperbolic Tangent.
				1133	*/
Georgios Pinitas	ccc65d4	2017-06-27 17:39:11 +0100	[diff] [blame]	1134	qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1135
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1136	/** Calculate hyperbolic tangent for fixed point 16 bit (4 elements)
				1137	*
				1138	* @param[in] a 16 bit fixed point input vector
				1139	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1140	*
				1141	* @return The calculated Hyperbolic Tangent.
				1142	*/
Georgios Pinitas	ccc65d4	2017-06-27 17:39:11 +0100	[diff] [blame]	1143	qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1144
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1145	/** Calculate hyperbolic tangent for fixed point 8 bit (16 elements)
				1146	*
				1147	* @param[in] a 8 bit fixed point input vector
				1148	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1149	*
				1150	* @return The calculated hyperbolic tangent as an 8 bit fixed point vector (16 elements).
				1151	*/
Georgios Pinitas	ccc65d4	2017-06-27 17:39:11 +0100	[diff] [blame]	1152	qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position);
				1153
				1154	/** Calculate hyperbolic tangent for fixed point 16 bit (8 elements)
				1155	*
				1156	* @param[in] a 16 bit fixed point input vector
				1157	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1158	*
				1159	* @return The calculated hyperbolic tangent as a 16 bit fixed point vector (8 elements).
				1160	*/
				1161	qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1162
				1163	/** Calculate saturating power a^b for fixed point 8 bit (16 elements).
				1164	*
				1165	* pow(a,b) = e^(b*log(a))
				1166	*
				1167	* @param[in] a 8 bit fixed point input (base) vector with 16 elements
				1168	* @param[in] b 8 bit fixed point power (exponent) vector with 16 elements
				1169	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1170	*
				1171	* @return The result of the 8 bit power as a vector with 16 elements.
				1172	*/
				1173	qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
Michele Di Giorgio	8af2dd6	2017-06-19 15:19:29 +0100	[diff] [blame]	1174
				1175	/** Compute the lane-by-lane maximum of two float vectors with 4x2 elements each
				1176	*
				1177	* @param[in] a First float input vector
				1178	* @param[in] b Second float input vector
				1179	*
				1180	* @return The lane-by-lane maximum of a and b as a float32x4x2_t
				1181	*/
				1182	float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1183	}
				1184	#include "arm_compute/core/NEON/NEFixedPoint.inl"
				1185	#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */