Blame - arm_compute/core/NEON/NEFixedPoint.h - ml/ComputeLibrary

blob: 50463b5efe61d82874845f9a30ff842108775e81 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
				25	#define __ARM_COMPUTE_NEFIXEDPOINT_H__
				26
				27	#include "arm_compute/core/FixedPoint.h"
				28
				29	#include <arm_neon.h>
				30
				31	namespace arm_compute
				32	{
				33	using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
				34	using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
				35	using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
				36	using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
				37	using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
				38	using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
				39	using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
				40	using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
				41	using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
				42	using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
				43	using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
				44	using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
				45	using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
				46	using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
				47	using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
				48	using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
Georgios Pinitas	9247c92	2017-06-28 18:29:47 +0100	[diff] [blame]	49	using qint32x2_t = int32x2_t; /**< 32 bit fixed point vector with 2 elements */
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	50	using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	51
				52	/** Get the lower half of a 16 elements vector
				53	*
				54	* @param[in] a vector of 16 elements
				55	*
				56	* @return 8 bit fixed point vector (8 elements)
				57	*/
				58	qint8x8_t vget_low_qs8(qint8x16_t a);
				59
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	60	/** Get the lower half of an 8 elements vector
				61	*
				62	* @param[in] a vector of 8 elements
				63	*
				64	* @return 16 bit fixed point vector (4 elements)
				65	*/
				66	qint16x4_t vget_low_qs16(qint16x8_t a);
				67
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	68	/** Get the higher half of a 16 elements vector
				69	*
				70	* @param[in] a vector of 16 elements
				71	*
				72	* @return 8 bit fixed point vector (8 elements)
				73	*/
				74	qint8x8_t vget_high_qs8(qint8x16_t a);
				75
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	76	/** Get the higher half of an 8 elements vector
				77	*
				78	* @param[in] a vector of 8 elements
				79	*
				80	* @return 16 bit fixed point vector (4 elements)
				81	*/
				82	qint16x4_t vget_high_qs16(qint16x8_t a);
				83
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	84	/** Load a single 8 bit fixed point vector from memory (8 elements)
				85	*
				86	* @param[in] addr Memory address of the 8 bit fixed point vector to load
				87	*
				88	* @return 8 bit fixed point vector (8 elements)
				89	*/
				90	qint8x8_t vld1_qs8(const qint8_t *addr);
				91
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	92	/** Load a single 16 bit fixed point vector from memory (4 elements)
				93	*
				94	* @param[in] addr Memory address of the 16 bit fixed point vector to load
				95	*
				96	* @return 16 bit fixed point vector (4 elements)
				97	*/
				98	qint16x4_t vld1_qs16(const qint16_t *addr);
				99
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	100	/** Load a single 8 bit fixed point vector from memory (16 elements)
				101	*
				102	* @param[in] addr Memory address of the 8 bit fixed point vector to load
				103	*
				104	* @return 8 bit fixed point vector (16 elements)
				105	*/
				106	qint8x16_t vld1q_qs8(const qint8_t *addr);
				107
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	108	/** Load a single 16 bit fixed point vector from memory (8 elements)
				109	*
				110	* @param[in] addr Memory address of the 16 bit fixed point vector to load
				111	*
				112	* @return 16 bit fixed point vector (8 elements)
				113	*/
				114	qint16x8_t vld1q_qs16(const qint16_t *addr);
				115
				116	/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
				117	*
				118	* @param[in] addr Memory address of the 8 bit fixed point scalar value to load
				119	*
				120	* @return 8 bit fixed point vector (8 elements)
				121	*/
				122	qint8x8_t vld1_dup_qs8(const qint8_t *addr);
				123
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	124	/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
				125	*
				126	* @param[in] addr Memory address of the 16 bit fixed point scalar value to load
				127	*
				128	* @return 16 bit fixed point vector (4 elements)
				129	*/
				130	qint16x4_t vld1_dup_qs16(const qint16_t *addr);
				131
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	132	/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
				133	*
				134	* @param[in] addr Memory address of the 8 bit fixed point scalar value to load
				135	*
				136	* @return 8 bit fixed point vector (16 elements)
				137	*/
				138	qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
				139
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	140	/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
				141	*
				142	* @param[in] addr Memory address of the 16 bit fixed point scalar value to load
				143	*
				144	* @return 16 bit fixed point vector (8 elements)
				145	*/
				146	qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
				147
Michele Di Giorgio	81f0d15	2017-07-11 15:00:52 +0100	[diff] [blame]	148	/** Load two 16 bit fixed point vectors from memory (8x2 elements)
				149	*
				150	* @param[in] addr Memory address of the 16 bit fixed point vectors to load
				151	*
				152	* @return 16 bit fixed point vectors (8x2 elements)
				153	*/
				154	qint16x8x2_t vld2q_qs16(qint16_t *addr);
				155
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	156	/** Store a single 8 bit fixed point vector to memory (8 elements)
				157	*
				158	* @param[out] addr Memory address where the 8 bit fixed point vector should be stored
				159	* @param[in] b 8 bit fixed point vector to store
				160	*
				161	*/
				162	void vst1_qs8(qint8_t *addr, qint8x8_t b);
				163
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	164	/** Store a single 16 bit fixed point vector to memory (4 elements)
				165	*
				166	* @param[out] addr Memory address where the 16 bit fixed point vector should be stored
				167	* @param[in] b 16 bit fixed point vector to store
				168	*
				169	*/
				170	void vst1_qs16(qint16_t *addr, qint16x4_t b);
				171
				172	/** Store a single 8 bit fixed point vector to memory (16 elements)
				173	*
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	174	* @param[out] addr Memory address where the 8 bit fixed point vector should be stored
				175	* @param[in] b 8 bit fixed point vector to store
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	176	*
				177	*/
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	178	void vst1q_qs8(qint8_t *addr, qint8x16_t b);
				179
				180	/** Store a single 16 bit fixed point vector to memory (8 elements)
				181	*
				182	* @param[out] addr Memory address where the 16 bit fixed point vector should be stored
				183	* @param[in] b 16 bit fixed point vector to store
				184	*
				185	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	186	void vst1q_qs16(qint16_t *addr, qint16x8_t b);
				187
Georgios Pinitas	ccc65d4	2017-06-27 17:39:11 +0100	[diff] [blame]	188	/** Store two 16 bit fixed point vectors to memory (8x2 elements)
				189	*
				190	* @param[out] addr Memory address where the 16 bit fixed point vectors should be stored
				191	* @param[in] b 16 bit fixed point vectors to store
				192	*
				193	*/
				194	void vst2q_qs16(qint16_t *addr, qint16x8x2_t b);
				195
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	196	/** 16 bit fixed point vector saturating narrow (8 elements)
				197	*
				198	* @param[in] a 16 bit fixed point vector to convert
				199	*
				200	* @return 8 bit fixed point vector
				201	*/
				202	qint8x8_t vqmovn_q16(qint16x8_t a);
				203
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	204	/** 32 bit fixed point vector saturating narrow (4 elements)
				205	*
				206	* @param[in] a 32 bit fixed point vector to convert
				207	*
				208	* @return 16 bit fixed point vector
				209	*/
				210	qint16x4_t vqmovn_q32(qint32x4_t a);
				211
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	212	/** 8 bit fixed point vector duplicate (8 elements)
				213	*
				214	* @param[in] a 8 bit fixed point to duplicate
				215	*
				216	* @return The result of the vector duplication
				217	*/
				218	qint8x8_t vdup_n_qs8(qint8_t a);
				219
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	220	/** 16 bit fixed point vector duplicate (4 elements)
				221	*
				222	* @param[in] a 16 bit fixed point to duplicate
				223	*
				224	* @return The result of the vector duplication
				225	*/
				226	qint16x4_t vdup_n_qs16(qint16_t a);
				227
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	228	/** 8 bit fixed point vector duplicate (16 elements)
				229	*
				230	* @param[in] a 8 bit fixed point to duplicate
				231	*
				232	* @return The result of the vector duplication
				233	*/
				234	qint8x16_t vdupq_n_qs8(qint8_t a);
				235
				236	/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
				237	*
				238	* @param[in] a Floating point value to convert to 8 bit fixed point and duplicate
				239	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				240	*
				241	* @return The result of the vector duplication
				242	*/
				243	qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
				244
				245	/** 16 bit fixed point vector duplicate (8 elements)
				246	*
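				247	* @param[in] a 16 bit fixed point to duplicate (NOTE(review): the parameter is declared as a qint16x8_t vector although a scalar is described and vdup_n_qs16 takes a qint16_t - confirm against the definition)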
				248	*
				249	* @return The result of the vector duplication
				250	*/
				251	qint16x8_t vdupq_n_qs16(qint16x8_t a);
				252
				253	/** Absolute value of 8 bit fixed point vector (8 elements)
				254	*
				255	* @param[in] a 8 bit fixed point input vector
				256	*
				257	* @return The result of the 8 bit fixed point vector absolute value
				258	*/
				259	qint8x8_t vabs_qs8(qint8x8_t a);
				260
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	261	/** Absolute value of 16 bit fixed point vector (4 elements)
				262	*
				263	* @param[in] a 16 bit fixed point input vector
				264	*
				265	* @return The result of the 16 bit fixed point vector absolute value
				266	*/
				267	qint16x4_t vabs_qs16(qint16x4_t a);
				268
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	269	/** Absolute value of 8 bit fixed point vector (16 elements)
				270	*
				271	* @param[in] a 8 bit fixed point input vector
				272	*
				273	* @return The result of the 8 bit fixed point vector absolute value
				274	*/
				275	qint8x16_t vabsq_qs8(qint8x16_t a);
				276
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	277	/** Absolute value of 16 bit fixed point vector (8 elements)
				278	*
				279	* @param[in] a 16 bit fixed point input vector
				280	*
				281	* @return The result of the 16 bit fixed point vector absolute value
				282	*/
				283	qint16x8_t vabsq_qs16(qint16x8_t a);
				284
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	285	/** Saturating absolute value of 8 bit fixed point vector (8 elements)
				286	*
				287	* @param[in] a 8 bit fixed point input vector
				288	*
				289	* @return The result of the 8 bit fixed point vector absolute value
				290	*/
				291	qint8x8_t vqabs_qs8(qint8x8_t a);
				292
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	293	/** Saturating absolute value of 16 bit fixed point vector (4 elements)
				294	*
				295	* @param[in] a 16 bit fixed point input vector
				296	*
				297	* @return The result of the 16 bit fixed point vector absolute value
				298	*/
				299	qint16x4_t vqabs_qs16(qint16x4_t a);
				300
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	301	/** Saturating absolute value of 8 bit fixed point vector (16 elements)
				302	*
				303	* @param[in] a 8 bit fixed point input vector
				304	*
				305	* @return The result of the 8 bit fixed point vector absolute value
				306	*/
				307	qint8x16_t vqabsq_qs8(qint8x16_t a);
				308
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	309	/** Saturating absolute value of 16 bit fixed point vector (8 elements)
				310	*
				311	* @param[in] a 16 bit fixed point input vector
				312	*
				313	* @return The result of the 16 bit fixed point vector absolute value
				314	*/
				315	qint16x8_t vqabsq_qs16(qint16x8_t a);
				316
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	317	/** 8 bit fixed point vector max (8 elements)
				318	*
				319	* @param[in] a First 8 bit fixed point input vector
				320	* @param[in] b Second 8 bit fixed point input vector
				321	*
				322	* @return The result of the 8 bit fixed point vector max operation
				323	*/
				324	qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
				325
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	326	/** 16 bit fixed point vector max (4 elements)
				327	*
				328	* @param[in] a First 16 bit fixed point input vector
				329	* @param[in] b Second 16 bit fixed point input vector
				330	*
				331	* @return The result of the 16 bit fixed point vector max operation
				332	*/
				333	qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
				334
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	335	/** 8 bit fixed point vector max (16 elements)
				336	*
				337	* @param[in] a First 8 bit fixed point input vector
				338	* @param[in] b Second 8 bit fixed point input vector
				339	*
				340	* @return The result of the 8 bit fixed point vector max operation
				341	*/
				342	qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
				343
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	344	/** 16 bit fixed point vector max (8 elements)
				345	*
				346	* @param[in] a First 16 bit fixed point input vector
				347	* @param[in] b Second 16 bit fixed point input vector
				348	*
				349	* @return The result of the 16 bit fixed point vector max operation
				350	*/
				351	qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
				352
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	353	/** 8 bit fixed point vector pairwise max (8 elements)
				354	*
				355	* @param[in] a First 8 bit fixed point input vector
				356	* @param[in] b Second 8 bit fixed point input vector
				357	*
				358	* @return The result of the 8 bit fixed point vector pairwise max operation
				359	*/
				360	qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
				361
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	362	/** 16 bit fixed point vector pairwise max (4 elements)
				363	*
				364	* @param[in] a First 16 bit fixed point input vector
				365	* @param[in] b Second 16 bit fixed point input vector
				366	*
				367	* @return The result of the 16 bit fixed point vector pairwise max operation
				368	*/
				369	qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
				370
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	371	/** 8 bit fixed point vector min (8 elements)
				372	*
				373	* @param[in] a First 8 bit fixed point input vector
				374	* @param[in] b Second 8 bit fixed point input vector
				375	*
				376	* @return The result of the 8 bit fixed point vector min operation
				377	*/
				378	qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
				379
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	380	/** 16 bit fixed point vector min (4 elements)
				381	*
				382	* @param[in] a First 16 bit fixed point input vector
				383	* @param[in] b Second 16 bit fixed point input vector
				384	*
				385	* @return The result of the 16 bit fixed point vector min operation
				386	*/
				387	qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
				388
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	389	/** 8 bit fixed point vector min (16 elements)
				390	*
				391	* @param[in] a First 8 bit fixed point input vector
				392	* @param[in] b Second 8 bit fixed point input vector
				393	*
				394	* @return The result of the 8 bit fixed point vector min operation
				395	*/
				396	qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
				397
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	398	/** 16 bit fixed point vector min (8 elements)
				399	*
				400	* @param[in] a First 16 bit fixed point input vector
				401	* @param[in] b Second 16 bit fixed point input vector
				402	*
				403	* @return The result of the 16 bit fixed point vector min operation
				404	*/
				405	qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
				406
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	407	/** 8 bit fixed point vector pairwise min (8 elements)
				408	*
				409	* @param[in] a First 8 bit fixed point input vector
				410	* @param[in] b Second 8 bit fixed point input vector
				411	*
				412	* @return The result of the 8 bit fixed point vector pairwise min operation
				413	*/
				414	qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
				415
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	416	/** 16 bit fixed point vector pairwise min (4 elements)
				417	*
				418	* @param[in] a First 16 bit fixed point input vector
				419	* @param[in] b Second 16 bit fixed point input vector
				420	*
				421	* @return The result of the 16 bit fixed point vector pairwise min operation
				422	*/
				423	qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
				424
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	425	/** 8 bit fixed point vector add (8 elements)
				426	*
				427	* @param[in] a First 8 bit fixed point input vector
				428	* @param[in] b Second 8 bit fixed point input vector
				429	*
				430	* @return The result of the 8 bit fixed point vector addition
				431	*/
				432	qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
				433
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	434	/** 16 bit fixed point vector add (4 elements)
				435	*
				436	* @param[in] a First 16 bit fixed point input vector
				437	* @param[in] b Second 16 bit fixed point input vector
				438	*
				439	* @return The result of the 16 bit fixed point vector addition
				440	*/
				441	qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
				442
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	443	/** 8 bit fixed point vector add (16 elements)
				444	*
				445	* @param[in] a First 8 bit fixed point input vector
				446	* @param[in] b Second 8 bit fixed point input vector
				447	*
				448	* @return The result of the 8 bit fixed point vector addition
				449	*/
				450	qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
				451
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	452	/** 16 bit fixed point vector add (8 elements)
				453	*
				454	* @param[in] a First 16 bit fixed point input vector
				455	* @param[in] b Second 16 bit fixed point input vector
				456	*
				457	* @return The result of the 16 bit fixed point vector addition
				458	*/
				459	qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
				460
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	461	/** 8 bit fixed point vector saturating add (8 elements)
				462	*
				463	* @param[in] a First 8 bit fixed point input vector
				464	* @param[in] b Second 8 bit fixed point input vector
				465	*
				466	* @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
				467	*/
				468	qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
				469
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	470	/** 16 bit fixed point vector saturating add (4 elements)
				471	*
				472	* @param[in] a First 16 bit fixed point input vector
				473	* @param[in] b Second 16 bit fixed point input vector
				474	*
				475	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				476	*/
				477	qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
				478
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	479	/** 8 bit fixed point vector saturating add (16 elements)
				480	*
				481	* @param[in] a First 8 bit fixed point input vector
				482	* @param[in] b Second 8 bit fixed point input vector
				483	*
				484	* @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
				485	*/
				486	qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
				487
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	488	/** 16 bit fixed point vector saturating add (8 elements)
				489	*
				490	* @param[in] a First 16 bit fixed point input vector
				491	* @param[in] b Second 16 bit fixed point input vector
				492	*
				493	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				494	*/
				495	qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
				496
				497	/** 8 bit fixed point vector long pairwise add (8 elements)
				498	*
				499	* @param[in] a 8 bit fixed point input vector
				500	*
				501	* @return The result of the 8 bit fixed point vector long pairwise addition (4 elements of 16 bit)
				502	*/
				503	int16x4_t vpaddl_qs8(qint8x8_t a);
				504
				505	/** 8 bit fixed point vector subtraction (8 elements)
				506	*
				507	* @param[in] a First 8 bit fixed point input vector
				508	* @param[in] b Second 8 bit fixed point input vector
				509	*
				510	* @return The result of the 8 bit fixed point vector subtraction
				511	*/
				512	qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
				513
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	514	/** 16 bit fixed point vector subtraction (4 elements)
				515	*
				516	* @param[in] a First 16 bit fixed point input vector
				517	* @param[in] b Second 16 bit fixed point input vector
				518	*
				519	* @return The result of the 16 bit fixed point vector subtraction
				520	*/
				521	qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
				522
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	523	/** 8 bit fixed point vector subtraction (16 elements)
				524	*
				525	* @param[in] a First 8 bit fixed point input vector
				526	* @param[in] b Second 8 bit fixed point input vector
				527	*
				528	* @return The result of the 8 bit fixed point vector subtraction
				529	*/
				530	qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
				531
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	532	/** 16 bit fixed point vector subtraction (8 elements)
				533	*
				534	* @param[in] a First 16 bit fixed point input vector
				535	* @param[in] b Second 16 bit fixed point input vector
				536	*
				537	* @return The result of the 16 bit fixed point vector subtraction
				538	*/
				539	qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
				540
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	541	/** 8 bit fixed point vector saturating subtraction (8 elements)
				542	*
				543	* @param[in] a First 8 bit fixed point input vector
				544	* @param[in] b Second 8 bit fixed point input vector
				545	*
				546	* @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
				547	*/
				548	qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
				549
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	550	/** 16 bit fixed point vector saturating subtraction (4 elements)
				551	*
				552	* @param[in] a First 16 bit fixed point input vector
				553	* @param[in] b Second 16 bit fixed point input vector
				554	*
				555	* @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
				556	*/
				557	qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
				558
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	559	/** 8 bit fixed point vector saturating subtraction (16 elements)
				560	*
				561	* @param[in] a First 8 bit fixed point input vector
				562	* @param[in] b Second 8 bit fixed point input vector
				563	*
				564	* @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
				565	*/
				566	qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
				567
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	568	/** 16 bit fixed point vector saturating subtraction (8 elements)
				569	*
				570	* @param[in] a First 16 bit fixed point input vector
				571	* @param[in] b Second 16 bit fixed point input vector
				572	*
				573	* @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
				574	*/
				575	qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
				576
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	577	/** 8 bit fixed point vector multiply (8 elements)
				578	*
				579	* @param[in] a First 8 bit fixed point input vector
				580	* @param[in] b Second 8 bit fixed point input vector
				581	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				582	*
				583	* @return The result of the 8 bit fixed point vector multiplication.
				584	*/
				585	qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				586
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	587	/** 16 bit fixed point vector multiply (4 elements)
				588	*
				589	* @param[in] a First 16 bit fixed point input vector
				590	* @param[in] b Second 16 bit fixed point input vector
				591	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				592	*
				593	* @return The result of the 16 bit fixed point vector multiplication.
				594	*/
				595	qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				596
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	597	/** 8 bit fixed point vector multiply (16 elements)
				598	*
				599	* @param[in] a First 8 bit fixed point input vector
				600	* @param[in] b Second 8 bit fixed point input vector
				601	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				602	*
				603	* @return The result of the 8 bit fixed point vector multiplication.
				604	*/
				605	qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				606
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	607	/** 16 bit fixed point vector multiply (8 elements)
				608	*
				609	* @param[in] a First 16 bit fixed point input vector
				610	* @param[in] b Second 16 bit fixed point input vector
				611	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				612	*
				613	* @return The result of the 16 bit fixed point vector multiplication.
				614	*/
				615	qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
				616
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	617	/** 8 bit fixed point vector saturating multiply (8 elements)
				618	*
				619	* @param[in] a First 8 bit fixed point input vector
				620	* @param[in] b Second 8 bit fixed point input vector
				621	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				622	*
				623	* @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
				624	*/
				625	qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				626
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	627	/** 16 bit fixed point vector saturating multiply (4 elements)
				628	*
				629	* @param[in] a First 16 bit fixed point input vector
				630	* @param[in] b Second 16 bit fixed point input vector
				631	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				632	*
				633	* @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
				634	*/
				635	qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				636
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	637	/** 8 bit fixed point vector saturating multiply (16 elements)
				638	*
				639	* @param[in] a First 8 bit fixed point input vector
				640	* @param[in] b Second 8 bit fixed point input vector
				641	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				642	*
				643	* @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
				644	*/
				645	qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				646
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	647	/** 16 bit fixed point vector saturating multiply (8 elements)
				648	*
				649	* @param[in] a First 16 bit fixed point input vector
				650	* @param[in] b Second 16 bit fixed point input vector
				651	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				652	*
				653	* @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
				654	*/
				655	qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
				656
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	657	/** 8 bit fixed point vector long multiply (8 elements)
				658	*
				659	* @param[in] a First 8 bit fixed point input vector
				660	* @param[in] b Second 8 bit fixed point input vector
				661	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				662	*
				663	* @return The result of the 8 bit fixed point long vector multiplication.
				664	*/
				665	qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				666
				667	/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				668	*
				669	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				670	* @param[in] b Second 8 bit fixed point input vector
				671	* @param[in] c Third 8 bit fixed point input vector
				672	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				673	*
				674	* @return The result of the 8 bit fixed point vector multiply-accumulate
				675	*/
				676	qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				677
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	678	/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				679	*
				680	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				681	* @param[in] b Second 16 bit fixed point input vector
				682	* @param[in] c Third 16 bit fixed point input vector
				683	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				684	*
				685	* @return The result of the 16 bit fixed point vector multiply-accumulate
				686	*/
				687	qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				688
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	689	/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				690	*
				691	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				692	* @param[in] b Second 8 bit fixed point input vector
				693	* @param[in] c Third 8 bit fixed point input vector
				694	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				695	*
				696	* @return The result of the 8 bit fixed point vector multiply-accumulate
				697	*/
				698	qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
				699
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	700	/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				701	*
				702	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				703	* @param[in] b Second 16 bit fixed point input vector
				704	* @param[in] c Third 16 bit fixed point input vector
				705	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				706	*
				707	* @return The result of the 16 bit fixed point vector multiply-accumulate
				708	*/
				709	qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
				710
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	711	/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				712	*
				713	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				714	* @param[in] b Second 8 bit fixed point input vector
				715	* @param[in] c Third 8 bit fixed point input vector
				716	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				717	*
				718	* @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				719	*/
				720	qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				721
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	722	/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				723	*
				724	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				725	* @param[in] b Second 16 bit fixed point input vector
				726	* @param[in] c Third 16 bit fixed point input vector
				727	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				728	*
				729	* @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				730	*/
				731	qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				732
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	733	/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				734	*
				735	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				736	* @param[in] b Second 8 bit fixed point input vector
				737	* @param[in] c Third 8 bit fixed point input vector
				738	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				739	*
				740	* @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				741	*/
				742	qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
				743
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	744	/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				745	*
				746	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				747	* @param[in] b Second 16 bit fixed point input vector
				748	* @param[in] c Third 16 bit fixed point input vector
				749	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				750	*
				751	* @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				752	*/
				753	qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
				754
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	755	/** 8 bit fixed point vector multiply-accumulate long (8 elements).
				756	* This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
				757	*
				758	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				759	* @param[in] b Second 8 bit fixed point input vector
				760	* @param[in] c Third 8 bit fixed point input vector
				761	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				762	*
				763	* @return The result of the 8 bit fixed point vector multiply-accumulate long
				764	*/
				765	qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				766
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	767	/** 16 bit fixed point vector multiply-accumulate long (4 elements).
				768	* This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
				769	*
				770	* @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
				771	* @param[in] b Second 16 bit fixed point input vector
				772	* @param[in] c Third 16 bit fixed point input vector
				773	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				774	*
				775	* @return The result of the 16 bit fixed point vector multiply-accumulate long
				776	*/
				777	qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				778
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	779	/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
				780	* This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
				781	*
				782	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				783	* @param[in] b Second 8 bit fixed point input vector
				784	* @param[in] c Third 8 bit fixed point input vector
				785	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				786	*
				787	* @return The result of the 8 bit fixed point vector multiply-accumulate long
				788	*/
				789	qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				790
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	791	/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
				792	* This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
				793	*
				794	* @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
				795	* @param[in] b Second 16 bit fixed point input vector
				796	* @param[in] c Third 16 bit fixed point input vector
				797	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				798	*
				799	* @return The result of the 16 bit fixed point vector multiply-accumulate long
				800	*/
				801	qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
				802
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	803	/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements
				804	*
				805	* @param[in] a Float input vector
				806	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				807	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	808	* @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	809	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	810	qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	811
				812	/** Convert a float vector with 4 elements to 16 bit fixed point vector with 4 elements
				813	*
				814	* @param[in] a Float input vector
				815	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				816	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	817	* @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	818	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	819	qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	820
				821	/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements
				822	*
				823	* @param[in] a Float input vector
				824	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				825	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	826	* @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	827	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	828	qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	829
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	830	/** Convert a float vector with 4x2 elements to 16 bit fixed point vector with 8 elements
				831	*
				832	* @param[in] a Float input vector
				833	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				834	*
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	835	* @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	836	*/
Georgios Pinitas	21efeb4	2017-07-04 12:47:17 +0100	[diff] [blame]	837	qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	838
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	839	/** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
				840	*
				841	* @param[in] a 8 bit fixed point input vector
				842	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				843	*
				844	* @return The result of the conversion 8 bit fixed point -> float32x4x2
				845	*/
				846	float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
				847
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	848	/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
				849	*
				850	* @param[in] a 16 bit fixed point input vector
				851	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				852	*
				853	* @return The result of the conversion 16 bit fixed point -> float32x4
				854	*/
				855	float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
				856
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	857	/** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
				858	*
				859	* @param[in] a 8 bit fixed point input vector
				860	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				861	*
				862	* @return The result of the conversion 8 bit fixed point -> float32x4x4
				863	*/
				864	float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
				865
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	866	/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements
				867	*
				868	* @param[in] a 16 bit fixed point input vector
				869	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				870	*
				871	* @return The result of the conversion 16 bit fixed point -> float32x4x2
				872	*/
				873	float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
				874
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	875	/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements)
				876	*
				877	* @param[in] a 8bit fixed point input vector
				878	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				879	*
				880	* @return The result of the 8bit reciprocal (1/a).
				881	*/
				882	qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
				883
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	884	/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (4 elements)
				885	*
				886	* @param[in] a 16 bit fixed point input vector
				887	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				888	*
				889	* @return The result of the 16 bit reciprocal (1/a).
				890	*/
				891	qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
				892
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	893	/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements)
				894	*
				895	* @param[in] a 8bit fixed point input vector
				896	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				897	*
				898	* @return The result of the 8bit reciprocal (1/a).
				899	*/
				900	qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
				901
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	902	/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (8 elements)
				903	*
				904	* @param[in] a 16 bit fixed point input vector
				905	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				906	*
				907	* @return The result of the 16 bit reciprocal (1/a).
				908	*/
				909	qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
				910
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	911	/** Division fixed point 8bit (8 elements)
				912	*
				913	* @param[in] a First 8bit fixed point input vector
				914	* @param[in] b Second 8bit fixed point input vector
				915	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				916	*
				917	* @return The quotient and remainder number in fixed point format.
				918	*/
				919	qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				920
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	921	/** Division fixed point 16 bit (4 elements)
				922	*
				923	* @param[in] a First 16 bit fixed point input vector
				924	* @param[in] b Second 16 bit fixed point input vector
				925	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				926	*
				927	* @return The quotient and remainder number in fixed point format.
				928	*/
				929	qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
				930
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	931	/** Division fixed point 8bit (16 elements)
				932	*
				933	* @param[in] a First 8bit fixed point input vector
				934	* @param[in] b Second 8bit fixed point input vector
				935	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				936	*
				937	* @return The quotient and remainder number in 8bit fixed point format.
				938	*/
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	939	qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				940
				941	/** Division fixed point 16 bit (8 elements)
				942	*
				943	* @param[in] a First 16 bit fixed point input vector
				944	* @param[in] b Second 16 bit fixed point input vector
				945	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				946	*
				947	* @return The quotient and remainder number in 16 bit fixed point format.
				948	*/
				949	qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	950
				951	/** Perform a 4th degree polynomial approximation. (8 elements)
				952	*
				953	* @param[in] a 8bit fixed point input vector
				954	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				955	*
				956	* @return The result of the 8bit taylor approximation.
				957	*/
				958	template <bool islog>
				959	qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
				960
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	961	/** Perform a 4th degree polynomial approximation. (4 elements)
				962	*
				963	* @param[in] a 16 bit fixed point input vector
				964	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				965	*
				966	* @return The result of the 16 bit taylor approximation.
				967	*/
				968	template <bool islog>
				969	qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
				970
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	971	/** Perform a 4th degree polynomial approximation. (16 elements)
				972	*
				973	* @param[in] a 8bit fixed point input vector
				974	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				975	*
				976	* @return The result of the 8bit taylor approximation.
				977	*/
				978	template <bool islog>
				979	qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
				980
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	981	/** Perform a 4th degree polynomial approximation. (8 elements)
				982	*
				983	* @param[in] a 16 bit fixed point input vector
				984	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				985	*
				986	* @return The result of the 16 bit taylor approximation.
				987	*/
				988	template <bool islog>
				989	qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
				990
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	991	/** Calculate saturating exponential fixed point 8bit (8 elements)
				992	*
				993	* @param[in] a 8bit fixed point input vector
				994	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				995	*
				996	* @return The result of the 8bit saturating exponential
				997	*/
				998	qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
				999
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1000	/** Calculate saturating exponential fixed point 16 bit (4 elements)
				1001	*
				1002	* @param[in] a 16 bit fixed point input vector
				1003	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1004	*
				1005	* @return The result of the 16 bit saturating exponential
				1006	*/
				1007	qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
				1008
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1009	/** Calculate saturating exponential fixed point 8bit (16 elements)
				1010	*
				1011	* @param[in] a 8bit fixed point input vector
				1012	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1013	*
				1014	* @return The result of the 8bit saturating exponential
				1015	*/
				1016	qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
				1017
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1018	/** Calculate saturating exponential fixed point 16 bit (8 elements)
				1019	*
				1020	* @param[in] a 16 bit fixed point input vector
				1021	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1022	*
				1023	* @return The result of the 16 bit saturating exponential
				1024	*/
				1025	qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
				1026
				1027	/** Calculate logarithm fixed point 8 bit (8 elements)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1028	*
				1029	* @param[in] a 8bit fixed point input vector
				1030	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1031	*
				1032	* @return The result of the 8bit logarithm.
				1033	*/
				1034	qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
				1035
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1036	/** Calculate logarithm fixed point 16 bit (4 elements)
				1037	*
				1038	* @param[in] a 16 bit fixed point input vector
				1039	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1040	*
				1041	* @return The result of the 16 bit logarithm.
				1042	*/
				1043	qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
				1044
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1045	/** Calculate logarithm fixed point 8bit (16 elements)
				1046	*
				1047	* @param[in] a 8bit fixed point input vector
				1048	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1049	*
				1050	* @return The result of the 8bit logarithm.
				1051	*/
				1052	qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
				1053
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1054	/** Calculate logarithm fixed point 16 bit (8 elements)
				1055	*
				1056	* @param[in] a 16 bit fixed point input vector
				1057	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1058	*
				1059	* @return The result of the 16 bit logarithm.
				1060	*/
				1061	qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
				1062
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1063	/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
				1064	*
				1065	* @param[in] a 8bit fixed point input vector
				1066	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1067	*
				1068	* @return The result of the 8bit inverse sqrt.
				1069	*/
				1070	qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
				1071
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1072	/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
				1073	*
				1074	* @param[in] a 16 bit fixed point input vector
				1075	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1076	*
				1077	* @return The result of the 16 bit inverse sqrt.
				1078	*/
				1079	qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
				1080
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1081	/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
				1082	*
				1083	* @param[in] a 8bit fixed point input vector
				1084	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1085	*
				1086	* @return The result of the 8bit inverse sqrt.
				1087	*/
				1088	qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
				1089
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1090	/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
				1091	*
				1092	* @param[in] a 16 bit fixed point input vector
				1093	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1094	*
				1095	* @return The result of the 16 bit inverse sqrt.
				1096	*/
				1097	qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
				1098
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1099	/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
				1100	*
				1101	* @param[in] a 8bit fixed point input vector
				1102	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1103	*
				1104	* @return The result of the 8bit inverse sqrt.
				1105	*/
				1106	qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
				1107
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1108	/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
				1109	*
				1110	* @param[in] a 16 bit fixed point input vector
				1111	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1112	*
				1113	* @return The result of the 16 bit inverse sqrt.
				1114	*/
				1115	qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
				1116
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1117	/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
				1118	*
				1119	* @param[in] a 8bit fixed point input vector
				1120	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1121	*
				1122	* @return The result of the 8bit inverse sqrt.
				1123	*/
				1124	qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
				1125
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1126	/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
				1127	*
				1128	* @param[in] a 16 bit fixed point input vector
				1129	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1130	*
				1131	* @return The result of the 16 bit inverse sqrt.
				1132	*/
				1133	qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
				1134
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1135	/** Calculate hyperbolic tangent for fixed point 8bit (8 elements)
				1136	*
				1137	* @param[in] a 8bit fixed point input vector
				1138	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1139	*
				1140	* @return The calculated Hyperbolic Tangent.
				1141	*/
Georgios Pinitas	ccc65d4	2017-06-27 17:39:11 +0100	[diff] [blame]	1142	qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1143
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1144	/** Calculate hyperbolic tangent for fixed point 16 bit (4 elements)
				1145	*
				1146	* @param[in] a 16 bit fixed point input vector
				1147	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1148	*
				1149	* @return The calculated hyperbolic tangent.
				1150	*/
Georgios Pinitas	ccc65d4	2017-06-27 17:39:11 +0100	[diff] [blame]	1151	qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position);
Michalis Spyrou	0a8334c	2017-06-14 18:00:05 +0100	[diff] [blame]	1152
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1153	/** Calculate hyperbolic tangent for fixed point 8 bit (16 elements)
				1154	*
				1155	* @param[in] a 8 bit fixed point input vector
				1156	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1157	*
				1158	* @return The calculated hyperbolic tangent.
				1159	*/
Georgios Pinitas	ccc65d4	2017-06-27 17:39:11 +0100	[diff] [blame]	1160	qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position);
				1161
				1162	/** Calculate hyperbolic tangent for fixed point 16 bit (8 elements)
				1163	*
				1164	* @param[in] a 16 bit fixed point input vector
				1165	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1166	*
				1167	* @return The calculated hyperbolic tangent.
				1168	*/
				1169	qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1170
				1171	/** Calculate the saturating power a^b for fixed point 8 bit (16 elements).
				1172	*
				1173	* pow(a,b) = e^(b*log(a))
				1174	*
				1175	* @param[in] a 8 bit fixed point input vector (base)
				1176	* @param[in] b 8 bit fixed point power vector (exponent)
				1177	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				1178	* @note NOTE(review): the prototype below returns qint8x8_t and takes a as qint8x8_t (8 elements), whereas the description and b (qint8x16_t) refer to 16 elements - confirm against the definition.
				1179	* @return The result of the 8 bit power.
				1180	*/
				1181	qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
Michele Di Giorgio	8af2dd6	2017-06-19 15:19:29 +0100	[diff] [blame]	1182
				1183	/** Compute the lane-by-lane maximum of two float vectors with 4x2 elements each
				1184	*
				1185	* @param[in] a First float input vector
				1186	* @param[in] b Second float input vector
				1187	*
				1188	* @return The lane-by-lane maximum of a and b as a float32x4x2_t
				1189	*/
				1190	float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1191	}
				1192	#include "arm_compute/core/NEON/NEFixedPoint.inl"
				1193	#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */