Blame - arm_compute/core/NEON/NEFixedPoint.h - ml/ComputeLibrary

blob: 201c5b5e7efce7cc338a40312eb00c464d0a3aed [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
				25	#define __ARM_COMPUTE_NEFIXEDPOINT_H__
				26
				27	#include "arm_compute/core/FixedPoint.h"
				28
				29	#include <arm_neon.h>
				30
				31	namespace arm_compute
				32	{
				33	using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
				34	using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
				35	using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
				36	using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
				37	using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
				38	using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
				39	using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
				40	using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
				41	using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
				42	using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
				43	using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
				44	using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
				45	using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
				46	using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
				47	using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
				48	using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
				49
				50	/** Get the lower half of a 16 elements vector
				51	*
				52	* @param[in] a vector of 16 elements
				53	*
				54	* @return 8 bit fixed point vector (8 elements)
				55	*/
				56	qint8x8_t vget_low_qs8(qint8x16_t a);
				57
				58	/** Get the higher half of a 16 elements vector
				59	*
				60	* @param[in] a vector of 16 elements
				61	*
				62	* @return 8 bit fixed point vector (8 elements)
				63	*/
				64	qint8x8_t vget_high_qs8(qint8x16_t a);
				65
				66	/** Load a single 8 bit fixed point vector from memory (8 elements)
				67	*
				68	* @param[in] addr Memory address of the 8 bit fixed point vector to load
				69	*
				70	* @return 8 bit fixed point vector (8 elements)
				71	*/
				72	qint8x8_t vld1_qs8(const qint8_t *addr);
				73
				74	/** Load a single 8 bit fixed point vector from memory (16 elements)
				75	*
				76	* @param[in] addr Memory address of the 8 bit fixed point vector to load
				77	*
				78	* @return 8 bit fixed point vector (16 elements)
				79	*/
				80	qint8x16_t vld1q_qs8(const qint8_t *addr);
				81
				82	/** Load a single 16 bit fixed point vector from memory (4 elements)
				83	*
				84	* @param[in] addr Memory address of the 16 bit fixed point vector to load
				85	*
				86	* @return 16 bit fixed point vector (4 elements)
				87	*/
				88	qint16x4_t vld1_qs16(const qint16_t *addr);
				89
				90	/** Load a single 16 bit fixed point vector from memory (8 elements)
				91	*
				92	* @param[in] addr Memory address of the 16 bit fixed point vector to load
				93	*
				94	* @return 16 bit fixed point vector (8 elements)
				95	*/
				96	qint16x8_t vld1q_qs16(const qint16_t *addr);
				97
				98	/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
				99	*
				100	* @param[in] addr Memory address of the 8 bit fixed point scalar value to load
				101	*
				102	* @return 8 bit fixed point vector (8 elements)
				103	*/
				104	qint8x8_t vld1_dup_qs8(const qint8_t *addr);
				105
				106	/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
				107	*
				108	* @param[in] addr Memory address of the 8 bit fixed point scalar value to load
				109	*
				110	* @return 8 bit fixed point vector (16 elements)
				111	*/
				112	qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
				113
				114	/** Store a single 8 bit fixed point vector to memory (8 elements)
				115	*
				116	* @param[in] addr Memory address where the 8 bit fixed point vector should be stored
				117	* @param[in] b 8 bit fixed point vector to store
				118	*
				119	*/
				120	void vst1_qs8(qint8_t *addr, qint8x8_t b);
				121
				122	/** Store a single 8 bit fixed point vector to memory (16 elements)
				123	*
				124	* @param[in] addr Memory address where the 8 bit fixed point vector should be stored
				125	* @param[in] b 8 bit fixed point vector to store
				126	*
				127	*/
				128	void vst1q_qs8(qint8_t *addr, qint8x16_t b);
				129
				130	/** Store a single 16 bit fixed point vector to memory (4 elements)
				131	*
				132	* @param[in] addr Memory address where the 16 bit fixed point vector should be stored
				133	* @param[in] b 16 bit fixed point vector to store
				134	*
				135	*/
				136	void vst1_qs16(qint16_t *addr, qint16x4_t b);
				137
				138	/** Store a single 16 bit fixed point vector to memory (8 elements)
				139	*
				140	* @param[in] addr Memory address where the 16 bit fixed point vector should be stored
				141	* @param[in] b 16 bit fixed point vector to store
				142	*
				143	*/
				144	void vst1q_qs16(qint16_t *addr, qint16x8_t b);
				145
				146	/** 16 bit fixed point vector saturating narrow (8 elements)
				147	*
				148	* @param[in] a 16 bit fixed point vector to convert
				149	*
				150	* @return 8 bit fixed point vector
				151	*/
				152	qint8x8_t vqmovn_q16(qint16x8_t a);
				153
				154	/** 8 bit fixed point vector duplicate (8 elements)
				155	*
				156	* @param[in] a 8 bit fixed point to duplicate
				157	*
				158	* @return The result of the vector duplication
				159	*/
				160	qint8x8_t vdup_n_qs8(qint8_t a);
				161
				162	/** 8 bit fixed point vector duplicate (16 elements)
				163	*
				164	* @param[in] a 8 bit fixed point to duplicate
				165	*
				166	* @return The result of the vector duplication
				167	*/
				168	qint8x16_t vdupq_n_qs8(qint8_t a);
				169
				170	/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
				171	*
				172	* @param[in] a Float value to convert and duplicate
				173	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				174	*
				175	* @return The result of the vector duplication
				176	*/
				177	qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
				178
				179	/** 16 bit fixed point vector duplicate (8 elements)
				180	*
				181	* @param[in] a 16 bit fixed point to duplicate
				182	* @note (review) The parameter is declared as a qint16x8_t vector, whereas vdup_n_qs8 and vdupq_n_qs8 take a scalar; presumably a scalar qint16_t was intended - TODO confirm against the definition.
				183	* @return The result of the vector duplication
				184	*/
				185	qint16x8_t vdupq_n_qs16(qint16x8_t a);
				186
				187	/** Absolute value of 8 bit fixed point vector (8 elements)
				188	*
				189	* @param[in] a 8 bit fixed point input vector
				190	*
				191	* @return The result of the 8 bit fixed point vector absolute value
				192	*/
				193	qint8x8_t vabs_qs8(qint8x8_t a);
				194
				195	/** Absolute value of 8 bit fixed point vector (16 elements)
				196	*
				197	* @param[in] a 8 bit fixed point input vector
				198	*
				199	* @return The result of the 8 bit fixed point vector absolute value
				200	*/
				201	qint8x16_t vabsq_qs8(qint8x16_t a);
				202
				203	/** Saturating absolute value of 8 bit fixed point vector (8 elements)
				204	*
				205	* @param[in] a 8 bit fixed point input vector
				206	*
				207	* @return The result of the 8 bit fixed point vector absolute value
				208	*/
				209	qint8x8_t vqabs_qs8(qint8x8_t a);
				210
				211	/** Saturating absolute value of 8 bit fixed point vector (16 elements)
				212	*
				213	* @param[in] a 8 bit fixed point input vector
				214	*
				215	* @return The result of the 8 bit fixed point vector absolute value
				216	*/
				217	qint8x16_t vqabsq_qs8(qint8x16_t a);
				218
				219	/** 8 bit fixed point vector max (8 elements)
				220	*
				221	* @param[in] a First 8 bit fixed point input vector
				222	* @param[in] b Second 8 bit fixed point input vector
				223	*
				224	* @return The result of the 8 bit fixed point vector max operation
				225	*/
				226	qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
				227
				228	/** 8 bit fixed point vector max (16 elements)
				229	*
				230	* @param[in] a First 8 bit fixed point input vector
				231	* @param[in] b Second 8 bit fixed point input vector
				232	*
				233	* @return The result of the 8 bit fixed point vector max operation
				234	*/
				235	qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
				236
				237	/** 8 bit fixed point vector pairwise max (8 elements)
				238	*
				239	* @param[in] a First 8 bit fixed point input vector
				240	* @param[in] b Second 8 bit fixed point input vector
				241	*
				242	* @return The result of the 8 bit fixed point vector pairwise max operation
				243	*/
				244	qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
				245
				246	/** 8 bit fixed point vector min (8 elements)
				247	*
				248	* @param[in] a First 8 bit fixed point input vector
				249	* @param[in] b Second 8 bit fixed point input vector
				250	*
				251	* @return The result of the 8 bit fixed point vector min operation
				252	*/
				253	qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
				254
				255	/** 8 bit fixed point vector min (16 elements)
				256	*
				257	* @param[in] a First 8 bit fixed point input vector
				258	* @param[in] b Second 8 bit fixed point input vector
				259	*
				260	* @return The result of the 8 bit fixed point vector min operation
				261	*/
				262	qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
				263
				264	/** 8 bit fixed point vector pairwise min (8 elements)
				265	*
				266	* @param[in] a First 8 bit fixed point input vector
				267	* @param[in] b Second 8 bit fixed point input vector
				268	*
				269	* @return The result of the 8 bit fixed point vector pairwise min operation
				270	*/
				271	qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
				272
				273	/** 8 bit fixed point vector add (8 elements)
				274	*
				275	* @param[in] a First 8 bit fixed point input vector
				276	* @param[in] b Second 8 bit fixed point input vector
				277	*
				278	* @return The result of the 8 bit fixed point vector addition
				279	*/
				280	qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
				281
				282	/** 8 bit fixed point vector add (16 elements)
				283	*
				284	* @param[in] a First 8 bit fixed point input vector
				285	* @param[in] b Second 8 bit fixed point input vector
				286	*
				287	* @return The result of the 8 bit fixed point vector addition
				288	*/
				289	qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
				290
				291	/** 8 bit fixed point vector saturating add (8 elements)
				292	*
				293	* @param[in] a First 8 bit fixed point input vector
				294	* @param[in] b Second 8 bit fixed point input vector
				295	*
				296	* @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
				297	*/
				298	qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
				299
				300	/** 8 bit fixed point vector saturating add (16 elements)
				301	*
				302	* @param[in] a First 8 bit fixed point input vector
				303	* @param[in] b Second 8 bit fixed point input vector
				304	*
				305	* @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
				306	*/
				307	qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
				308
				309	/** 16 bit fixed point vector saturating add (4 elements)
				310	*
				311	* @param[in] a First 16 bit fixed point input vector
				312	* @param[in] b Second 16 bit fixed point input vector
				313	*
				314	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				315	*/
				316	qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
				317
				318	/** 16 bit fixed point vector saturating add (8 elements)
				319	*
				320	* @param[in] a First 16 bit fixed point input vector
				321	* @param[in] b Second 16 bit fixed point input vector
				322	*
				323	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				324	*/
				325	qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
				326
				327	/** 8 bit fixed point vector saturating pairwise add (8 elements)
				328	*
				329	* @param[in] a 8 bit fixed point input vector
				330	*
				331	* @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
				332	*/
				333	int16x4_t vpaddl_qs8(qint8x8_t a);
				334
				335	/** 8 bit fixed point vector subtraction (8 elements)
				336	*
				337	* @param[in] a First 8 bit fixed point input vector
				338	* @param[in] b Second 8 bit fixed point input vector
				339	*
				340	* @return The result of the 8 bit fixed point vector subtraction
				341	*/
				342	qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
				343
				344	/** 8 bit fixed point vector subtraction (16 elements)
				345	*
				346	* @param[in] a First 8 bit fixed point input vector
				347	* @param[in] b Second 8 bit fixed point input vector
				348	*
				349	* @return The result of the 8 bit fixed point vector subtraction
				350	*/
				351	qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
				352
				353	/** 8 bit fixed point vector saturating subtraction (8 elements)
				354	*
				355	* @param[in] a First 8 bit fixed point input vector
				356	* @param[in] b Second 8 bit fixed point input vector
				357	*
				358	* @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
				359	*/
				360	qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
				361
				362	/** 8 bit fixed point vector saturating subtraction (16 elements)
				363	*
				364	* @param[in] a First 8 bit fixed point input vector
				365	* @param[in] b Second 8 bit fixed point input vector
				366	*
				367	* @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
				368	*/
				369	qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
				370
				371	/** 8 bit fixed point vector multiply (8 elements)
				372	*
				373	* @param[in] a First 8 bit fixed point input vector
				374	* @param[in] b Second 8 bit fixed point input vector
				375	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				376	*
				377	* @return The result of the 8 bit fixed point vector multiplication.
				378	*/
				379	qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				380
				381	/** 8 bit fixed point vector multiply (16 elements)
				382	*
				383	* @param[in] a First 8 bit fixed point input vector
				384	* @param[in] b Second 8 bit fixed point input vector
				385	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				386	*
				387	* @return The result of the 8 bit fixed point vector multiplication.
				388	*/
				389	qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				390
				391	/** 8 bit fixed point vector saturating multiply (8 elements)
				392	*
				393	* @param[in] a First 8 bit fixed point input vector
				394	* @param[in] b Second 8 bit fixed point input vector
				395	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				396	*
				397	* @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
				398	*/
				399	qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				400
				401	/** 8 bit fixed point vector saturating multiply (16 elements)
				402	*
				403	* @param[in] a First 8 bit fixed point input vector
				404	* @param[in] b Second 8 bit fixed point input vector
				405	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				406	*
				407	* @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
				408	*/
				409	qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
				410
				411	/** 8 bit fixed point vector long multiply (8 elements)
				412	*
				413	* @param[in] a First 8 bit fixed point input vector
				414	* @param[in] b Second 8 bit fixed point input vector
				415	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				416	*
				417	* @return The result of the 8 bit fixed point long vector multiplication.
				418	*/
				419	qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
				420
				421	/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				422	*
				423	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				424	* @param[in] b Second 8 bit fixed point input vector
				425	* @param[in] c Third 8 bit fixed point input vector
				426	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				427	*
				428	* @return The result of the 8 bit fixed point vector multiply-accumulate
				429	*/
				430	qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				431
				432	/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				433	*
				434	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				435	* @param[in] b Second 8 bit fixed point input vector
				436	* @param[in] c Third 8 bit fixed point input vector
				437	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				438	*
				439	* @return The result of the 8 bit fixed point vector multiply-accumulate
				440	*/
				441	qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
				442
				443	/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				444	*
				445	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				446	* @param[in] b Second 8 bit fixed point input vector
				447	* @param[in] c Third 8 bit fixed point input vector
				448	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				449	*
				450	* @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				451	*/
				452	qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				453
				454	/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
				455	*
				456	* @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
				457	* @param[in] b Second 8 bit fixed point input vector
				458	* @param[in] c Third 8 bit fixed point input vector
				459	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				460	*
				461	* @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
				462	*/
				463	qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
				464
				465	/** 8 bit fixed point vector multiply-accumulate long (8 elements).
				466	* This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
				467	*
				468	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				469	* @param[in] b Second 8 bit fixed point input vector
				470	* @param[in] c Third 8 bit fixed point input vector
				471	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				472	*
				473	* @return The result of the 8 bit fixed point vector multiply-accumulate long
				474	*/
				475	qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				476
				477	/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
				478	* This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
				479	*
				480	* @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
				481	* @param[in] b Second 8 bit fixed point input vector
				482	* @param[in] c Third 8 bit fixed point input vector
				483	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				484	*
				485	* @return The result of the 8 bit fixed point vector multiply-accumulate long
				486	*/
				487	qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
				488
				489	/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements
				490	*
				491	* @param[in] a Float input vector
				492	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				493	*
				494	* @return The result of the conversion float -> 8 bit fixed point
				495	*/
				496	qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position);
				497
				498	/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements
				499	*
				500	* @param[in] a Float input vector
				501	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				502	*
				503	* @return The result of the conversion float -> 8 bit fixed point
				504	*/
				505	qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
				506
				507	/** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
				508	*
				509	* @param[in] a 8 bit fixed point input vector
				510	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				511	*
				512	* @return The result of the conversion 8 bit fixed point -> float32x4x2
				513	*/
				514	float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
				515
				516	/** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
				517	*
				518	* @param[in] a 8 bit fixed point input vector
				519	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				520	* @note (review) This fixed point -> float conversion is declared as an overload of vcvtq_qs8_f32, while the 8 element counterpart is named vcvt_f32_qs8; presumably vcvtq_f32_qs8 was intended - TODO confirm against the definition.
				521	* @return The result of the conversion 8 bit fixed point -> float32x4x4
				522	*/
				523	float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
				524
				525	/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements)
				526	*
				527	* @param[in] a 8bit fixed point input vector
				528	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				529	*
				530	* @return The result of the 8bit reciprocal (1/a).
				531	*/
				532	qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
				533
				534	/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements)
				535	*
				536	* @param[in] a 8bit fixed point input vector
				537	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				538	*
				539	* @return The result of the 8bit reciprocal (1/a).
				540	*/
				541	qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
				542
				543	/** Division fixed point 8bit (8 elements)
				544	*
				545	* @param[in] a First 8bit fixed point input vector
				546	* @param[in] b Second 8bit fixed point input vector
				547	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				548	*
				549	* @return The quotient and remainder number in fixed point format.
				550	*/
				551	qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position);
				552
				553	/** Division fixed point 8bit (16 elements)
				554	*
				555	* @param[in] a First 8bit fixed point input vector
				556	* @param[in] b Second 8bit fixed point input vector
				557	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				558	*
				559	* @return The quotient and remainder number in 8bit fixed point format.
				560	*/
				561	qint8x16_t vdivq_qs8(qint8x16_t a, int8x16_t b, int fixed_point_position);
				562
				563	/** Perform a 4th degree polynomial approximation. (8 elements)
				564	*
				565	* @param[in] a 8bit fixed point input vector
				566	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				567	*
				568	* @return The result of the 8bit Taylor approximation.
				569	*/
				570	template <bool islog>
				571	qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
				572
				573	/** Perform a 4th degree polynomial approximation. (16 elements)
				574	*
				575	* @param[in] a 8bit fixed point input vector
				576	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				577	*
				578	* @return The result of the 8bit Taylor approximation.
				579	*/
				580	template <bool islog>
				581	qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
				582
				583	/** Calculate saturating exponential fixed point 8bit (8 elements)
				584	*
				585	* @param[in] a 8bit fixed point input vector
				586	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				587	*
				588	* @return The result of the 8bit saturating exponential
				589	*/
				590	qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
				591
				592	/** Calculate saturating exponential fixed point 8bit (16 elements)
				593	*
				594	* @param[in] a 8bit fixed point input vector
				595	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				596	*
				597	* @return The result of the 8bit saturating exponential
				598	*/
				599	qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
				600
				601	/** Calculate logarithm fixed point 8bit (8 elements)
				602	*
				603	* @param[in] a 8bit fixed point input vector
				604	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				605	*
				606	* @return The result of the 8bit logarithm.
				607	*/
				608	qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
				609
				610	/** Calculate logarithm fixed point 8bit (16 elements)
				611	*
				612	* @param[in] a 8bit fixed point input vector
				613	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				614	*
				615	* @return The result of the 8bit logarithm.
				616	*/
				617	qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
				618
				619	/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
				620	*
				621	* @param[in] a 8bit fixed point input vector
				622	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				623	*
				624	* @return The result of the 8bit inverse sqrt.
				625	*/
				626	qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
				627
				628	/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
				629	*
				630	* @param[in] a 8bit fixed point input vector
				631	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				632	*
				633	* @return The result of the 8bit inverse sqrt.
				634	*/
				635	qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
				636
				637	/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
				638	*
				639	* @param[in] a 8bit fixed point input vector
				640	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				641	*
				642	* @return The result of the 8bit inverse sqrt.
				643	*/
				644	qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
				645
				646	/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
				647	*
				648	* @param[in] a 8bit fixed point input vector
				649	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				650	*
				651	* @return The result of the 8bit inverse sqrt.
				652	*/
				653	qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
				654
				655	/** Calculate hyperbolic tangent for fixed point 8bit (8 elements)
				656	*
				657	* @param[in] a 8bit fixed point input vector
				658	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				659	*
				660	* @return The calculated Hyperbolic Tangent.
				661	*/
				662	qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position);
				663
				664	/** Calculate hyperbolic tangent for fixed point 8bit (16 elements)
				665	*
				666	* @param[in] a 8bit fixed point input vector
				667	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				668	*
				669	* @return The calculated Hyperbolic Tangent.
				670	*/
				671	qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position);
				672
				673	/** Calculate saturating n power for fixed point 8bit (16 elements).
				674	*
				675	* pow(a,b) = e^(b*log(a))
				676	*
				677	* @param[in] a 8bit fixed point input vector
				678	* @param[in] b 8bit fixed point power vector
				679	* @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
				680	* @note (review) Documented as a 16 element operation and @p b is a qint8x16_t, but @p a and the return type are declared as qint8x8_t; presumably qint8x16_t was intended for both - TODO confirm against the definition.
				681	* @return The result of the 8bit power.
				682	*/
				683	qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
Michele Di Giorgio	8af2dd6	2017-06-19 15:19:29 +0100	[diff] [blame^]	684
				685	/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements
				686	*
				687	* @param[in] a Float input vector
				688	* @param[in] b Float input vector
				689	*
				690	* @return The lane-by-lane maximum -> float32x4x2
				691	*/
				692	float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	693	}
				694	#include "arm_compute/core/NEON/NEFixedPoint.inl"
				695	#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */