blob: 5719b636187b21e01c6211bc3bc392641a8005b5 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
25#define __ARM_COMPUTE_NEFIXEDPOINT_H__
26
27#include "arm_compute/core/FixedPoint.h"
28
29#include <arm_neon.h>
30
31namespace arm_compute
32{
33using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
34using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
35using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
36using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
37using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
38using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
39using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
40using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
41using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
42using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
43using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
44using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
45using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
46using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
47using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
48using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
Georgios Pinitas9247c922017-06-28 18:29:47 +010049using qint32x2_t = int32x2_t; /**< 32 bit fixed point vector with 2 elements */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010050using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
Pablo Tellof87cc7f2017-07-26 10:28:40 +010051using qint32x4x2_t = int32x4x2_t; /**< 32 bit fixed point vector with 8 elements */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010052
53/** Get the lower half of a 16 elements vector
54 *
55 * @param[in] a vector of 16 elements
56 *
57 * @return 8 bit fixed point vector (8 elements)
58 */
59qint8x8_t vget_low_qs8(qint8x16_t a);
60
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010061/** Get the lower half of an 8 elements vector
62 *
63 * @param[in] a vector of 8 elements
64 *
65 * @return 16 bit fixed point vector (4 elements)
66 */
67qint16x4_t vget_low_qs16(qint16x8_t a);
68
Anthony Barbier6ff3b192017-09-04 18:44:23 +010069/** Get the higher half of a 16 elements vector
70 *
71 * @param[in] a vector of 16 elements
72 *
73 * @return 8 bit fixed point vector (8 elements)
74 */
75qint8x8_t vget_high_qs8(qint8x16_t a);
76
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010077/** Get the higher half of an 8 elements vector
78 *
79 * @param[in] a vector of 8 elements
80 *
81 * @return 16 bit fixed point vector (4 elements)
82 */
83qint16x4_t vget_high_qs16(qint16x8_t a);
84
Anthony Barbier6ff3b192017-09-04 18:44:23 +010085/** Load a single 8 bit fixed point vector from memory (8 elements)
86 *
87 * @param[in] addr Memory address of the 8 bit fixed point vector to load
88 *
89 * @return 8 bit fixed point vector (8 elements)
90 */
91qint8x8_t vld1_qs8(const qint8_t *addr);
92
Anthony Barbier6ff3b192017-09-04 18:44:23 +010093/** Load a single 16 bit fixed point vector from memory (4 elements)
94 *
95 * @param[in] addr Memory address of the 16 bit fixed point vector to load
96 *
97 * @return 16 bit fixed point vector (4 elements)
98 */
99qint16x4_t vld1_qs16(const qint16_t *addr);
100
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100101/** Load a single 8 bit fixed point vector from memory (16 elements)
102 *
103 * @param[in] addr Memory address of the 8 bit fixed point vector to load
104 *
105 * @return 8 bit fixed point vector (16 elements)
106 */
107qint8x16_t vld1q_qs8(const qint8_t *addr);
108
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100109/** Load a single 16 bit fixed point vector from memory (8 elements)
110 *
111 * @param[in] addr Memory address of the 16 bit fixed point vector to load
112 *
113 * @return 16 bit fixed point vector (8 elements)
114 */
115qint16x8_t vld1q_qs16(const qint16_t *addr);
116
117/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
118 *
119 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
120 *
121 * @return 8 bit fixed point vector (8 elements)
122 */
123qint8x8_t vld1_dup_qs8(const qint8_t *addr);
124
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100125/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
126 *
127 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
128 *
129 * @return 16 bit fixed point vector (4 elements)
130 */
131qint16x4_t vld1_dup_qs16(const qint16_t *addr);
132
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100133/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
134 *
135 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
136 *
137 * @return 8 bit fixed point vector (16 elements)
138 */
139qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
140
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100141/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
142 *
143 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
144 *
145 * @return 16 bit fixed point vector (8 elements)
146 */
147qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
148
Michele Di Giorgio81f0d152017-07-11 15:00:52 +0100149/** Load two 16 bit fixed point vectors from memory (8x2 elements)
150 *
151 * @param[in] addr Memory address of the 16 bit fixed point vectors to load
152 *
153 * @return 16 bit fixed point vectors (8x2 elements)
154 */
155qint16x8x2_t vld2q_qs16(qint16_t *addr);
156
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100157/** Store a single 8 bit fixed point vector to memory (8 elements)
158 *
159 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
160 * @param[in] b 8 bit fixed point vector to store
161 *
162 */
163void vst1_qs8(qint8_t *addr, qint8x8_t b);
164
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100165/** Store a single 16 bit fixed point vector to memory (4 elements)
166 *
167 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
168 * @param[in] b 16 bit fixed point vector to store
169 *
170 */
171void vst1_qs16(qint16_t *addr, qint16x4_t b);
172
173/** Store a single 8 bit fixed point vector to memory (16 elements)
174 *
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100175 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
176 * @param[in] b 8 bit fixed point vector to store
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100177 *
178 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100179void vst1q_qs8(qint8_t *addr, qint8x16_t b);
180
181/** Store a single 16 bit fixed point vector to memory (8 elements)
Anthony Barbierf202e502017-11-23 18:02:04 +0000182 *
183 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
184 * @param[in] b 16 bit fixed point vector to store
185 *
186 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100187void vst1q_qs16(qint16_t *addr, qint16x8_t b);
188
Georgios Pinitasccc65d42017-06-27 17:39:11 +0100189/** Store two 16 bit fixed point vectors to memory (8x2 elements)
Anthony Barbierf202e502017-11-23 18:02:04 +0000190 *
191 * @param[in] addr Memory address where the 16 bit fixed point vectors should be stored
192 * @param[in] b 16 bit fixed point vectors to store
193 *
194 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +0100195void vst2q_qs16(qint16_t *addr, qint16x8x2_t b);
196
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100197/** 16 bit fixed point vector saturating narrow (8 elements)
198 *
199 * @param[in] a 16 bit fixed point vector to convert
200 *
201 * @return 8 bit fixed point vector
202 */
203qint8x8_t vqmovn_q16(qint16x8_t a);
204
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100205/** 32 bit fixed point vector saturating narrow (4 elements)
206 *
207 * @param[in] a 32 bit fixed point vector to convert
208 *
209 * @return 16 bit fixed point vector
210 */
211qint16x4_t vqmovn_q32(qint32x4_t a);
212
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100213/** 8 bit fixed point vector duplicate (8 elements)
214 *
215 * @param[in] a 8 bit fixed point to duplicate
216 *
217 * @return The result of the vector duplication
218 */
219qint8x8_t vdup_n_qs8(qint8_t a);
220
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100221/** 16 bit fixed point vector duplicate (4 elements)
222 *
223 * @param[in] a 16 bit fixed point to duplicate
224 *
225 * @return The result of the vector duplication
226 */
227qint16x4_t vdup_n_qs16(qint16_t a);
228
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100229/** 8 bit fixed point vector duplicate (16 elements)
230 *
231 * @param[in] a 8 bit fixed point to duplicate
232 *
233 * @return The result of the vector duplication
234 */
235qint8x16_t vdupq_n_qs8(qint8_t a);
236
237/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
238 *
Michele Di Giorgiod5e65c72017-07-26 17:09:17 +0100239 * @param[in] a floating point value to convert and duplicate
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100240 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
241 *
242 * @return The result of the vector duplication
243 */
244qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
245
Michele Di Giorgiod5e65c72017-07-26 17:09:17 +0100246/** Duplicate a float and convert it to 16 bit fixed point vector (8 elements)
247 *
248 * @param[in] a floating point value to convert and duplicate
249 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
250 *
251 * @return The result of the vector duplication
252 */
253qint16x8_t vdupq_n_qs16_f32(float a, int fixed_point_position);
254
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100255/** 16 bit fixed point vector duplicate (8 elements)
256 *
257 * @param[in] a 16 bit fixed point to duplicate
258 *
259 * @return The result of the vector duplication
 *
 * NOTE(review): the description above and the 8 bit sibling vdupq_n_qs8(qint8_t a) both
 * describe a scalar argument, but this prototype takes a qint16x8_t vector - confirm the
 * intended parameter type against the definition before relying on this declaration.
260 */
261qint16x8_t vdupq_n_qs16(qint16x8_t a);
262
263/** Absolute value of 8 bit fixed point vector (8 elements)
264 *
265 * @param[in] a 8 bit fixed point input vector
266 *
267 * @return The result of the 8 bit fixed point vector absolute value
268 */
269qint8x8_t vabs_qs8(qint8x8_t a);
270
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100271/** Absolute value of 16 bit fixed point vector (4 elements)
272 *
273 * @param[in] a 16 bit fixed point input vector
274 *
275 * @return The result of the 16 bit fixed point vector absolute value
276 */
277qint16x4_t vabs_qs16(qint16x4_t a);
278
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100279/** Absolute value of 8 bit fixed point vector (16 elements)
280 *
281 * @param[in] a 8 bit fixed point input vector
282 *
283 * @return The result of the 8 bit fixed point vector absolute value
284 */
285qint8x16_t vabsq_qs8(qint8x16_t a);
286
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100287/** Absolute value of 16 bit fixed point vector (8 elements)
288 *
289 * @param[in] a 16 bit fixed point input vector
290 *
291 * @return The result of the 16 bit fixed point vector absolute value
292 */
293qint16x8_t vabsq_qs16(qint16x8_t a);
294
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100295/** Saturating absolute value of 8 bit fixed point vector (8 elements)
296 *
297 * @param[in] a 8 bit fixed point input vector
298 *
299 * @return The result of the 8 bit fixed point vector absolute value
300 */
301qint8x8_t vqabs_qs8(qint8x8_t a);
302
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100303/** Saturating absolute value of 16 bit fixed point vector (4 elements)
304 *
305 * @param[in] a 16 bit fixed point input vector
306 *
307 * @return The result of the 16 bit fixed point vector absolute value
308 */
309qint16x4_t vqabs_qs16(qint16x4_t a);
310
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100311/** Saturating absolute value of 8 bit fixed point vector (16 elements)
312 *
313 * @param[in] a 8 bit fixed point input vector
314 *
315 * @return The result of the 8 bit fixed point vector absolute value
316 */
317qint8x16_t vqabsq_qs8(qint8x16_t a);
318
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100319/** Saturating absolute value of 16 bit fixed point vector (8 elements)
320 *
321 * @param[in] a 16 bit fixed point input vector
322 *
323 * @return The result of the 16 bit fixed point vector absolute value
324 */
325qint16x8_t vqabsq_qs16(qint16x8_t a);
326
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100327/** 8 bit fixed point vector max (8 elements)
328 *
329 * @param[in] a First 8 bit fixed point input vector
330 * @param[in] b Second 8 bit fixed point input vector
331 *
332 * @return The result of the 8 bit fixed point vector max operation
333 */
334qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
335
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100336/** 16 bit fixed point vector max (4 elements)
337 *
338 * @param[in] a First 16 bit fixed point input vector
339 * @param[in] b Second 16 bit fixed point input vector
340 *
341 * @return The result of the 16 bit fixed point vector max operation
342 */
343qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
344
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100345/** 8 bit fixed point vector max (16 elements)
346 *
347 * @param[in] a First 8 bit fixed point input vector
348 * @param[in] b Second 8 bit fixed point input vector
349 *
350 * @return The result of the 8 bit fixed point vector max operation
351 */
352qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
353
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100354/** 16 bit fixed point vector max (8 elements)
355 *
356 * @param[in] a First 16 bit fixed point input vector
357 * @param[in] b Second 16 bit fixed point input vector
358 *
359 * @return The result of the 16 bit fixed point vector max operation
360 */
361qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
362
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100363/** 8 bit fixed point vector pairwise max (8 elements)
364 *
365 * @param[in] a First 8 bit fixed point input vector
366 * @param[in] b Second 8 bit fixed point input vector
367 *
368 * @return The result of the 8 bit fixed point vector pairwise max operation
369 */
370qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
371
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100372/** 16 bit fixed point vector pairwise max (4 elements)
373 *
374 * @param[in] a First 16 bit fixed point input vector
375 * @param[in] b Second 16 bit fixed point input vector
376 *
377 * @return The result of the 16 bit fixed point vector pairwise max operation
378 */
379qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
380
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100381/** 8 bit fixed point vector min (8 elements)
382 *
383 * @param[in] a First 8 bit fixed point input vector
384 * @param[in] b Second 8 bit fixed point input vector
385 *
386 * @return The result of the 8 bit fixed point vector min operation
387 */
388qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
389
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100390/** 16 bit fixed point vector min (4 elements)
391 *
392 * @param[in] a First 16 bit fixed point input vector
393 * @param[in] b Second 16 bit fixed point input vector
394 *
395 * @return The result of the 16 bit fixed point vector min operation
396 */
397qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
398
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100399/** 8 bit fixed point vector min (16 elements)
400 *
401 * @param[in] a First 8 bit fixed point input vector
402 * @param[in] b Second 8 bit fixed point input vector
403 *
404 * @return The result of the 8 bit fixed point vector min operation
405 */
406qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
407
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100408/** 16 bit fixed point vector min (8 elements)
409 *
410 * @param[in] a First 16 bit fixed point input vector
411 * @param[in] b Second 16 bit fixed point input vector
412 *
413 * @return The result of the 16 bit fixed point vector min operation
414 */
415qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
416
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100417/** 8 bit fixed point vector pairwise min (8 elements)
418 *
419 * @param[in] a First 8 bit fixed point input vector
420 * @param[in] b Second 8 bit fixed point input vector
421 *
422 * @return The result of the 8 bit fixed point vector pairwise min operation
423 */
424qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
425
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100426/** 16 bit fixed point vector pairwise min (4 elements)
427 *
428 * @param[in] a First 16 bit fixed point input vector
429 * @param[in] b Second 16 bit fixed point input vector
430 *
431 * @return The result of the 16 bit fixed point vector pairwise min operation
432 */
433qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
434
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100435/** 8 bit fixed point vector add (8 elements)
436 *
437 * @param[in] a First 8 bit fixed point input vector
438 * @param[in] b Second 8 bit fixed point input vector
439 *
440 * @return The result of the 8 bit fixed point vector addition
441 */
442qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
443
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100444/** 16 bit fixed point vector add (4 elements)
445 *
446 * @param[in] a First 16 bit fixed point input vector
447 * @param[in] b Second 16 bit fixed point input vector
448 *
449 * @return The result of the 16 bit fixed point vector addition
450 */
451qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
452
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100453/** 8 bit fixed point vector add (16 elements)
454 *
455 * @param[in] a First 8 bit fixed point input vector
456 * @param[in] b Second 8 bit fixed point input vector
457 *
458 * @return The result of the 8 bit fixed point vector addition
459 */
460qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
461
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100462/** 16 bit fixed point vector add (8 elements)
463 *
464 * @param[in] a First 16 bit fixed point input vector
465 * @param[in] b Second 16 bit fixed point input vector
466 *
467 * @return The result of the 16 bit fixed point vector addition
468 */
469qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
470
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100471/** 8 bit fixed point vector saturating add (8 elements)
472 *
473 * @param[in] a First 8 bit fixed point input vector
474 * @param[in] b Second 8 bit fixed point input vector
475 *
476 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
477 */
478qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
479
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100480/** 16 bit fixed point vector saturating add (4 elements)
481 *
482 * @param[in] a First 16 bit fixed point input vector
483 * @param[in] b Second 16 bit fixed point input vector
484 *
485 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
486 */
487qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
488
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100489/** 8 bit fixed point vector saturating add (16 elements)
490 *
491 * @param[in] a First 8 bit fixed point input vector
492 * @param[in] b Second 8 bit fixed point input vector
493 *
494 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
495 */
496qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
497
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100498/** 16 bit fixed point vector saturating add (8 elements)
499 *
500 * @param[in] a First 16 bit fixed point input vector
501 * @param[in] b Second 16 bit fixed point input vector
502 *
503 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
504 */
505qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
506
507/** 8 bit fixed point vector saturating pairwise add (8 elements)
508 *
509 * @param[in] a 8 bit fixed point input vector
510 *
511 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
512 */
513int16x4_t vpaddl_qs8(qint8x8_t a);
514
515/** 8 bit fixed point vector subtraction (8 elements)
516 *
517 * @param[in] a First 8 bit fixed point input vector
518 * @param[in] b Second 8 bit fixed point input vector
519 *
520 * @return The result of the 8 bit fixed point vector subtraction
521 */
522qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
523
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100524/** 16 bit fixed point vector subtraction (4 elements)
525 *
526 * @param[in] a First 16 bit fixed point input vector
527 * @param[in] b Second 16 bit fixed point input vector
528 *
529 * @return The result of the 16 bit fixed point vector subtraction
530 */
531qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
532
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100533/** 8 bit fixed point vector subtraction (16 elements)
534 *
535 * @param[in] a First 8 bit fixed point input vector
536 * @param[in] b Second 8 bit fixed point input vector
537 *
538 * @return The result of the 8 bit fixed point vector subtraction
539 */
540qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
541
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100542/** 16 bit fixed point vector subtraction (8 elements)
543 *
544 * @param[in] a First 16 bit fixed point input vector
545 * @param[in] b Second 16 bit fixed point input vector
546 *
547 * @return The result of the 16 bit fixed point vector subtraction
548 */
549qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
550
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100551/** 8 bit fixed point vector saturating subtraction (8 elements)
552 *
553 * @param[in] a First 8 bit fixed point input vector
554 * @param[in] b Second 8 bit fixed point input vector
555 *
556 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
557 */
558qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
559
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100560/** 16 bit fixed point vector saturating subtraction (4 elements)
561 *
562 * @param[in] a First 16 bit fixed point input vector
563 * @param[in] b Second 16 bit fixed point input vector
564 *
565 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
566 */
567qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
568
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100569/** 8 bit fixed point vector saturating subtraction (16 elements)
570 *
571 * @param[in] a First 8 bit fixed point input vector
572 * @param[in] b Second 8 bit fixed point input vector
573 *
574 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
575 */
576qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
577
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100578/** 16 bit fixed point vector saturating subtraction (8 elements)
579 *
580 * @param[in] a First 16 bit fixed point input vector
581 * @param[in] b Second 16 bit fixed point input vector
582 *
583 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
584 */
585qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
586
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100587/** 8 bit fixed point vector multiply (8 elements)
588 *
589 * @param[in] a First 8 bit fixed point input vector
590 * @param[in] b Second 8 bit fixed point input vector
591 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
592 *
593 * @return The result of the 8 bit fixed point vector multiplication.
594 */
595qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
596
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100597/** 16 bit fixed point vector multiply (4 elements)
598 *
599 * @param[in] a First 16 bit fixed point input vector
600 * @param[in] b Second 16 bit fixed point input vector
601 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
602 *
603 * @return The result of the 16 bit fixed point vector multiplication.
604 */
605qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
606
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100607/** 8 bit fixed point vector multiply (16 elements)
608 *
609 * @param[in] a First 8 bit fixed point input vector
610 * @param[in] b Second 8 bit fixed point input vector
611 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
612 *
613 * @return The result of the 8 bit fixed point vector multiplication.
614 */
615qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
616
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100617/** 16 bit fixed point vector multiply (8 elements)
618 *
619 * @param[in] a First 16 bit fixed point input vector
620 * @param[in] b Second 16 bit fixed point input vector
621 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
622 *
623 * @return The result of the 16 bit fixed point vector multiplication.
624 */
625qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
626
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100627/** 8 bit fixed point vector saturating multiply (8 elements)
628 *
629 * @param[in] a First 8 bit fixed point input vector
630 * @param[in] b Second 8 bit fixed point input vector
631 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
632 *
633 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
634 */
635qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
636
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100637/** 16 bit fixed point vector saturating multiply (4 elements)
638 *
639 * @param[in] a First 16 bit fixed point input vector
640 * @param[in] b Second 16 bit fixed point input vector
641 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
642 *
643 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
644 */
645qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
646
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100647/** 8 bit fixed point vector saturating multiply (16 elements)
648 *
649 * @param[in] a First 8 bit fixed point input vector
650 * @param[in] b Second 8 bit fixed point input vector
651 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
652 *
653 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
654 */
655qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
656
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100657/** 16 bit fixed point vector saturating multiply (8 elements)
658 *
659 * @param[in] a First 16 bit fixed point input vector
660 * @param[in] b Second 16 bit fixed point input vector
661 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
662 *
663 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
664 */
665qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
666
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100667/** 8 bit fixed point vector long multiply (8 elements)
668 *
669 * @param[in] a First 8 bit fixed point input vector
670 * @param[in] b Second 8 bit fixed point input vector
671 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
672 *
673 * @return The result of the 8 bit fixed point long vector multiplication.
674 */
675qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
676
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100677/** 16 bit fixed point vector long multiply (4 elements)
678 *
679 * @param[in] a First 16 bit fixed point input vector
680 * @param[in] b Second 16 bit fixed point input vector
681 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
682 *
683 * @return The result of the 32 bit fixed point long vector multiplication.
684 */
685qint32x4_t vmull_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
686
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100687/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
688 *
689 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
690 * @param[in] b Second 8 bit fixed point input vector
691 * @param[in] c Third 8 bit fixed point input vector
692 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
693 *
694 * @return The result of the 8 bit fixed point vector multiply-accumulate
695 */
696qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
697
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100698/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
699 *
700 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
701 * @param[in] b Second 16 bit fixed point input vector
702 * @param[in] c Third 16 bit fixed point input vector
703 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
704 *
705 * @return The result of the 16 bit fixed point vector multiply-accumulate
706 */
707qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
708
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100709/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
710 *
711 * @param[in] a First 8 bit fixed point input vector, to which the result of the multiplication is added
712 * @param[in] b Second 8 bit fixed point input vector
713 * @param[in] c Third 8 bit fixed point input vector
714 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
715 *
716 * @return The result of the 8 bit fixed point vector multiply-accumulate
717 */
718qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
719
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100720/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
721 *
722 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
723 * @param[in] b Second 16 bit fixed point input vector
724 * @param[in] c Third 16 bit fixed point input vector
725 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
726 *
727 * @return The result of the 16 bit fixed point vector multiply-accumulate
728 */
729qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
730
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100731/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
732 *
733 * @param[in] a First 8 bit fixed point input vector, to which the result of the multiplication is added
734 * @param[in] b Second 8 bit fixed point input vector
735 * @param[in] c Third 8 bit fixed point input vector
736 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
737 *
738 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
739 */
740qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
741
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100742/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
743 *
744 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
745 * @param[in] b Second 16 bit fixed point input vector
746 * @param[in] c Third 16 bit fixed point input vector
747 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
748 *
749 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
750 */
751qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
752
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100753/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
754 *
755 * @param[in] a First 8 bit fixed point input vector, to which the result of the multiplication is added
756 * @param[in] b Second 8 bit fixed point input vector
757 * @param[in] c Third 8 bit fixed point input vector
758 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
759 *
760 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
761 */
762qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
763
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100764/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
765 *
766 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
767 * @param[in] b Second 16 bit fixed point input vector
768 * @param[in] c Third 16 bit fixed point input vector
769 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
770 *
771 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
772 */
773qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
774
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100775/** 8 bit fixed point vector multiply-accumulate long (8 elements).
776 * This operation computes the product of @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c).
777 *
778 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
779 * @param[in] b Second 8 bit fixed point input vector
780 * @param[in] c Third 8 bit fixed point input vector
781 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
782 *
783 * @return The result of the 8 bit fixed point vector multiply-accumulate long, as a 16 bit fixed point vector
784 */
785qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
786
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100787/** 16 bit fixed point vector multiply-accumulate long (4 elements).
788 * This operation computes the product of @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c).
789 *
790 * @param[in] a First 32 bit fixed point input vector, to which the result of the multiplication is added
791 * @param[in] b Second 16 bit fixed point input vector
792 * @param[in] c Third 16 bit fixed point input vector
793 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
794 *
795 * @return The result of the 16 bit fixed point vector multiply-accumulate long, as a 32 bit fixed point vector
796 */
797qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
798
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100799/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
800 * This operation computes the product of @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c).
801 *
802 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
803 * @param[in] b Second 8 bit fixed point input vector
804 * @param[in] c Third 8 bit fixed point input vector
805 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
806 *
807 * @return The result of the 8 bit fixed point vector multiply-accumulate long. The result is saturated in case of overflow
808 */
809qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
810
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100811/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
812 * This operation computes the product of @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c).
813 *
814 * @param[in] a First 32 bit fixed point input vector, to which the result of the multiplication is added
815 * @param[in] b Second 16 bit fixed point input vector
816 * @param[in] c Third 16 bit fixed point input vector
817 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
818 *
819 * @return The result of the 16 bit fixed point vector multiply-accumulate long. The result is saturated in case of overflow
820 */
821qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
822
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100823/** Convert a float vector with 4x2 elements to an 8 bit fixed point vector with 8 elements
824 *
825 * @param[in] a Float input vector (float32x4x2_t)
826 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
827 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100828 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100829 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100830qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100831
832/** Convert a float vector with 4 elements to a 16 bit fixed point vector with 4 elements
833 *
834 * @param[in] a Float input vector (float32x4_t)
835 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
836 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100837 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100838 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100839qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100840
841/** Convert a float vector with 4x4 elements to an 8 bit fixed point vector with 16 elements
842 *
843 * @param[in] a Float input vector (float32x4x4_t, passed by const reference)
844 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
845 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100846 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100847 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100848qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100849
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100850/** Convert a float vector with 4x2 elements to a 16 bit fixed point vector with 8 elements
851 *
852 * @param[in] a Float input vector (float32x4x2_t, passed by const reference)
853 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
854 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100855 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100856 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100857qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100858
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100859/** Convert an 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
860 *
861 * @param[in] a 8 bit fixed point input vector
862 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
863 *
864 * @return The result of the conversion 8 bit fixed point -> float32x4x2
865 */
866float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
867
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100868/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
869 *
870 * @param[in] a 16 bit fixed point input vector
871 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
872 *
873 * @return The result of the conversion 16 bit fixed point -> float32x4
874 */
875float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
876
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100877/** Convert an 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements. Note that, unlike vcvt_f32_qs8, the function name lists the source type (qs8) before the destination type (f32).
878 *
879 * @param[in] a 8 bit fixed point input vector
880 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
881 *
882 * @return The result of the conversion 8 bit fixed point -> float32x4x4
883 */
884float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
885
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100886/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements. Note that, unlike vcvt_f32_qs16, the function name lists the source type (qs16) before the destination type (f32).
887 *
888 * @param[in] a 16 bit fixed point input vector
889 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
890 *
891 * @return The result of the conversion 16 bit fixed point -> float32x4x2
892 */
893float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
894
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100895/** Calculate the reciprocal of an 8 bit fixed point vector using the Newton-Raphson method. (8 elements)
896 *
897 * @param[in] a 8 bit fixed point input vector
898 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
899 *
900 * @return The result of the 8 bit reciprocal (1/a).
901 */
902qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
903
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100904/** Calculate the reciprocal of a 16 bit fixed point vector using the Newton-Raphson method. (4 elements)
905 *
906 * @param[in] a 16 bit fixed point input vector
907 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
908 *
909 * @return The result of the 16 bit reciprocal (1/a).
910 */
911qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
912
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100913/** Calculate the reciprocal of an 8 bit fixed point vector using the Newton-Raphson method. (16 elements)
914 *
915 * @param[in] a 8 bit fixed point input vector
916 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
917 *
918 * @return The result of the 8 bit reciprocal (1/a).
919 */
920qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
921
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100922/** Calculate the reciprocal of a 16 bit fixed point vector using the Newton-Raphson method. (8 elements)
923 *
924 * @param[in] a 16 bit fixed point input vector
925 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
926 *
927 * @return The result of the 16 bit reciprocal (1/a).
928 */
929qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
930
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100931/** 8 bit fixed point vector division (8 elements)
932 *
933 * @param[in] a First 8 bit fixed point input vector
934 * @param[in] b Second 8 bit fixed point input vector (declared as int8x8_t, the type that qint8x8_t aliases)
935 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
936 *
937 * @return The result of the division in 8 bit fixed point format.
938 */
939qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position);
940
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100941/** 16 bit fixed point vector division (4 elements)
942 *
943 * @param[in] a First 16 bit fixed point input vector
944 * @param[in] b Second 16 bit fixed point input vector
945 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
946 *
947 * @return The result of the division in 16 bit fixed point format.
948 */
949qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
950
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100951/** 8 bit fixed point vector division (16 elements)
952 *
953 * @param[in] a First 8 bit fixed point input vector
954 * @param[in] b Second 8 bit fixed point input vector
955 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
956 *
957 * @return The result of the division in 8 bit fixed point format.
958 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100959qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
960
961/** 16 bit fixed point vector division (8 elements)
962 *
963 * @param[in] a First 16 bit fixed point input vector
964 * @param[in] b Second 16 bit fixed point input vector
965 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
966 *
967 * @return The result of the division in 16 bit fixed point format.
968 */
969qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100970
971/** Perform a 4th degree polynomial approximation. (8 elements)
972 *
 * @tparam islog Compile-time flag; judging by its name it presumably selects the logarithm form of the approximation - confirm against the implementation
973 * @param[in] a 8 bit fixed point input vector
974 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
975 *
976 * @return The result of the 8 bit taylor approximation.
977 */
978template <bool islog>
979qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
980
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100981/** Perform a 4th degree polynomial approximation. (4 elements)
982 *
 * @tparam islog Compile-time flag; judging by its name it presumably selects the logarithm form of the approximation - confirm against the implementation
983 * @param[in] a 16 bit fixed point input vector
984 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
985 *
986 * @return The result of the 16 bit taylor approximation.
987 */
988template <bool islog>
989qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
990
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100991/** Perform a 4th degree polynomial approximation. (16 elements)
992 *
 * @tparam islog Compile-time flag; judging by its name it presumably selects the logarithm form of the approximation - confirm against the implementation
993 * @param[in] a 8 bit fixed point input vector
994 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
995 *
996 * @return The result of the 8 bit taylor approximation.
997 */
998template <bool islog>
999qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
1000
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001001/** Perform a 4th degree polynomial approximation. (8 elements)
1002 *
 * @tparam islog Compile-time flag; judging by its name it presumably selects the logarithm form of the approximation - confirm against the implementation
1003 * @param[in] a 16 bit fixed point input vector
1004 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1005 *
1006 * @return The result of the 16 bit taylor approximation.
1007 */
1008template <bool islog>
1009qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
1010
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001011/** Calculate the saturating exponential of an 8 bit fixed point vector (8 elements)
1012 *
1013 * @param[in] a 8 bit fixed point input vector
1014 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1015 *
1016 * @return The result of the 8 bit saturating exponential
1017 */
1018qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
1019
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001020/** Calculate the saturating exponential of a 16 bit fixed point vector (4 elements)
1021 *
1022 * @param[in] a 16 bit fixed point input vector
1023 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1024 *
1025 * @return The result of the 16 bit saturating exponential
1026 */
1027qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
1028
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001029/** Calculate the saturating exponential of an 8 bit fixed point vector (16 elements)
1030 *
1031 * @param[in] a 8 bit fixed point input vector
1032 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1033 *
1034 * @return The result of the 8 bit saturating exponential
1035 */
1036qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
1037
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001038/** Calculate the saturating exponential of a 16 bit fixed point vector (8 elements)
1039 *
1040 * @param[in] a 16 bit fixed point input vector
1041 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1042 *
1043 * @return The result of the 16 bit saturating exponential
1044 */
1045qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
1046
1047/** Calculate the logarithm of an 8 bit fixed point vector (8 elements)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001048 *
1049 * @param[in] a 8 bit fixed point input vector
1050 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1051 *
1052 * @return The result of the 8 bit logarithm.
1053 */
1054qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
1055
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001056/** Calculate the logarithm of a 16 bit fixed point vector (4 elements)
1057 *
1058 * @param[in] a 16 bit fixed point input vector
1059 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1060 *
1061 * @return The result of the 16 bit logarithm.
1062 */
1063qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
1064
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001065/** Calculate the logarithm of an 8 bit fixed point vector (16 elements)
1066 *
1067 * @param[in] a 8 bit fixed point input vector
1068 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1069 *
1070 * @return The result of the 8 bit logarithm.
1071 */
1072qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
1073
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001074/** Calculate the logarithm of a 16 bit fixed point vector (8 elements)
1075 *
1076 * @param[in] a 16 bit fixed point input vector
1077 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1078 *
1079 * @return The result of the 16 bit logarithm.
1080 */
1081qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
1082
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001083/** Calculate the inverse square root of an 8 bit fixed point vector using the Newton-Raphson method (8 elements)
1084 *
1085 * @param[in] a 8 bit fixed point input vector
1086 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1087 *
1088 * @return The result of the 8 bit inverse sqrt.
1089 */
1090qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1091
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001092/** Calculate the inverse square root of a 16 bit fixed point vector using the Newton-Raphson method (4 elements)
1093 *
1094 * @param[in] a 16 bit fixed point input vector
1095 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1096 *
1097 * @return The result of the 16 bit inverse sqrt.
1098 */
1099qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1100
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001101/** Calculate the saturating inverse square root of an 8 bit fixed point vector using the Newton-Raphson method (8 elements)
1102 *
1103 * @param[in] a 8 bit fixed point input vector
1104 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1105 *
1106 * @return The result of the 8 bit saturating inverse sqrt.
1107 */
1108qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1109
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001110/** Calculate the saturating inverse square root of a 16 bit fixed point vector using the Newton-Raphson method (4 elements)
1111 *
1112 * @param[in] a 16 bit fixed point input vector
1113 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1114 *
1115 * @return The result of the 16 bit saturating inverse sqrt.
1116 */
1117qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1118
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001119/** Calculate the inverse square root of an 8 bit fixed point vector using the Newton-Raphson method (16 elements)
1120 *
1121 * @param[in] a 8 bit fixed point input vector
1122 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1123 *
1124 * @return The result of the 8 bit inverse sqrt.
1125 */
1126qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1127
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001128/** Calculate the inverse square root of a 16 bit fixed point vector using the Newton-Raphson method (8 elements)
1129 *
1130 * @param[in] a 16 bit fixed point input vector
1131 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1132 *
1133 * @return The result of the 16 bit inverse sqrt.
1134 */
1135qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1136
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001137/** Calculate the saturating inverse square root of an 8 bit fixed point vector using the Newton-Raphson method (16 elements)
1138 *
1139 * @param[in] a 8 bit fixed point input vector
1140 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1141 *
1142 * @return The result of the 8 bit saturating inverse sqrt.
1143 */
1144qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1145
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001146/** Calculate the saturating inverse square root of a 16 bit fixed point vector using the Newton-Raphson method (8 elements)
1147 *
1148 * @param[in] a 16 bit fixed point input vector
1149 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1150 *
1151 * @return The result of the 16 bit saturating inverse sqrt.
1152 */
1153qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1154
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001155/** Calculate the hyperbolic tangent of an 8 bit fixed point vector (8 elements)
1156 *
1157 * @param[in] a 8 bit fixed point input vector
1158 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1159 *
1160 * @return The calculated hyperbolic tangent (presumably saturated, since the vq prefix marks the saturating operations elsewhere in this header - confirm against the implementation).
1161 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001162qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001163
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001164/** Calculate the hyperbolic tangent of a 16 bit fixed point vector (4 elements)
1165 *
1166 * @param[in] a 16 bit fixed point input vector
1167 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1168 *
1169 * @return The calculated hyperbolic tangent (presumably saturated, since the vq prefix marks the saturating operations elsewhere in this header - confirm against the implementation).
1170 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001171qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001172
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001173/** Calculate the hyperbolic tangent of an 8 bit fixed point vector (16 elements)
1174 *
1175 * @param[in] a 8 bit fixed point input vector
1176 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1177 *
1178 * @return The calculated hyperbolic tangent (presumably saturated, since the vq prefix marks the saturating operations elsewhere in this header - confirm against the implementation).
1179 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001180qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position);
1181
1182/** Calculate the hyperbolic tangent of a 16 bit fixed point vector (8 elements)
1183 *
1184 * @param[in] a 16 bit fixed point input vector
1185 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1186 *
1187 * @return The calculated hyperbolic tangent (presumably saturated, since the vq prefix marks the saturating operations elsewhere in this header - confirm against the implementation).
1188 */
1189qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001190
1191/** Calculate the saturating power of an 8 bit fixed point vector raised to an 8 bit fixed point exponent vector (16 elements).
1192 *
1193 * pow(a,b) = e^(b*log(a))
1194 *
1195 * @param[in] a 8 bit fixed point input vector (base)
1196 * @param[in] b 8 bit fixed point power vector (exponent)
1197 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1198 *
1199 * @return The result of the 8 bit saturating power.
1200 */
Michele Di Giorgiod5e65c72017-07-26 17:09:17 +01001201qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
1202
1203/** Calculate the saturating power of a 16 bit fixed point vector raised to a 16 bit fixed point exponent vector (8 elements).
1204 *
1205 * pow(a,b) = e^(b*log(a))
1206 *
1207 * @param[in] a 16 bit fixed point input vector (base)
1208 * @param[in] b 16 bit fixed point power vector (exponent)
1209 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1210 *
1211 * @return The result of the 16 bit saturating power.
1212 */
1213qint16x8_t vqpowq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001214
1215/** Compute the lane-by-lane maximum of two float vectors with 4x2 elements each
1216 *
1217 * @param[in] a First float input vector
1218 * @param[in] b Second float input vector
1219 *
1220 * @return The lane-by-lane maximum -> float32x4x2
1221 */
1222float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Gian Marco Iodice356f6432017-09-22 11:32:21 +01001223} // namespace arm_compute
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001224#include "arm_compute/core/NEON/NEFixedPoint.inl"
1225#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */