blob: e3eb5d463890899a3e661a2376fab3c8fe9083c8 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
25#define __ARM_COMPUTE_NEFIXEDPOINT_H__
26
27#include "arm_compute/core/FixedPoint.h"
28
29#include <arm_neon.h>
30
31namespace arm_compute
32{
33using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
34using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
35using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
36using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
37using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
38using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
39using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
40using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
41using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
42using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
43using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
44using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
45using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
46using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
47using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
48using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010049using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010050
51/** Get the lower half of a 16 elements vector
52 *
53 * @param[in] a vector of 16 elements
54 *
55 * @return 8 bit fixed point vector (8 elements)
56 */
57qint8x8_t vget_low_qs8(qint8x16_t a);
58
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010059/** Get the lower half of an 8 elements vector
60 *
61 * @param[in] a vector of 8 elements
62 *
63 * @return 16 bit fixed point vector (4 elements)
64 */
65qint16x4_t vget_low_qs16(qint16x8_t a);
66
Anthony Barbier6ff3b192017-09-04 18:44:23 +010067/** Get the higher half of a 16 elements vector
68 *
69 * @param[in] a vector of 16 elements
70 *
71 * @return 8 bit fixed point vector (8 elements)
72 */
73qint8x8_t vget_high_qs8(qint8x16_t a);
74
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010075/** Get the higher half of an 8 elements vector
76 *
77 * @param[in] a vector of 8 elements
78 *
79 * @return 16 bit fixed point vector (4 elements)
80 */
81qint16x4_t vget_high_qs16(qint16x8_t a);
82
Anthony Barbier6ff3b192017-09-04 18:44:23 +010083/** Load a single 8 bit fixed point vector from memory (8 elements)
84 *
85 * @param[in] addr Memory address of the 8 bit fixed point vector to load
86 *
87 * @return 8 bit fixed point vector (8 elements)
88 */
89qint8x8_t vld1_qs8(const qint8_t *addr);
90
Anthony Barbier6ff3b192017-09-04 18:44:23 +010091/** Load a single 16 bit fixed point vector from memory (4 elements)
92 *
93 * @param[in] addr Memory address of the 16 bit fixed point vector to load
94 *
95 * @return 16 bit fixed point vector (4 elements)
96 */
97qint16x4_t vld1_qs16(const qint16_t *addr);
98
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010099/** Load a single 8 bit fixed point vector from memory (16 elements)
100 *
101 * @param[in] addr Memory address of the 8 bit fixed point vector to load
102 *
103 * @return 8 bit fixed point vector (16 elements)
104 */
105qint8x16_t vld1q_qs8(const qint8_t *addr);
106
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100107/** Load a single 16 bit fixed point vector from memory (8 elements)
108 *
109 * @param[in] addr Memory address of the 16 bit fixed point vector to load
110 *
111 * @return 16 bit fixed point vector (8 elements)
112 */
113qint16x8_t vld1q_qs16(const qint16_t *addr);
114
115/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
116 *
117 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
118 *
119 * @return 8 bit fixed point vector (8 elements)
120 */
121qint8x8_t vld1_dup_qs8(const qint8_t *addr);
122
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100123/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
124 *
125 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
126 *
127 * @return 16 bit fixed point vector (4 elements)
128 */
129qint16x4_t vld1_dup_qs16(const qint16_t *addr);
130
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100131/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
132 *
133 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
134 *
135 * @return 8 bit fixed point vector (16 elements)
136 */
137qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
138
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100139/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
140 *
141 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
142 *
143 * @return 16 bit fixed point vector (8 elements)
144 */
145qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
146
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100147/** Store a single 8 bit fixed point vector to memory (8 elements)
148 *
149 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
150 * @param[in] b 8 bit fixed point vector to store
151 *
152 */
153void vst1_qs8(qint8_t *addr, qint8x8_t b);
154
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100155/** Store a single 16 bit fixed point vector to memory (4 elements)
156 *
157 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
158 * @param[in] b 16 bit fixed point vector to store
159 *
160 */
161void vst1_qs16(qint16_t *addr, qint16x4_t b);
162
163/** Store a single 8 bit fixed point vector to memory (16 elements)
164 *
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100165 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
166 * @param[in] b 8 bit fixed point vector to store
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100167 *
168 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100169void vst1q_qs8(qint8_t *addr, qint8x16_t b);
170
171/** Store a single 16 bit fixed point vector to memory (8 elements)
172 *
173 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
174 * @param[in] b 16 bit fixed point vector to store
175 *
176 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100177void vst1q_qs16(qint16_t *addr, qint16x8_t b);
178
179/** 16 bit fixed point vector saturating narrow (8 elements)
180 *
181 * @param[in] a 16 bit fixed point vector to convert
182 *
183 * @return 8 bit fixed point vector
184 */
185qint8x8_t vqmovn_q16(qint16x8_t a);
186
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100187/** 32 bit fixed point vector saturating narrow (4 elements)
188 *
189 * @param[in] a 32 bit fixed point vector to convert
190 *
191 * @return 16 bit fixed point vector
192 */
193qint16x4_t vqmovn_q32(qint32x4_t a);
194
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100195/** 8 bit fixed point vector duplicate (8 elements)
196 *
197 * @param[in] a 8 bit fixed point to duplicate
198 *
199 * @return The result of the vector duplication
200 */
201qint8x8_t vdup_n_qs8(qint8_t a);
202
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100203/** 16 bit fixed point vector duplicate (4 elements)
204 *
205 * @param[in] a 16 bit fixed point to duplicate
206 *
207 * @return The result of the vector duplication
208 */
209qint16x4_t vdup_n_qs16(qint16_t a);
210
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100211/** 8 bit fixed point vector duplicate (16 elements)
212 *
213 * @param[in] a 8 bit fixed point to duplicate
214 *
215 * @return The result of the vector duplication
216 */
217qint8x16_t vdupq_n_qs8(qint8_t a);
218
219/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
220 *
221 * @param[in] a Floating point value to convert to 8 bit fixed point and duplicate
222 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
223 *
224 * @return The result of the vector duplication
225 */
226qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
227
228/** 16 bit fixed point vector duplicate (8 elements)
229 *
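230 * @param[in] a 16 bit fixed point to duplicate
 * @note NOTE(review): the parameter is declared as qint16x8_t (a whole vector) while vdup_n_qs16 and vdupq_n_qs8 take a scalar; this looks like it should be qint16_t - confirm against the definition.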
231 *
232 * @return The result of the vector duplication
233 */
234qint16x8_t vdupq_n_qs16(qint16x8_t a);
235
236/** Absolute value of 8 bit fixed point vector (8 elements)
237 *
238 * @param[in] a 8 bit fixed point input vector
239 *
240 * @return The result of the 8 bit fixed point vector absolute value
241 */
242qint8x8_t vabs_qs8(qint8x8_t a);
243
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100244/** Absolute value of 16 bit fixed point vector (4 elements)
245 *
246 * @param[in] a 16 bit fixed point input vector
247 *
248 * @return The result of the 16 bit fixed point vector absolute value
249 */
250qint16x4_t vabs_qs16(qint16x4_t a);
251
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100252/** Absolute value of 8 bit fixed point vector (16 elements)
253 *
254 * @param[in] a 8 bit fixed point input vector
255 *
256 * @return The result of the 8 bit fixed point vector absolute value
257 */
258qint8x16_t vabsq_qs8(qint8x16_t a);
259
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100260/** Absolute value of 16 bit fixed point vector (8 elements)
261 *
262 * @param[in] a 16 bit fixed point input vector
263 *
264 * @return The result of the 16 bit fixed point vector absolute value
265 */
266qint16x8_t vabsq_qs16(qint16x8_t a);
267
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100268/** Saturating absolute value of 8 bit fixed point vector (8 elements)
269 *
270 * @param[in] a 8 bit fixed point input vector
271 *
272 * @return The result of the 8 bit fixed point vector absolute value
273 */
274qint8x8_t vqabs_qs8(qint8x8_t a);
275
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100276/** Saturating absolute value of 16 bit fixed point vector (4 elements)
277 *
278 * @param[in] a 16 bit fixed point input vector
279 *
280 * @return The result of the 16 bit fixed point vector absolute value
281 */
282qint16x4_t vqabs_qs16(qint16x4_t a);
283
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100284/** Saturating absolute value of 8 bit fixed point vector (16 elements)
285 *
286 * @param[in] a 8 bit fixed point input vector
287 *
288 * @return The result of the 8 bit fixed point vector absolute value
289 */
290qint8x16_t vqabsq_qs8(qint8x16_t a);
291
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100292/** Saturating absolute value of 16 bit fixed point vector (8 elements)
293 *
294 * @param[in] a 16 bit fixed point input vector
295 *
296 * @return The result of the 16 bit fixed point vector absolute value
297 */
298qint16x8_t vqabsq_qs16(qint16x8_t a);
299
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100300/** 8 bit fixed point vector max (8 elements)
301 *
302 * @param[in] a First 8 bit fixed point input vector
303 * @param[in] b Second 8 bit fixed point input vector
304 *
305 * @return The result of the 8 bit fixed point vector max operation
306 */
307qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
308
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100309/** 16 bit fixed point vector max (4 elements)
310 *
311 * @param[in] a First 16 bit fixed point input vector
312 * @param[in] b Second 16 bit fixed point input vector
313 *
314 * @return The result of the 16 bit fixed point vector max operation
315 */
316qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
317
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100318/** 8 bit fixed point vector max (16 elements)
319 *
320 * @param[in] a First 8 bit fixed point input vector
321 * @param[in] b Second 8 bit fixed point input vector
322 *
323 * @return The result of the 8 bit fixed point vector max operation
324 */
325qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
326
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100327/** 16 bit fixed point vector max (8 elements)
328 *
329 * @param[in] a First 16 bit fixed point input vector
330 * @param[in] b Second 16 bit fixed point input vector
331 *
332 * @return The result of the 16 bit fixed point vector max operation
333 */
334qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
335
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100336/** 8 bit fixed point vector pairwise max (8 elements)
337 *
338 * @param[in] a First 8 bit fixed point input vector
339 * @param[in] b Second 8 bit fixed point input vector
340 *
341 * @return The result of the 8 bit fixed point vector pairwise max operation
342 */
343qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
344
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100345/** 16 bit fixed point vector pairwise max (4 elements)
346 *
347 * @param[in] a First 16 bit fixed point input vector
348 * @param[in] b Second 16 bit fixed point input vector
349 *
350 * @return The result of the 16 bit fixed point vector pairwise max operation
351 */
352qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
353
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100354/** 8 bit fixed point vector min (8 elements)
355 *
356 * @param[in] a First 8 bit fixed point input vector
357 * @param[in] b Second 8 bit fixed point input vector
358 *
359 * @return The result of the 8 bit fixed point vector min operation
360 */
361qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
362
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100363/** 16 bit fixed point vector min (4 elements)
364 *
365 * @param[in] a First 16 bit fixed point input vector
366 * @param[in] b Second 16 bit fixed point input vector
367 *
368 * @return The result of the 16 bit fixed point vector min operation
369 */
370qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
371
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100372/** 8 bit fixed point vector min (16 elements)
373 *
374 * @param[in] a First 8 bit fixed point input vector
375 * @param[in] b Second 8 bit fixed point input vector
376 *
377 * @return The result of the 8 bit fixed point vector min operation
378 */
379qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
380
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100381/** 16 bit fixed point vector min (8 elements)
382 *
383 * @param[in] a First 16 bit fixed point input vector
384 * @param[in] b Second 16 bit fixed point input vector
385 *
386 * @return The result of the 16 bit fixed point vector min operation
387 */
388qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
389
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100390/** 8 bit fixed point vector pairwise min (8 elements)
391 *
392 * @param[in] a First 8 bit fixed point input vector
393 * @param[in] b Second 8 bit fixed point input vector
394 *
395 * @return The result of the 8 bit fixed point vector pairwise min operation
396 */
397qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
398
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100399/** 16 bit fixed point vector pairwise min (4 elements)
400 *
401 * @param[in] a First 16 bit fixed point input vector
402 * @param[in] b Second 16 bit fixed point input vector
403 *
404 * @return The result of the 16 bit fixed point vector pairwise min operation
405 */
406qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
407
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100408/** 8 bit fixed point vector add (8 elements)
409 *
410 * @param[in] a First 8 bit fixed point input vector
411 * @param[in] b Second 8 bit fixed point input vector
412 *
413 * @return The result of the 8 bit fixed point vector addition
414 */
415qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
416
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100417/** 16 bit fixed point vector add (4 elements)
418 *
419 * @param[in] a First 16 bit fixed point input vector
420 * @param[in] b Second 16 bit fixed point input vector
421 *
422 * @return The result of the 16 bit fixed point vector addition
423 */
424qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
425
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100426/** 8 bit fixed point vector add (16 elements)
427 *
428 * @param[in] a First 8 bit fixed point input vector
429 * @param[in] b Second 8 bit fixed point input vector
430 *
431 * @return The result of the 8 bit fixed point vector addition
432 */
433qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
434
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100435/** 16 bit fixed point vector add (8 elements)
436 *
437 * @param[in] a First 16 bit fixed point input vector
438 * @param[in] b Second 16 bit fixed point input vector
439 *
440 * @return The result of the 16 bit fixed point vector addition
441 */
442qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
443
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100444/** 8 bit fixed point vector saturating add (8 elements)
445 *
446 * @param[in] a First 8 bit fixed point input vector
447 * @param[in] b Second 8 bit fixed point input vector
448 *
449 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
450 */
451qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
452
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100453/** 16 bit fixed point vector saturating add (4 elements)
454 *
455 * @param[in] a First 16 bit fixed point input vector
456 * @param[in] b Second 16 bit fixed point input vector
457 *
458 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
459 */
460qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
461
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100462/** 8 bit fixed point vector saturating add (16 elements)
463 *
464 * @param[in] a First 8 bit fixed point input vector
465 * @param[in] b Second 8 bit fixed point input vector
466 *
467 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
468 */
469qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
470
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100471/** 16 bit fixed point vector saturating add (8 elements)
472 *
473 * @param[in] a First 16 bit fixed point input vector
474 * @param[in] b Second 16 bit fixed point input vector
475 *
476 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
477 */
478qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
479
480/** 8 bit fixed point vector saturating pairwise add (8 elements)
481 *
482 * @param[in] a 8 bit fixed point input vector
483 *
484 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
485 */
486int16x4_t vpaddl_qs8(qint8x8_t a);
487
488/** 8 bit fixed point vector subtraction (8 elements)
489 *
490 * @param[in] a First 8 bit fixed point input vector
491 * @param[in] b Second 8 bit fixed point input vector
492 *
493 * @return The result of the 8 bit fixed point vector subtraction
494 */
495qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
496
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100497/** 16 bit fixed point vector subtraction (4 elements)
498 *
499 * @param[in] a First 16 bit fixed point input vector
500 * @param[in] b Second 16 bit fixed point input vector
501 *
502 * @return The result of the 16 bit fixed point vector subtraction
503 */
504qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
505
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100506/** 8 bit fixed point vector subtraction (16 elements)
507 *
508 * @param[in] a First 8 bit fixed point input vector
509 * @param[in] b Second 8 bit fixed point input vector
510 *
511 * @return The result of the 8 bit fixed point vector subtraction
512 */
513qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
514
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100515/** 16 bit fixed point vector subtraction (8 elements)
516 *
517 * @param[in] a First 16 bit fixed point input vector
518 * @param[in] b Second 16 bit fixed point input vector
519 *
520 * @return The result of the 16 bit fixed point vector subtraction
521 */
522qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
523
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100524/** 8 bit fixed point vector saturating subtraction (8 elements)
525 *
526 * @param[in] a First 8 bit fixed point input vector
527 * @param[in] b Second 8 bit fixed point input vector
528 *
529 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
530 */
531qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
532
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100533/** 16 bit fixed point vector saturating subtraction (4 elements)
534 *
535 * @param[in] a First 16 bit fixed point input vector
536 * @param[in] b Second 16 bit fixed point input vector
537 *
538 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
539 */
540qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
541
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100542/** 8 bit fixed point vector saturating subtraction (16 elements)
543 *
544 * @param[in] a First 8 bit fixed point input vector
545 * @param[in] b Second 8 bit fixed point input vector
546 *
547 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
548 */
549qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
550
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100551/** 16 bit fixed point vector saturating subtraction (8 elements)
552 *
553 * @param[in] a First 16 bit fixed point input vector
554 * @param[in] b Second 16 bit fixed point input vector
555 *
556 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
557 */
558qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
559
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100560/** 8 bit fixed point vector multiply (8 elements)
561 *
562 * @param[in] a First 8 bit fixed point input vector
563 * @param[in] b Second 8 bit fixed point input vector
564 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
565 *
566 * @return The result of the 8 bit fixed point vector multiplication.
567 */
568qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
569
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100570/** 16 bit fixed point vector multiply (4 elements)
571 *
572 * @param[in] a First 16 bit fixed point input vector
573 * @param[in] b Second 16 bit fixed point input vector
574 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
575 *
576 * @return The result of the 16 bit fixed point vector multiplication.
577 */
578qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
579
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100580/** 8 bit fixed point vector multiply (16 elements)
581 *
582 * @param[in] a First 8 bit fixed point input vector
583 * @param[in] b Second 8 bit fixed point input vector
584 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
585 *
586 * @return The result of the 8 bit fixed point vector multiplication.
587 */
588qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
589
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100590/** 16 bit fixed point vector multiply (8 elements)
591 *
592 * @param[in] a First 16 bit fixed point input vector
593 * @param[in] b Second 16 bit fixed point input vector
594 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
595 *
596 * @return The result of the 16 bit fixed point vector multiplication.
597 */
598qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
599
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100600/** 8 bit fixed point vector saturating multiply (8 elements)
601 *
602 * @param[in] a First 8 bit fixed point input vector
603 * @param[in] b Second 8 bit fixed point input vector
604 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
605 *
606 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
607 */
608qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
609
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100610/** 16 bit fixed point vector saturating multiply (4 elements)
611 *
612 * @param[in] a First 16 bit fixed point input vector
613 * @param[in] b Second 16 bit fixed point input vector
614 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
615 *
616 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
617 */
618qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
619
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100620/** 8 bit fixed point vector saturating multiply (16 elements)
621 *
622 * @param[in] a First 8 bit fixed point input vector
623 * @param[in] b Second 8 bit fixed point input vector
624 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
625 *
626 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
627 */
628qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
629
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100630/** 16 bit fixed point vector saturating multiply (8 elements)
631 *
632 * @param[in] a First 16 bit fixed point input vector
633 * @param[in] b Second 16 bit fixed point input vector
634 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
635 *
636 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
637 */
638qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
639
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100640/** 8 bit fixed point vector long multiply (8 elements)
641 *
642 * @param[in] a First 8 bit fixed point input vector
643 * @param[in] b Second 8 bit fixed point input vector
644 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
645 *
646 * @return The result of the 8 bit fixed point long vector multiplication.
647 */
648qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
649
650/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
651 *
652 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
653 * @param[in] b Second 8 bit fixed point input vector
654 * @param[in] c Third 8 bit fixed point input vector
655 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
656 *
657 * @return The result of the 8 bit fixed point vector multiply-accumulate
658 */
659qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
660
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100661/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
662 *
663 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
664 * @param[in] b Second 16 bit fixed point input vector
665 * @param[in] c Third 16 bit fixed point input vector
666 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
667 *
668 * @return The result of the 16 bit fixed point vector multiply-accumulate
669 */
670qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
671
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100672/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
673 *
674 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
675 * @param[in] b Second 8 bit fixed point input vector
676 * @param[in] c Third 8 bit fixed point input vector
677 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
678 *
679 * @return The result of the 8 bit fixed point vector multiply-accumulate
680 */
681qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
682
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100683/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
684 *
685 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
686 * @param[in] b Second 16 bit fixed point input vector
687 * @param[in] c Third 16 bit fixed point input vector
688 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
689 *
690 * @return The result of the 16 bit fixed point vector multiply-accumulate
691 */
692qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
693
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100694/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
695 *
696 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
697 * @param[in] b Second 8 bit fixed point input vector
698 * @param[in] c Third 8 bit fixed point input vector
699 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
700 *
701 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
702 */
703qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
704
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100705/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
706 *
707 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
708 * @param[in] b Second 16 bit fixed point input vector
709 * @param[in] c Third 16 bit fixed point input vector
710 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
711 *
712 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
713 */
714qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
715
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100716/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
717 *
718 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
719 * @param[in] b Second 8 bit fixed point input vector
720 * @param[in] c Third 8 bit fixed point input vector
721 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
722 *
723 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
724 */
725qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
726
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100727/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
728 *
729 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
730 * @param[in] b Second 16 bit fixed point input vector
731 * @param[in] c Third 16 bit fixed point input vector
732 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
733 *
734 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
735 */
736qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
737
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100738/** 8 bit fixed point vector multiply-accumulate long (8 elements).
739 * This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
740 *
741 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
742 * @param[in] b Second 8 bit fixed point input vector
743 * @param[in] c Third 8 bit fixed point input vector
744 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
745 *
746 * @return The result of the 8 bit fixed point vector multiply-accumulate long
747 */
748qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
749
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100750/** 16 bit fixed point vector multiply-accumulate long (4 elements).
751 * This operation performs the product between @p b and @p c and add the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
752 *
753 * @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
754 * @param[in] b Second 16 bit fixed point input vector
755 * @param[in] c Third 16 bit fixed point input vector
756 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
757 *
758 * @return The result of the 16 bit fixed point vector multiply-accumulate long
759 */
760qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
761
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100762/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
763 * This operation performs the product between @p b and @p c and add the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
764 *
765 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
766 * @param[in] b Second 8 bit fixed point input vector
767 * @param[in] c Third 8 bit fixed point input vector
768 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
769 *
770 * @return The result of the 8 bit fixed point vector multiply-accumulate long
771 */
772qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
773
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100774/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
775 * This operation performs the product between @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c). 4 elements
776 *
777 * @param[in] a First 32 bit fixed point input vector where the result of multiplication must be added to
778 * @param[in] b Second 16 bit fixed point input vector
779 * @param[in] c Third 16 bit fixed point input vector
780 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
781 *
782 * @return The result of the 16 bit fixed point vector saturating multiply-accumulate long (32 bit fixed point vector)
783 */
784qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
785
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100786/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements
787 *
788 * @param[in] a Float input vector
789 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
790 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100791 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100792 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100793qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100794
795/** Convert a float vector with 4 elements to 16 bit fixed point vector with 4 elements
796 *
797 * @param[in] a Float input vector
798 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
799 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100800 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100801 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100802qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100803
804/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements
805 *
806 * @param[in] a Float input vector
807 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
808 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100809 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100810 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100811qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100812
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100813/** Convert a float vector with 4x2 elements to 16 bit fixed point vector with 8 elements
814 *
815 * @param[in] a Float input vector
816 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
817 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100818 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100819 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100820qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100821
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100822/** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
823 *
824 * @param[in] a 8 bit fixed point input vector
825 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
826 *
827 * @return The result of the conversion 8 bit fixed point -> float32x4x2
828 */
829float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
830
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100831/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
832 *
833 * @param[in] a 16 bit fixed point input vector
834 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
835 *
836 * @return The result of the conversion 16 bit fixed point -> float32x4
837 */
838float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
839
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100840/** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
841 *
 * NOTE(review): the 64-bit counterparts are named vcvt_f32_qs8 / vcvt_f32_qs16 (destination type first),
 * whereas this declaration uses the qs8_f32 order and differs from the float -> fixed point
 * vqcvtq_qs8_f32 by a single letter. Confirm the name matches the definition in NEFixedPoint.inl.
 *
842 * @param[in] a 8 bit fixed point input vector
843 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
844 *
845 * @return The result of the conversion 8 bit fixed point -> float32x4x4
846 */
847float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
848
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100849/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements
850 *
 * NOTE(review): the 64-bit counterparts are named vcvt_f32_qs8 / vcvt_f32_qs16 (destination type first),
 * whereas this declaration uses the qs16_f32 order and differs from the float -> fixed point
 * vqcvtq_qs16_f32 by a single letter. Confirm the name matches the definition in NEFixedPoint.inl.
 *
851 * @param[in] a 16 bit fixed point input vector
852 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
853 *
854 * @return The result of the conversion 16 bit fixed point -> float32x4x2
855 */
856float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
857
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100858/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements)
859 *
860 * @param[in] a 8bit fixed point input vector
861 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
862 *
863 * @return The result of the 8bit reciprocal (1/a).
864 */
865qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
866
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100867/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (4 elements)
868 *
869 * @param[in] a 16 bit fixed point input vector
870 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
871 *
872 * @return The result of the 16 bit reciprocal (1/a).
873 */
874qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
875
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100876/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements)
877 *
878 * @param[in] a 8bit fixed point input vector
879 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
880 *
881 * @return The result of the 8bit reciprocal (1/a).
882 */
883qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
884
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100885/** Calculate reciprocal of a fixed point 16 bit number using the Newton-Raphson method. (8 elements)
886 *
887 * @param[in] a 16 bit fixed point input vector
888 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
889 *
890 * @return The result of the 16 bit reciprocal (1/a).
891 */
892qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
893
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100894/** Division fixed point 8bit (8 elements)
895 *
896 * @param[in] a First 8bit fixed point input vector
897 * @param[in] b Second 8bit fixed point input vector (qint8x8_t is an alias of int8x8_t, so the type is unchanged)
898 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
899 *
900 * @return The quotient and remainder number in fixed point format.
901 */
902qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
903
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100904/** Division fixed point 16 bit (4 elements)
905 *
906 * @param[in] a First 16 bit fixed point input vector
907 * @param[in] b Second 16 bit fixed point input vector
908 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
909 *
910 * @return The quotient and remainder number in fixed point format.
911 */
912qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
913
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100914/** Division fixed point 8bit (16 elements)
915 *
916 * @param[in] a First 8bit fixed point input vector
917 * @param[in] b Second 8bit fixed point input vector
918 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
919 *
920 * @return The quotient and remainder number in 8bit fixed point format.
921 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100922qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
923
924/** Division fixed point 16 bit (8 elements)
925 *
926 * @param[in] a First 16 bit fixed point input vector
927 * @param[in] b Second 16 bit fixed point input vector
928 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
929 *
930 * @return The quotient and remainder number in 16 bit fixed point format.
931 */
932qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100933
934/** Perform a 4th degree polynomial approximation. (8 elements)
935 *
936 * @param[in] a 8bit fixed point input vector
937 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
938 *
939 * @return The result of the 8bit taylor approximation.
940 */
941template <bool islog>
942qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
943
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100944/** Perform a 4th degree polynomial approximation. (4 elements)
945 *
946 * @param[in] a 16 bit fixed point input vector
947 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
948 *
949 * @return The result of the 16 bit taylor approximation.
950 */
951template <bool islog>
952qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
953
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100954/** Perform a 4th degree polynomial approximation. (16 elements)
955 *
956 * @param[in] a 8bit fixed point input vector
957 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
958 *
959 * @return The result of the 8bit taylor approximation.
960 */
961template <bool islog>
962qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
963
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100964/** Perform a 4th degree polynomial approximation. (8 elements)
965 *
966 * @param[in] a 16 bit fixed point input vector
967 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
968 *
969 * @return The result of the 16 bit taylor approximation.
970 */
971template <bool islog>
972qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
973
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100974/** Calculate saturating exponential fixed point 8bit (8 elements)
975 *
976 * @param[in] a 8bit fixed point input vector
977 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
978 *
979 * @return The result of the 8bit saturating exponential
980 */
981qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
982
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100983/** Calculate saturating exponential fixed point 16 bit (4 elements)
984 *
985 * @param[in] a 16 bit fixed point input vector
986 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
987 *
988 * @return The result of the 16 bit saturating exponential
989 */
990qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
991
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100992/** Calculate saturating exponential fixed point 8bit (16 elements)
993 *
994 * @param[in] a 8bit fixed point input vector
995 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
996 *
997 * @return The result of the 8bit saturating exponential
998 */
999qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
1000
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001001/** Calculate saturating exponential fixed point 16 bit (8 elements)
1002 *
1003 * @param[in] a 16 bit fixed point input vector
1004 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1005 *
1006 * @return The result of the 16 bit saturating exponential
1007 */
1008qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
1009
1010/** Calculate logarithm fixed point 8 bit (8 elements)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001011 *
1012 * @param[in] a 8bit fixed point input vector
1013 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1014 *
1015 * @return The result of the 8bit logarithm.
1016 */
1017qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
1018
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001019/** Calculate logarithm fixed point 16 bit (4 elements)
1020 *
1021 * @param[in] a 16 bit fixed point input vector
1022 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1023 *
1024 * @return The result of the 16 bit logarithm.
1025 */
1026qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
1027
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001028/** Calculate logarithm fixed point 8bit (16 elements)
1029 *
1030 * @param[in] a 8bit fixed point input vector
1031 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1032 *
1033 * @return The result of the 8bit logarithm.
1034 */
1035qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
1036
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001037/** Calculate logarithm fixed point 16 bit (8 elements)
1038 *
1039 * @param[in] a 16 bit fixed point input vector
1040 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1041 *
1042 * @return The result of the 16 bit logarithm.
1043 */
1044qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
1045
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001046/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
1047 *
1048 * @param[in] a 8bit fixed point input vector
1049 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1050 *
1051 * @return The result of the 8bit inverse sqrt.
1052 */
1053qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1054
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001055/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
1056 *
1057 * @param[in] a 16 bit fixed point input vector
1058 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1059 *
1060 * @return The result of the 16 bit inverse sqrt.
1061 */
1062qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1063
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001064/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
1065 *
1066 * @param[in] a 8bit fixed point input vector
1067 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1068 *
1069 * @return The result of the 8bit saturating inverse sqrt.
1070 */
1071qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1072
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001073/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (4 elements)
1074 *
1075 * @param[in] a 16 bit fixed point input vector
1076 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1077 *
1078 * @return The result of the 16 bit saturating inverse sqrt.
1079 */
1080qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1081
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001082/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
1083 *
1084 * @param[in] a 8bit fixed point input vector
1085 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1086 *
1087 * @return The result of the 8bit inverse sqrt.
1088 */
1089qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1090
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001091/** Calculate inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
1092 *
1093 * @param[in] a 16 bit fixed point input vector
1094 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1095 *
1096 * @return The result of the 16 bit inverse sqrt.
1097 */
1098qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1099
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001100/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
1101 *
1102 * @param[in] a 8bit fixed point input vector
1103 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1104 *
1105 * @return The result of the 8bit saturating inverse sqrt.
1106 */
1107qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1108
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001109/** Calculate saturating inverse square root for fixed point 16 bit using Newton-Raphson method (8 elements)
1110 *
1111 * @param[in] a 16 bit fixed point input vector
1112 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1113 *
1114 * @return The result of the 16 bit saturating inverse sqrt.
1115 */
1116qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1117
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001118/** Calculate hyperbolic tangent for fixed point 8bit (8 elements)
1119 *
1120 * @param[in] a 8bit fixed point input vector
1121 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1122 *
1123 * @return The calculated Hyperbolic Tangent.
1124 */
1125qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position);
1126
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001127/** Calculate hyperbolic tangent for fixed point 16 bit (4 elements)
1128 *
1129 * @param[in] a 16 bit fixed point input vector
1130 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1131 *
1132 * @return The calculated Hyperbolic Tangent.
1133 */
1134qint16x4_t vtanh_qs16(qint16x4_t a, int fixed_point_position);
1135
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001136/** Calculate hyperbolic tangent for fixed point 8bit (16 elements)
1137 *
1138 * @param[in] a 8bit fixed point input vector
1139 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1140 *
1141 * @return The calculated Hyperbolic Tangent.
1142 */
1143qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position);
1144
1145/** Calculate saturating n power for fixed point 8bit (16 elements).
1146 *
1147 * pow(a,b) = e^(b*log(a))
1148 *
1149 * @param[in] a 8bit fixed point input vector (16 elements)
1150 * @param[in] b 8bit fixed point power vector (16 elements)
1151 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1152 *
1153 * @return The result of the 8bit power (16 elements).
1154 */
1155qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001156
1157/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements
1158 *
1159 * @param[in] a Float input vector
1160 * @param[in] b Float input vector
1161 *
1162 * @return The lane-by-lane maximum -> float32x4x2
1163 */
1164float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001165
1166/** Calculate hyperbolic tangent for fixed point 16 bit (8 elements)
1167 *
1168 * @param[in] a 16 bit fixed point input vector
1169 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1170 *
1171 * @return The calculated Hyperbolic Tangent.
1172 */
1173qint16x8_t vtanhq_qs16(qint16x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001174}
1175#include "arm_compute/core/NEON/NEFixedPoint.inl"
1176#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */