blob: 09579f9120e28f0bbbdf656d6dc8cab8097f7cd9 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
25#define __ARM_COMPUTE_NEFIXEDPOINT_H__
26
27#include "arm_compute/core/FixedPoint.h"
28
29#include <arm_neon.h>
30
31namespace arm_compute
32{
33using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
34using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
35using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
36using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
37using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
38using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
39using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
40using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
41using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
42using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
43using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
44using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
45using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
46using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
47using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
48using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
Georgios Pinitas9247c922017-06-28 18:29:47 +010049using qint32x2_t = int32x2_t; /**< 32 bit fixed point vector with 2 elements */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010050using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010051
52/** Get the lower half of a 16 elements vector
53 *
54 * @param[in] a vector of 16 elements
55 *
56 * @return 8 bit fixed point vector (8 elements)
57 */
58qint8x8_t vget_low_qs8(qint8x16_t a);
59
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010060/** Get the lower half of an 8 elements vector
61 *
62 * @param[in] a vector of 8 elements
63 *
64 * @return 16 bit fixed point vector (4 elements)
65 */
66qint16x4_t vget_low_qs16(qint16x8_t a);
67
Anthony Barbier6ff3b192017-09-04 18:44:23 +010068/** Get the higher half of a 16 elements vector
69 *
70 * @param[in] a vector of 16 elements
71 *
72 * @return 8 bit fixed point vector (8 elements)
73 */
74qint8x8_t vget_high_qs8(qint8x16_t a);
75
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010076/** Get the higher half of an 8 elements vector
77 *
78 * @param[in] a vector of 8 elements
79 *
80 * @return 16 bit fixed point vector (4 elements)
81 */
82qint16x4_t vget_high_qs16(qint16x8_t a);
83
Anthony Barbier6ff3b192017-09-04 18:44:23 +010084/** Load a single 8 bit fixed point vector from memory (8 elements)
85 *
86 * @param[in] addr Memory address of the 8 bit fixed point vector to load
87 *
88 * @return 8 bit fixed point vector (8 elements)
89 */
90qint8x8_t vld1_qs8(const qint8_t *addr);
91
Anthony Barbier6ff3b192017-09-04 18:44:23 +010092/** Load a single 16 bit fixed point vector from memory (4 elements)
93 *
94 * @param[in] addr Memory address of the 16 bit fixed point vector to load
95 *
96 * @return 16 bit fixed point vector (4 elements)
97 */
98qint16x4_t vld1_qs16(const qint16_t *addr);
99
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100100/** Load a single 8 bit fixed point vector from memory (16 elements)
101 *
102 * @param[in] addr Memory address of the 8 bit fixed point vector to load
103 *
104 * @return 8 bit fixed point vector (16 elements)
105 */
106qint8x16_t vld1q_qs8(const qint8_t *addr);
107
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100108/** Load a single 16 bit fixed point vector from memory (8 elements)
109 *
110 * @param[in] addr Memory address of the 16 bit fixed point vector to load
111 *
112 * @return 16 bit fixed point vector (8 elements)
113 */
114qint16x8_t vld1q_qs16(const qint16_t *addr);
115
116/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
117 *
118 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
119 *
120 * @return 8 bit fixed point vector (8 elements)
121 */
122qint8x8_t vld1_dup_qs8(const qint8_t *addr);
123
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100124/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
125 *
126 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
127 *
128 * @return 16 bit fixed point vector (4 elements)
129 */
130qint16x4_t vld1_dup_qs16(const qint16_t *addr);
131
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100132/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
133 *
134 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
135 *
136 * @return 8 bit fixed point vector (16 elements)
137 */
138qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
139
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100140/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
141 *
142 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
143 *
144 * @return 16 bit fixed point vector (8 elements)
145 */
146qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
147
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100148/** Store a single 8 bit fixed point vector to memory (8 elements)
149 *
150 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
151 * @param[in] b 8 bit fixed point vector to store
152 *
153 */
154void vst1_qs8(qint8_t *addr, qint8x8_t b);
155
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100156/** Store a single 16 bit fixed point vector to memory (4 elements)
157 *
158 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
159 * @param[in] b 16 bit fixed point vector to store
160 *
161 */
162void vst1_qs16(qint16_t *addr, qint16x4_t b);
163
164/** Store a single 8 bit fixed point vector to memory (16 elements)
165 *
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100166 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
167 * @param[in] b 8 bit fixed point vector to store
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100168 *
169 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100170void vst1q_qs8(qint8_t *addr, qint8x16_t b);
171
172/** Store a single 16 bit fixed point vector to memory (8 elements)
173*
174* @param[in] addr Memory address where the 16 bit fixed point vector should be stored
175* @param[in] b 16 bit fixed point vector to store
176*
177*/
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100178void vst1q_qs16(qint16_t *addr, qint16x8_t b);
179
Georgios Pinitasccc65d42017-06-27 17:39:11 +0100180/** Store two 16 bit fixed point vectors to memory (8x2 elements)
181*
182* @param[in] addr Memory address where the 16 bit fixed point vectors should be stored
183* @param[in] b 16 bit fixed point vectors to store
184*
185*/
186void vst2q_qs16(qint16_t *addr, qint16x8x2_t b);
187
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100188/** 16 bit fixed point vector saturating narrow (8 elements)
189 *
190 * @param[in] a 16 bit fixed point vector to convert
191 *
192 * @return 8 bit fixed point vector
193 */
194qint8x8_t vqmovn_q16(qint16x8_t a);
195
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100196/** 32 bit fixed point vector saturating narrow (4 elements)
197 *
198 * @param[in] a 32 bit fixed point vector to convert
199 *
200 * @return 16 bit fixed point vector
201 */
202qint16x4_t vqmovn_q32(qint32x4_t a);
203
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100204/** 8 bit fixed point vector duplicate (8 elements)
205 *
206 * @param[in] a 8 bit fixed point to duplicate
207 *
208 * @return The result of the vector duplication
209 */
210qint8x8_t vdup_n_qs8(qint8_t a);
211
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100212/** 16 bit fixed point vector duplicate (4 elements)
213 *
214 * @param[in] a 16 bit fixed point to duplicate
215 *
216 * @return The result of the vector duplication
217 */
218qint16x4_t vdup_n_qs16(qint16_t a);
219
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100220/** 8 bit fixed point vector duplicate (16 elements)
221 *
222 * @param[in] a 8 bit fixed point to duplicate
223 *
224 * @return The result of the vector duplication
225 */
226qint8x16_t vdupq_n_qs8(qint8_t a);
227
228/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
229 *
230 * @param[in] a Floating point value to convert to 8 bit fixed point and duplicate
231 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
232 *
233 * @return The result of the vector duplication
234 */
235qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
236
237/** 16 bit fixed point vector duplicate (8 elements)
238 *
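239 * @param[in] a 16 bit fixed point to duplicate (NOTE(review): declared as a qint16x8_t vector, unlike the scalar parameter taken by vdup_n_qs16 and vdupq_n_qs8 - confirm against the definition)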
240 *
241 * @return The result of the vector duplication
242 */
243qint16x8_t vdupq_n_qs16(qint16x8_t a);
244
245/** Absolute value of 8 bit fixed point vector (8 elements)
246 *
247 * @param[in] a 8 bit fixed point input vector
248 *
249 * @return The result of the 8 bit fixed point vector absolute value
250 */
251qint8x8_t vabs_qs8(qint8x8_t a);
252
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100253/** Absolute value of 16 bit fixed point vector (4 elements)
254 *
255 * @param[in] a 16 bit fixed point input vector
256 *
257 * @return The result of the 16 bit fixed point vector absolute value
258 */
259qint16x4_t vabs_qs16(qint16x4_t a);
260
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100261/** Absolute value of 8 bit fixed point vector (16 elements)
262 *
263 * @param[in] a 8 bit fixed point input vector
264 *
265 * @return The result of the 8 bit fixed point vector absolute value
266 */
267qint8x16_t vabsq_qs8(qint8x16_t a);
268
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100269/** Absolute value of 16 bit fixed point vector (8 elements)
270 *
271 * @param[in] a 16 bit fixed point input vector
272 *
273 * @return The result of the 16 bit fixed point vector absolute value
274 */
275qint16x8_t vabsq_qs16(qint16x8_t a);
276
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100277/** Saturating absolute value of 8 bit fixed point vector (8 elements)
278 *
279 * @param[in] a 8 bit fixed point input vector
280 *
281 * @return The result of the 8 bit fixed point vector absolute value
282 */
283qint8x8_t vqabs_qs8(qint8x8_t a);
284
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100285/** Saturating absolute value of 16 bit fixed point vector (4 elements)
286 *
287 * @param[in] a 16 bit fixed point input vector
288 *
289 * @return The result of the 16 bit fixed point vector absolute value
290 */
291qint16x4_t vqabs_qs16(qint16x4_t a);
292
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100293/** Saturating absolute value of 8 bit fixed point vector (16 elements)
294 *
295 * @param[in] a 8 bit fixed point input vector
296 *
297 * @return The result of the 8 bit fixed point vector absolute value
298 */
299qint8x16_t vqabsq_qs8(qint8x16_t a);
300
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100301/** Saturating absolute value of 16 bit fixed point vector (8 elements)
302 *
303 * @param[in] a 16 bit fixed point input vector
304 *
305 * @return The result of the 16 bit fixed point vector absolute value
306 */
307qint16x8_t vqabsq_qs16(qint16x8_t a);
308
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100309/** 8 bit fixed point vector max (8 elements)
310 *
311 * @param[in] a First 8 bit fixed point input vector
312 * @param[in] b Second 8 bit fixed point input vector
313 *
314 * @return The result of the 8 bit fixed point vector max operation
315 */
316qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
317
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100318/** 16 bit fixed point vector max (4 elements)
319 *
320 * @param[in] a First 16 bit fixed point input vector
321 * @param[in] b Second 16 bit fixed point input vector
322 *
323 * @return The result of the 16 bit fixed point vector max operation
324 */
325qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
326
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100327/** 8 bit fixed point vector max (16 elements)
328 *
329 * @param[in] a First 8 bit fixed point input vector
330 * @param[in] b Second 8 bit fixed point input vector
331 *
332 * @return The result of the 8 bit fixed point vector max operation
333 */
334qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
335
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100336/** 16 bit fixed point vector max (8 elements)
337 *
338 * @param[in] a First 16 bit fixed point input vector
339 * @param[in] b Second 16 bit fixed point input vector
340 *
341 * @return The result of the 16 bit fixed point vector max operation
342 */
343qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
344
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100345/** 8 bit fixed point vector pairwise max (8 elements)
346 *
347 * @param[in] a First 8 bit fixed point input vector
348 * @param[in] b Second 8 bit fixed point input vector
349 *
350 * @return The result of the 8 bit fixed point vector pairwise max operation
351 */
352qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
353
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100354/** 16 bit fixed point vector pairwise max (4 elements)
355 *
356 * @param[in] a First 16 bit fixed point input vector
357 * @param[in] b Second 16 bit fixed point input vector
358 *
359 * @return The result of the 16 bit fixed point vector pairwise max operation
360 */
361qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
362
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100363/** 8 bit fixed point vector min (8 elements)
364 *
365 * @param[in] a First 8 bit fixed point input vector
366 * @param[in] b Second 8 bit fixed point input vector
367 *
368 * @return The result of the 8 bit fixed point vector min operation
369 */
370qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
371
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100372/** 16 bit fixed point vector min (4 elements)
373 *
374 * @param[in] a First 16 bit fixed point input vector
375 * @param[in] b Second 16 bit fixed point input vector
376 *
377 * @return The result of the 16 bit fixed point vector min operation
378 */
379qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
380
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100381/** 8 bit fixed point vector min (16 elements)
382 *
383 * @param[in] a First 8 bit fixed point input vector
384 * @param[in] b Second 8 bit fixed point input vector
385 *
386 * @return The result of the 8 bit fixed point vector min operation
387 */
388qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
389
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100390/** 16 bit fixed point vector min (8 elements)
391 *
392 * @param[in] a First 16 bit fixed point input vector
393 * @param[in] b Second 16 bit fixed point input vector
394 *
395 * @return The result of the 16 bit fixed point vector min operation
396 */
397qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
398
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100399/** 8 bit fixed point vector pairwise min (8 elements)
400 *
401 * @param[in] a First 8 bit fixed point input vector
402 * @param[in] b Second 8 bit fixed point input vector
403 *
404 * @return The result of the 8 bit fixed point vector pairwise min operation
405 */
406qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
407
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100408/** 16 bit fixed point vector pairwise min (4 elements)
409 *
410 * @param[in] a First 16 bit fixed point input vector
411 * @param[in] b Second 16 bit fixed point input vector
412 *
413 * @return The result of the 16 bit fixed point vector pairwise min operation
414 */
415qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
416
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100417/** 8 bit fixed point vector add (8 elements)
418 *
419 * @param[in] a First 8 bit fixed point input vector
420 * @param[in] b Second 8 bit fixed point input vector
421 *
422 * @return The result of the 8 bit fixed point vector addition
423 */
424qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
425
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100426/** 16 bit fixed point vector add (4 elements)
427 *
428 * @param[in] a First 16 bit fixed point input vector
429 * @param[in] b Second 16 bit fixed point input vector
430 *
431 * @return The result of the 16 bit fixed point vector addition
432 */
433qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
434
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100435/** 8 bit fixed point vector add (16 elements)
436 *
437 * @param[in] a First 8 bit fixed point input vector
438 * @param[in] b Second 8 bit fixed point input vector
439 *
440 * @return The result of the 8 bit fixed point vector addition
441 */
442qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
443
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100444/** 16 bit fixed point vector add (8 elements)
445 *
446 * @param[in] a First 16 bit fixed point input vector
447 * @param[in] b Second 16 bit fixed point input vector
448 *
449 * @return The result of the 16 bit fixed point vector addition
450 */
451qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
452
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100453/** 8 bit fixed point vector saturating add (8 elements)
454 *
455 * @param[in] a First 8 bit fixed point input vector
456 * @param[in] b Second 8 bit fixed point input vector
457 *
458 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
459 */
460qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
461
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100462/** 16 bit fixed point vector saturating add (4 elements)
463 *
464 * @param[in] a First 16 bit fixed point input vector
465 * @param[in] b Second 16 bit fixed point input vector
466 *
467 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
468 */
469qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
470
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100471/** 8 bit fixed point vector saturating add (16 elements)
472 *
473 * @param[in] a First 8 bit fixed point input vector
474 * @param[in] b Second 8 bit fixed point input vector
475 *
476 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
477 */
478qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
479
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100480/** 16 bit fixed point vector saturating add (8 elements)
481 *
482 * @param[in] a First 16 bit fixed point input vector
483 * @param[in] b Second 16 bit fixed point input vector
484 *
485 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
486 */
487qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
488
489/** 8 bit fixed point vector pairwise add long (8 elements)
490 *
491 * @param[in] a 8 bit fixed point input vector
492 *
493 * @return The result of adding adjacent pairs of elements, widened to a 16 bit vector (4 elements)
494 */
495int16x4_t vpaddl_qs8(qint8x8_t a);
496
497/** 8 bit fixed point vector subtraction (8 elements)
498 *
499 * @param[in] a First 8 bit fixed point input vector
500 * @param[in] b Second 8 bit fixed point input vector
501 *
502 * @return The result of the 8 bit fixed point vector subtraction
503 */
504qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
505
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100506/** 16 bit fixed point vector subtraction (4 elements)
507 *
508 * @param[in] a First 16 bit fixed point input vector
509 * @param[in] b Second 16 bit fixed point input vector
510 *
511 * @return The result of the 16 bit fixed point vector subtraction
512 */
513qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
514
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100515/** 8 bit fixed point vector subtraction (16 elements)
516 *
517 * @param[in] a First 8 bit fixed point input vector
518 * @param[in] b Second 8 bit fixed point input vector
519 *
520 * @return The result of the 8 bit fixed point vector subtraction
521 */
522qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
523
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100524/** 16 bit fixed point vector subtraction (8 elements)
525 *
526 * @param[in] a First 16 bit fixed point input vector
527 * @param[in] b Second 16 bit fixed point input vector
528 *
529 * @return The result of the 16 bit fixed point vector subtraction
530 */
531qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
532
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100533/** 8 bit fixed point vector saturating subtraction (8 elements)
534 *
535 * @param[in] a First 8 bit fixed point input vector
536 * @param[in] b Second 8 bit fixed point input vector
537 *
538 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
539 */
540qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
541
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100542/** 16 bit fixed point vector saturating subtraction (4 elements)
543 *
544 * @param[in] a First 16 bit fixed point input vector
545 * @param[in] b Second 16 bit fixed point input vector
546 *
547 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
548 */
549qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
550
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100551/** 8 bit fixed point vector saturating subtraction (16 elements)
552 *
553 * @param[in] a First 8 bit fixed point input vector
554 * @param[in] b Second 8 bit fixed point input vector
555 *
556 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
557 */
558qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
559
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100560/** 16 bit fixed point vector saturating subtraction (8 elements)
561 *
562 * @param[in] a First 16 bit fixed point input vector
563 * @param[in] b Second 16 bit fixed point input vector
564 *
565 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
566 */
567qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
568
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100569/** 8 bit fixed point vector multiply (8 elements)
570 *
571 * @param[in] a First 8 bit fixed point input vector
572 * @param[in] b Second 8 bit fixed point input vector
573 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
574 *
575 * @return The result of the 8 bit fixed point vector multiplication.
576 */
577qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
578
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100579/** 16 bit fixed point vector multiply (4 elements)
580 *
581 * @param[in] a First 16 bit fixed point input vector
582 * @param[in] b Second 16 bit fixed point input vector
583 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
584 *
585 * @return The result of the 16 bit fixed point vector multiplication.
586 */
587qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
588
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100589/** 8 bit fixed point vector multiply (16 elements)
590 *
591 * @param[in] a First 8 bit fixed point input vector
592 * @param[in] b Second 8 bit fixed point input vector
593 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
594 *
595 * @return The result of the 8 bit fixed point vector multiplication.
596 */
597qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
598
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100599/** 16 bit fixed point vector multiply (8 elements)
600 *
601 * @param[in] a First 16 bit fixed point input vector
602 * @param[in] b Second 16 bit fixed point input vector
603 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
604 *
605 * @return The result of the 16 bit fixed point vector multiplication.
606 */
607qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
608
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100609/** 8 bit fixed point vector saturating multiply (8 elements)
610 *
611 * @param[in] a First 8 bit fixed point input vector
612 * @param[in] b Second 8 bit fixed point input vector
613 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
614 *
615 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
616 */
617qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
618
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100619/** 16 bit fixed point vector saturating multiply (4 elements)
620 *
621 * @param[in] a First 16 bit fixed point input vector
622 * @param[in] b Second 16 bit fixed point input vector
623 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
624 *
625 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
626 */
627qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
628
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100629/** 8 bit fixed point vector saturating multiply (16 elements)
630 *
631 * @param[in] a First 8 bit fixed point input vector
632 * @param[in] b Second 8 bit fixed point input vector
633 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
634 *
635 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
636 */
637qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
638
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100639/** 16 bit fixed point vector saturating multiply (8 elements)
640 *
641 * @param[in] a First 16 bit fixed point input vector
642 * @param[in] b Second 16 bit fixed point input vector
643 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
644 *
645 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
646 */
647qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
648
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100649/** 8 bit fixed point vector long multiply (8 elements)
650 *
651 * @param[in] a First 8 bit fixed point input vector
652 * @param[in] b Second 8 bit fixed point input vector
653 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
654 *
655 * @return The result of the 8 bit fixed point long vector multiplication.
656 */
657qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
658
659/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
660 *
661 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
662 * @param[in] b Second 8 bit fixed point input vector
663 * @param[in] c Third 8 bit fixed point input vector
664 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
665 *
666 * @return The result of the 8 bit fixed point vector multiply-accumulate
667 */
668qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
669
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100670/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
671 *
672 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
673 * @param[in] b Second 16 bit fixed point input vector
674 * @param[in] c Third 16 bit fixed point input vector
675 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
676 *
677 * @return The result of the 16 bit fixed point vector multiply-accumulate
678 */
679qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
680
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100681/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
682 *
683 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
684 * @param[in] b Second 8 bit fixed point input vector
685 * @param[in] c Third 8 bit fixed point input vector
686 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
687 *
688 * @return The result of the 8 bit fixed point vector multiply-accumulate
689 */
690qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
691
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100692/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
693 *
694 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
695 * @param[in] b Second 16 bit fixed point input vector
696 * @param[in] c Third 16 bit fixed point input vector
697 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
698 *
699 * @return The result of the 16 bit fixed point vector multiply-accumulate
700 */
701qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
702
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100703/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
704 *
705 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
706 * @param[in] b Second 8 bit fixed point input vector
707 * @param[in] c Third 8 bit fixed point input vector
708 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
709 *
710 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
711 */
712qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
713
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100714/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
715 *
716 * @param[in] a First 16 bit fixed point input vector to which the result of the multiplication is added
717 * @param[in] b Second 16 bit fixed point input vector
718 * @param[in] c Third 16 bit fixed point input vector
719 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
720 *
721 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
722 */
723qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
724
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100725/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
726 *
727 * @param[in] a First 8 bit fixed point input vector to which the result of the multiplication is added
728 * @param[in] b Second 8 bit fixed point input vector
729 * @param[in] c Third 8 bit fixed point input vector
730 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
731 *
732 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
733 */
734qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
735
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100736/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
737 *
738 * @param[in] a First 16 bit fixed point input vector to which the result of the multiplication is added
739 * @param[in] b Second 16 bit fixed point input vector
740 * @param[in] c Third 16 bit fixed point input vector
741 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
742 *
743 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
744 */
745qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
746
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100747/** 8 bit fixed point vector multiply-accumulate long (8 elements).
748 * This operation performs the product between @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c).
749 *
750 * @param[in] a First 16 bit fixed point input vector to which the result of the multiplication is added
751 * @param[in] b Second 8 bit fixed point input vector
752 * @param[in] c Third 8 bit fixed point input vector
753 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
754 *
755 * @return The result of the 8 bit fixed point vector multiply-accumulate long (16 bit fixed point vector with 8 elements)
756 */
757qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
758
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100759/** 16 bit fixed point vector multiply-accumulate long (4 elements).
760 * This operation performs the product between @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c).
761 *
762 * @param[in] a First 32 bit fixed point input vector to which the result of the multiplication is added
763 * @param[in] b Second 16 bit fixed point input vector
764 * @param[in] c Third 16 bit fixed point input vector
765 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
766 *
767 * @return The result of the 16 bit fixed point vector multiply-accumulate long (32 bit fixed point vector with 4 elements)
768 */
769qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
770
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100771/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
772 * This operation performs the product between @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c).
773 *
774 * @param[in] a First 16 bit fixed point input vector to which the result of the multiplication is added
775 * @param[in] b Second 8 bit fixed point input vector
776 * @param[in] c Third 8 bit fixed point input vector
777 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
778 *
779 * @return The result of the 8 bit fixed point vector saturating multiply-accumulate long (16 bit fixed point vector with 8 elements)
780 */
781qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
782
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100783/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
784 * This operation performs the product between @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c).
785 *
786 * @param[in] a First 32 bit fixed point input vector to which the result of the multiplication is added
787 * @param[in] b Second 16 bit fixed point input vector
788 * @param[in] c Third 16 bit fixed point input vector
789 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
790 *
791 * @return The result of the 16 bit fixed point vector saturating multiply-accumulate long (32 bit fixed point vector with 4 elements)
792 */
793qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
794
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100795/** Convert a float vector with 4x2 elements to an 8 bit fixed point vector with 8 elements
796 *
797 * @param[in] a Float input vector (float32x4x2_t, i.e. 4x2 float values)
798 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
799 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100800 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100801 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100802qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100803
804/** Convert a float vector with 4 elements to a 16 bit fixed point vector with 4 elements
805 *
806 * @param[in] a Float input vector (float32x4_t, i.e. 4 float values)
807 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
808 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100809 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100810 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100811qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100812
813/** Convert a float vector with 4x4 elements to an 8 bit fixed point vector with 16 elements
814 *
815 * @param[in] a Float input vector (float32x4x4_t, i.e. 4x4 float values), passed by const reference
816 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
817 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100818 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100819 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100820qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100821
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100822/** Convert a float vector with 4x2 elements to a 16 bit fixed point vector with 8 elements
823 *
824 * @param[in] a Float input vector (float32x4x2_t, i.e. 4x2 float values), passed by const reference
825 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
826 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100827 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100828 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100829qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100830
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100831/** Convert an 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
832 *
833 * @param[in] a 8 bit fixed point input vector
834 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
835 *
836 * @return The result of the conversion 8 bit fixed point -> float32x4x2
837 */
838float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
839
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100840/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
841 *
842 * @param[in] a 16 bit fixed point input vector
843 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
844 *
845 * @return The result of the conversion 16 bit fixed point -> float32x4
846 */
847float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
848
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100849/** Convert an 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
850 *
851 * @note Despite the `_qs8_f32` suffix order (the reverse of vcvt_f32_qs8), the signature takes an 8 bit fixed point vector and returns floats.
852 *
853 * @param[in] a 8 bit fixed point input vector
854 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
855 *
856 * @return The result of the conversion 8 bit fixed point -> float32x4x4
857 */
856float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
857
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100858/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements
859 *
860 * @note Despite the `_qs16_f32` suffix order (the reverse of vcvt_f32_qs16), the signature takes a 16 bit fixed point vector and returns floats.
861 *
862 * @param[in] a 16 bit fixed point input vector
863 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
864 *
865 * @return The result of the conversion 16 bit fixed point -> float32x4x2
866 */
865float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
866
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100867/** Calculate the reciprocal of an 8 bit fixed point vector using the Newton-Raphson method. (8 elements)
868 *
869 * @param[in] a 8 bit fixed point input vector
870 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
871 *
872 * @return The result of the 8 bit reciprocal (1/a).
873 */
874qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
875
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100876/** Calculate the reciprocal of a 16 bit fixed point vector using the Newton-Raphson method. (4 elements)
877 *
878 * @param[in] a 16 bit fixed point input vector
879 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
880 *
881 * @return The result of the 16 bit reciprocal (1/a).
882 */
883qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
884
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100885/** Calculate the reciprocal of an 8 bit fixed point vector using the Newton-Raphson method. (16 elements)
886 *
887 * @param[in] a 8 bit fixed point input vector
888 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
889 *
890 * @return The result of the 8 bit reciprocal (1/a).
891 */
892qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
893
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100894/** Calculate the reciprocal of a 16 bit fixed point vector using the Newton-Raphson method. (8 elements)
895 *
896 * @param[in] a 16 bit fixed point input vector
897 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
898 *
899 * @return The result of the 16 bit reciprocal (1/a).
900 */
901qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
902
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100903/** Division fixed point 8 bit (8 elements)
904 *
905 * @note @p b is spelled with the raw NEON type int8x8_t; qint8x8_t is an alias of that same type, so this matches the other 8 bit declarations.
906 *
907 * @param[in] a First 8 bit fixed point input vector
908 * @param[in] b Second 8 bit fixed point input vector
909 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
910 *
911 * @return The quotient and remainder number in 8 bit fixed point format.
912 */
911qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position);
912
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100913/** Division fixed point 16 bit (4 elements)
914 *
915 * @param[in] a First 16 bit fixed point input vector
916 * @param[in] b Second 16 bit fixed point input vector
917 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
918 *
919 * @return The quotient and remainder number in 16 bit fixed point format.
920 */
921qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
922
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100923/** Division fixed point 8 bit (16 elements)
924 *
925 * @param[in] a First 8 bit fixed point input vector
926 * @param[in] b Second 8 bit fixed point input vector
927 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
928 *
929 * @return The quotient and remainder number in 8 bit fixed point format.
930 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100931qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
932
933/** Division fixed point 16 bit (8 elements)
934 *
935 * @param[in] a First 16 bit fixed point input vector (8 elements)
936 * @param[in] b Second 16 bit fixed point input vector (8 elements)
937 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
938 *
939 * @return The quotient and remainder number in 16 bit fixed point format.
940 */
941qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100942
943/** Perform a 4th degree polynomial approximation. (8 elements)
944 *
945 * @tparam islog NOTE(review): not documented in this header; presumably selects the logarithm variant of the polynomial when true - confirm against the implementation
946 *
947 * @param[in] a 8 bit fixed point input vector
948 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
949 *
950 * @return The result of the 8 bit taylor approximation.
951 */
950template <bool islog>
951qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
952
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100953/** Perform a 4th degree polynomial approximation. (4 elements)
954 *
955 * @tparam islog NOTE(review): not documented in this header; presumably selects the logarithm variant of the polynomial when true - confirm against the implementation
956 *
957 * @param[in] a 16 bit fixed point input vector
958 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
959 *
960 * @return The result of the 16 bit taylor approximation.
961 */
960template <bool islog>
961qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
962
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100963/** Perform a 4th degree polynomial approximation. (16 elements)
964 *
965 * @tparam islog NOTE(review): not documented in this header; presumably selects the logarithm variant of the polynomial when true - confirm against the implementation
966 *
967 * @param[in] a 8 bit fixed point input vector
968 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
969 *
970 * @return The result of the 8 bit taylor approximation.
971 */
970template <bool islog>
971qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
972
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100973/** Perform a 4th degree polynomial approximation. (8 elements)
974 *
975 * @tparam islog NOTE(review): not documented in this header; presumably selects the logarithm variant of the polynomial when true - confirm against the implementation
976 *
977 * @param[in] a 16 bit fixed point input vector
978 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
979 *
980 * @return The result of the 16 bit taylor approximation.
981 */
980template <bool islog>
981qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
982
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100983/** Calculate saturating exponential fixed point 8 bit (8 elements)
984 *
985 * @param[in] a 8 bit fixed point input vector
986 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
987 *
988 * @return The result of the 8 bit saturating exponential
989 */
990qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
991
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100992/** Calculate saturating exponential fixed point 16 bit (4 elements)
993 *
994 * @param[in] a 16 bit fixed point input vector
995 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
996 *
997 * @return The result of the 16 bit saturating exponential
998 */
999qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
1000
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001001/** Calculate saturating exponential fixed point 8 bit (16 elements)
1002 *
1003 * @param[in] a 8 bit fixed point input vector
1004 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1005 *
1006 * @return The result of the 8 bit saturating exponential
1007 */
1008qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
1009
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001010/** Calculate saturating exponential fixed point 16 bit (8 elements)
1011 *
1012 * @param[in] a 16 bit fixed point input vector (8 elements)
1013 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1014 *
1015 * @return The result of the 16 bit saturating exponential (8 elements)
1016 */
1017qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
1018
1019/** Calculate logarithm fixed point 8 bit (8 elements)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001020 *
1021 * @param[in] a 8 bit fixed point input vector
1022 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1023 *
1024 * @return The result of the 8 bit logarithm.
1025 */
1026qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
1027
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001028/** Calculate logarithm fixed point 16 bit (4 elements)
1029 *
1030 * @param[in] a 16 bit fixed point input vector (4 elements)
1031 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1032 *
1033 * @return The result of the 16 bit logarithm (4 elements).
1034 */
1035qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
1036
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001037/** Calculate logarithm fixed point 8 bit (16 elements)
1038 *
1039 * @param[in] a 8 bit fixed point input vector
1040 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1041 *
1042 * @return The result of the 8 bit logarithm.
1043 */
1044qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
1045
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001046/** Calculate logarithm fixed point 16 bit (8 elements)
1047 *
1048 * @param[in] a 16 bit fixed point input vector (8 elements)
1049 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1050 *
1051 * @return The result of the 16 bit logarithm (8 elements).
1052 */
1053qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
1054
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001055/** Calculate inverse square root for fixed point 8 bit using the Newton-Raphson method (8 elements)
1056 *
1057 * @param[in] a 8 bit fixed point input vector
1058 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1059 *
1060 * @return The result of the 8 bit inverse sqrt.
1061 */
1062qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1063
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001064/** Calculate inverse square root for fixed point 16 bit using the Newton-Raphson method (4 elements)
1065 *
1066 * @param[in] a 16 bit fixed point input vector
1067 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1068 *
1069 * @return The result of the 16 bit inverse sqrt.
1070 */
1071qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1072
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001073/** Calculate saturating inverse square root for fixed point 8 bit using the Newton-Raphson method (8 elements)
1074 *
1075 * @param[in] a 8 bit fixed point input vector
1076 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1077 *
1078 * @return The result of the 8 bit saturating inverse sqrt.
1079 */
1080qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1081
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001082/** Calculate saturating inverse square root for fixed point 16 bit using the Newton-Raphson method (4 elements)
1083 *
1084 * @param[in] a 16 bit fixed point input vector
1085 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1086 *
1087 * @return The result of the 16 bit saturating inverse sqrt.
1088 */
1089qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1090
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001091/** Calculate inverse square root for fixed point 8 bit using the Newton-Raphson method (16 elements)
1092 *
1093 * @param[in] a 8 bit fixed point input vector
1094 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1095 *
1096 * @return The result of the 8 bit inverse sqrt.
1097 */
1098qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1099
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001100/** Calculate inverse square root for fixed point 16 bit using the Newton-Raphson method (8 elements)
1101 *
1102 * @param[in] a 16 bit fixed point input vector
1103 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1104 *
1105 * @return The result of the 16 bit inverse sqrt.
1106 */
1107qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1108
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001109/** Calculate saturating inverse square root for fixed point 8 bit using the Newton-Raphson method (16 elements)
1110 *
1111 * @param[in] a 8 bit fixed point input vector
1112 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1113 *
1114 * @return The result of the 8 bit saturating inverse sqrt.
1115 */
1116qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1117
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001118/** Calculate saturating inverse square root for fixed point 16 bit using the Newton-Raphson method (8 elements)
1119 *
1120 * @param[in] a 16 bit fixed point input vector
1121 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1122 *
1123 * @return The result of the 16 bit saturating inverse sqrt.
1124 */
1125qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1126
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001127/** Calculate hyperbolic tangent for fixed point 8 bit (8 elements)
1128 *
1129 * @param[in] a 8 bit fixed point input vector
1130 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1131 *
1132 * @return The calculated hyperbolic tangent as an 8 bit fixed point vector.
1133 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001134qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001135
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001136/** Calculate hyperbolic tangent for fixed point 16 bit (4 elements)
1137 *
1138 * @param[in] a 16 bit fixed point input vector
1139 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1140 *
1141 * @return The calculated hyperbolic tangent as a 16 bit fixed point vector.
1142 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001143qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001144
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001145/** Calculate hyperbolic tangent for fixed point 8 bit (16 elements)
1146 *
1147 * @param[in] a 8 bit fixed point input vector
1148 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1149 *
1150 * @return The calculated hyperbolic tangent as an 8 bit fixed point vector.
1151 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001152qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position);
1153
1154/** Calculate hyperbolic tangent for fixed point 16 bit (8 elements)
1155 *
1156 * @param[in] a 16 bit fixed point input vector
1157 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1158 *
1159 * @return The calculated hyperbolic tangent as a 16 bit fixed point vector.
1160 */
1161qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001162
1163/** Calculate saturating n power for fixed point 8 bit (16 elements).
1164 *
1165 * pow(a,b) = e^(b*log(a))
1166 *
1167 * @note NOTE(review): the summary says 16 elements and @p b is declared as qint8x16_t, but @p a and the return value are declared as qint8x8_t (8 elements) - confirm the intended types against the implementation.
1168 *
1169 * @param[in] a 8 bit fixed point input vector
1170 * @param[in] b 8 bit fixed point power vector
1171 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1172 *
1173 * @return The result of the 8 bit power.
1174 */
1173qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001174
1175/** Compute lane-by-lane maximum between elements of two float vectors with 4x2 elements
1176 *
1177 * @param[in] a First float input vector
1178 * @param[in] b Second float input vector
1179 *
1180 * @return The lane-by-lane maximum -> float32x4x2
1181 */
1182float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001183}
1184#include "arm_compute/core/NEON/NEFixedPoint.inl"
1185#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */