blob: 50463b5efe61d82874845f9a30ff842108775e81 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
25#define __ARM_COMPUTE_NEFIXEDPOINT_H__
26
27#include "arm_compute/core/FixedPoint.h"
28
29#include <arm_neon.h>
30
31namespace arm_compute
32{
33using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
34using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
35using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
36using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
37using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
38using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
39using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
40using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
41using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
42using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
43using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
44using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
45using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
46using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
47using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
48using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
Georgios Pinitas9247c922017-06-28 18:29:47 +010049using qint32x2_t = int32x2_t; /**< 32 bit fixed point vector with 2 elements */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010050using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010051
52/** Get the lower half of a 16 elements vector
53 *
54 * @param[in] a vector of 16 elements
55 *
56 * @return 8 bit fixed point vector (8 elements)
57 */
58qint8x8_t vget_low_qs8(qint8x16_t a);
59
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010060/** Get the lower half of an 8 elements vector
61 *
62 * @param[in] a vector of 8 elements
63 *
64 * @return 16 bit fixed point vector (4 elements)
65 */
66qint16x4_t vget_low_qs16(qint16x8_t a);
67
Anthony Barbier6ff3b192017-09-04 18:44:23 +010068/** Get the higher half of a 16 elements vector
69 *
70 * @param[in] a vector of 16 elements
71 *
72 * @return 8 bit fixed point vector (8 elements)
73 */
74qint8x8_t vget_high_qs8(qint8x16_t a);
75
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010076/** Get the higher half of an 8 elements vector
77 *
78 * @param[in] a vector of 8 elements
79 *
80 * @return 16 bit fixed point vector (4 elements)
81 */
82qint16x4_t vget_high_qs16(qint16x8_t a);
83
Anthony Barbier6ff3b192017-09-04 18:44:23 +010084/** Load a single 8 bit fixed point vector from memory (8 elements)
85 *
86 * @param[in] addr Memory address of the 8 bit fixed point vector to load
87 *
88 * @return 8 bit fixed point vector (8 elements)
89 */
90qint8x8_t vld1_qs8(const qint8_t *addr);
91
Anthony Barbier6ff3b192017-09-04 18:44:23 +010092/** Load a single 16 bit fixed point vector from memory (4 elements)
93 *
94 * @param[in] addr Memory address of the 16 bit fixed point vector to load
95 *
96 * @return 16 bit fixed point vector (4 elements)
97 */
98qint16x4_t vld1_qs16(const qint16_t *addr);
99
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100100/** Load a single 8 bit fixed point vector from memory (16 elements)
101 *
102 * @param[in] addr Memory address of the 8 bit fixed point vector to load
103 *
104 * @return 8 bit fixed point vector (16 elements)
105 */
106qint8x16_t vld1q_qs8(const qint8_t *addr);
107
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100108/** Load a single 16 bit fixed point vector from memory (8 elements)
109 *
110 * @param[in] addr Memory address of the 16 bit fixed point vector to load
111 *
112 * @return 16 bit fixed point vector (8 elements)
113 */
114qint16x8_t vld1q_qs16(const qint16_t *addr);
115
116/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
117 *
118 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
119 *
120 * @return 8 bit fixed point vector (8 elements)
121 */
122qint8x8_t vld1_dup_qs8(const qint8_t *addr);
123
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100124/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
125 *
126 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
127 *
128 * @return 16 bit fixed point vector (4 elements)
129 */
130qint16x4_t vld1_dup_qs16(const qint16_t *addr);
131
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100132/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
133 *
134 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
135 *
136 * @return 8 bit fixed point vector (16 elements)
137 */
138qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
139
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100140/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
141 *
142 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
143 *
144 * @return 16 bit fixed point vector (8 elements)
145 */
146qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
147
Michele Di Giorgio81f0d152017-07-11 15:00:52 +0100148/** Load two 16 bit fixed point vectors from memory (8x2 elements)
149 *
150 * @param[in] addr Memory address of the 16 bit fixed point vectors to load
151 *
152 * @return 16 bit fixed point vectors (8x2 elements)
153 */
154qint16x8x2_t vld2q_qs16(qint16_t *addr);
155
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100156/** Store a single 8 bit fixed point vector to memory (8 elements)
157 *
158 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
159 * @param[in] b 8 bit fixed point vector to store
160 *
161 */
162void vst1_qs8(qint8_t *addr, qint8x8_t b);
163
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100164/** Store a single 16 bit fixed point vector to memory (4 elements)
165 *
166 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
167 * @param[in] b 16 bit fixed point vector to store
168 *
169 */
170void vst1_qs16(qint16_t *addr, qint16x4_t b);
171
172/** Store a single 8 bit fixed point vector to memory (16 elements)
173 *
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100174 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
175 * @param[in] b 8 bit fixed point vector to store
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100176 *
177 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100178void vst1q_qs8(qint8_t *addr, qint8x16_t b);
179
180/** Store a single 16 bit fixed point vector to memory (8 elements)
181*
182* @param[in] addr Memory address where the 16 bit fixed point vector should be stored
183* @param[in] b 16 bit fixed point vector to store
184*
185*/
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100186void vst1q_qs16(qint16_t *addr, qint16x8_t b);
187
Georgios Pinitasccc65d42017-06-27 17:39:11 +0100188/** Store two 16 bit fixed point vectors to memory (8x2 elements)
189*
190* @param[in] addr Memory address where the 16 bit fixed point vectors should be stored
191* @param[in] b 16 bit fixed point vectors to store
192*
193*/
194void vst2q_qs16(qint16_t *addr, qint16x8x2_t b);
195
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100196/** 16 bit fixed point vector saturating narrow (8 elements)
197 *
198 * @param[in] a 16 bit fixed point vector to convert
199 *
200 * @return 8 bit fixed point vector
201 */
202qint8x8_t vqmovn_q16(qint16x8_t a);
203
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100204/** 32 bit fixed point vector saturating narrow (4 elements)
205 *
206 * @param[in] a 32 bit fixed point vector to convert
207 *
208 * @return 16 bit fixed point vector
209 */
210qint16x4_t vqmovn_q32(qint32x4_t a);
211
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100212/** 8 bit fixed point vector duplicate (8 elements)
213 *
214 * @param[in] a 8 bit fixed point to duplicate
215 *
216 * @return The result of the vector duplication
217 */
218qint8x8_t vdup_n_qs8(qint8_t a);
219
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100220/** 16 bit fixed point vector duplicate (4 elements)
221 *
222 * @param[in] a 16 bit fixed point to duplicate
223 *
224 * @return The result of the vector duplication
225 */
226qint16x4_t vdup_n_qs16(qint16_t a);
227
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100228/** 8 bit fixed point vector duplicate (16 elements)
229 *
230 * @param[in] a 8 bit fixed point to duplicate
231 *
232 * @return The result of the vector duplication
233 */
234qint8x16_t vdupq_n_qs8(qint8_t a);
235
236/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
237 *
238 * @param[in] a Floating point value to convert to 8 bit fixed point and duplicate
239 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
240 *
241 * @return The result of the vector duplication
242 */
243qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
244
245/** 16 bit fixed point vector duplicate (8 elements)
246 *
247 * @param[in] a 16 bit fixed point to duplicate (NOTE(review): declared here as a qint16x8_t vector, whereas vdup_n_qs16 and vdupq_n_qs8 take a scalar - confirm against the definition)
248 *
249 * @return The result of the vector duplication
250 */
251qint16x8_t vdupq_n_qs16(qint16x8_t a);
252
253/** Absolute value of 8 bit fixed point vector (8 elements)
254 *
255 * @param[in] a 8 bit fixed point input vector
256 *
257 * @return The result of the 8 bit fixed point vector absolute value
258 */
259qint8x8_t vabs_qs8(qint8x8_t a);
260
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100261/** Absolute value of 16 bit fixed point vector (4 elements)
262 *
263 * @param[in] a 16 bit fixed point input vector
264 *
265 * @return The result of the 16 bit fixed point vector absolute value
266 */
267qint16x4_t vabs_qs16(qint16x4_t a);
268
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100269/** Absolute value of 8 bit fixed point vector (16 elements)
270 *
271 * @param[in] a 8 bit fixed point input vector
272 *
273 * @return The result of the 8 bit fixed point vector absolute value
274 */
275qint8x16_t vabsq_qs8(qint8x16_t a);
276
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100277/** Absolute value of 16 bit fixed point vector (8 elements)
278 *
279 * @param[in] a 16 bit fixed point input vector
280 *
281 * @return The result of the 16 bit fixed point vector absolute value
282 */
283qint16x8_t vabsq_qs16(qint16x8_t a);
284
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100285/** Saturating absolute value of 8 bit fixed point vector (8 elements)
286 *
287 * @param[in] a 8 bit fixed point input vector
288 *
289 * @return The result of the 8 bit fixed point vector absolute value
290 */
291qint8x8_t vqabs_qs8(qint8x8_t a);
292
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100293/** Saturating absolute value of 16 bit fixed point vector (4 elements)
294 *
295 * @param[in] a 16 bit fixed point input vector
296 *
297 * @return The result of the 16 bit fixed point vector absolute value
298 */
299qint16x4_t vqabs_qs16(qint16x4_t a);
300
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100301/** Saturating absolute value of 8 bit fixed point vector (16 elements)
302 *
303 * @param[in] a 8 bit fixed point input vector
304 *
305 * @return The result of the 8 bit fixed point vector absolute value
306 */
307qint8x16_t vqabsq_qs8(qint8x16_t a);
308
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100309/** Saturating absolute value of 16 bit fixed point vector (8 elements)
310 *
311 * @param[in] a 16 bit fixed point input vector
312 *
313 * @return The result of the 16 bit fixed point vector absolute value
314 */
315qint16x8_t vqabsq_qs16(qint16x8_t a);
316
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100317/** 8 bit fixed point vector max (8 elements)
318 *
319 * @param[in] a First 8 bit fixed point input vector
320 * @param[in] b Second 8 bit fixed point input vector
321 *
322 * @return The result of the 8 bit fixed point vector max operation
323 */
324qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
325
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100326/** 16 bit fixed point vector max (4 elements)
327 *
328 * @param[in] a First 16 bit fixed point input vector
329 * @param[in] b Second 16 bit fixed point input vector
330 *
331 * @return The result of the 16 bit fixed point vector max operation
332 */
333qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
334
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100335/** 8 bit fixed point vector max (16 elements)
336 *
337 * @param[in] a First 8 bit fixed point input vector
338 * @param[in] b Second 8 bit fixed point input vector
339 *
340 * @return The result of the 8 bit fixed point vector max operation
341 */
342qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
343
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100344/** 16 bit fixed point vector max (8 elements)
345 *
346 * @param[in] a First 16 bit fixed point input vector
347 * @param[in] b Second 16 bit fixed point input vector
348 *
349 * @return The result of the 16 bit fixed point vector max operation
350 */
351qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
352
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100353/** 8 bit fixed point vector pairwise max (8 elements)
354 *
355 * @param[in] a First 8 bit fixed point input vector
356 * @param[in] b Second 8 bit fixed point input vector
357 *
358 * @return The result of the 8 bit fixed point vector pairwise max operation
359 */
360qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
361
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100362/** 16 bit fixed point vector pairwise max (4 elements)
363 *
364 * @param[in] a First 16 bit fixed point input vector
365 * @param[in] b Second 16 bit fixed point input vector
366 *
367 * @return The result of the 16 bit fixed point vector pairwise max operation
368 */
369qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
370
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100371/** 8 bit fixed point vector min (8 elements)
372 *
373 * @param[in] a First 8 bit fixed point input vector
374 * @param[in] b Second 8 bit fixed point input vector
375 *
376 * @return The result of the 8 bit fixed point vector min operation
377 */
378qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
379
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100380/** 16 bit fixed point vector min (4 elements)
381 *
382 * @param[in] a First 16 bit fixed point input vector
383 * @param[in] b Second 16 bit fixed point input vector
384 *
385 * @return The result of the 16 bit fixed point vector min operation
386 */
387qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
388
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100389/** 8 bit fixed point vector min (16 elements)
390 *
391 * @param[in] a First 8 bit fixed point input vector
392 * @param[in] b Second 8 bit fixed point input vector
393 *
394 * @return The result of the 8 bit fixed point vector min operation
395 */
396qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
397
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100398/** 16 bit fixed point vector min (8 elements)
399 *
400 * @param[in] a First 16 bit fixed point input vector
401 * @param[in] b Second 16 bit fixed point input vector
402 *
403 * @return The result of the 16 bit fixed point vector min operation
404 */
405qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
406
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100407/** 8 bit fixed point vector pairwise min (8 elements)
408 *
409 * @param[in] a First 8 bit fixed point input vector
410 * @param[in] b Second 8 bit fixed point input vector
411 *
412 * @return The result of the 8 bit fixed point vector pairwise min operation
413 */
414qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
415
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100416/** 16 bit fixed point vector pairwise min (4 elements)
417 *
418 * @param[in] a First 16 bit fixed point input vector
419 * @param[in] b Second 16 bit fixed point input vector
420 *
421 * @return The result of the 16 bit fixed point vector pairwise min operation
422 */
423qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
424
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100425/** 8 bit fixed point vector add (8 elements)
426 *
427 * @param[in] a First 8 bit fixed point input vector
428 * @param[in] b Second 8 bit fixed point input vector
429 *
430 * @return The result of the 8 bit fixed point vector addition
431 */
432qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
433
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100434/** 16 bit fixed point vector add (4 elements)
435 *
436 * @param[in] a First 16 bit fixed point input vector
437 * @param[in] b Second 16 bit fixed point input vector
438 *
439 * @return The result of the 16 bit fixed point vector addition
440 */
441qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
442
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100443/** 8 bit fixed point vector add (16 elements)
444 *
445 * @param[in] a First 8 bit fixed point input vector
446 * @param[in] b Second 8 bit fixed point input vector
447 *
448 * @return The result of the 8 bit fixed point vector addition
449 */
450qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
451
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100452/** 16 bit fixed point vector add (8 elements)
453 *
454 * @param[in] a First 16 bit fixed point input vector
455 * @param[in] b Second 16 bit fixed point input vector
456 *
457 * @return The result of the 16 bit fixed point vector addition
458 */
459qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
460
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100461/** 8 bit fixed point vector saturating add (8 elements)
462 *
463 * @param[in] a First 8 bit fixed point input vector
464 * @param[in] b Second 8 bit fixed point input vector
465 *
466 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
467 */
468qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
469
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100470/** 16 bit fixed point vector saturating add (4 elements)
471 *
472 * @param[in] a First 16 bit fixed point input vector
473 * @param[in] b Second 16 bit fixed point input vector
474 *
475 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
476 */
477qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
478
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100479/** 8 bit fixed point vector saturating add (16 elements)
480 *
481 * @param[in] a First 8 bit fixed point input vector
482 * @param[in] b Second 8 bit fixed point input vector
483 *
484 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
485 */
486qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
487
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100488/** 16 bit fixed point vector saturating add (8 elements)
489 *
490 * @param[in] a First 16 bit fixed point input vector
491 * @param[in] b Second 16 bit fixed point input vector
492 *
493 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
494 */
495qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
496
497/** 8 bit fixed point vector saturating pairwise add (8 elements)
498 *
499 * @param[in] a 8 bit fixed point input vector
500 *
501 * @return The result of the 16 bit fixed point vector addition (4 elements). The result is saturated in case of overflow (NOTE(review): the result is returned as a widened plain int16x4_t rather than qint16x4_t - confirm whether saturation can actually occur here)
502 */
503int16x4_t vpaddl_qs8(qint8x8_t a);
504
505/** 8 bit fixed point vector subtraction (8 elements)
506 *
507 * @param[in] a First 8 bit fixed point input vector
508 * @param[in] b Second 8 bit fixed point input vector
509 *
510 * @return The result of the 8 bit fixed point vector subtraction
511 */
512qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
513
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100514/** 16 bit fixed point vector subtraction (4 elements)
515 *
516 * @param[in] a First 16 bit fixed point input vector
517 * @param[in] b Second 16 bit fixed point input vector
518 *
519 * @return The result of the 16 bit fixed point vector subtraction
520 */
521qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
522
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100523/** 8 bit fixed point vector subtraction (16 elements)
524 *
525 * @param[in] a First 8 bit fixed point input vector
526 * @param[in] b Second 8 bit fixed point input vector
527 *
528 * @return The result of the 8 bit fixed point vector subtraction
529 */
530qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
531
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100532/** 16 bit fixed point vector subtraction (8 elements)
533 *
534 * @param[in] a First 16 bit fixed point input vector
535 * @param[in] b Second 16 bit fixed point input vector
536 *
537 * @return The result of the 16 bit fixed point vector subtraction
538 */
539qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
540
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100541/** 8 bit fixed point vector saturating subtraction (8 elements)
542 *
543 * @param[in] a First 8 bit fixed point input vector
544 * @param[in] b Second 8 bit fixed point input vector
545 *
546 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
547 */
548qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
549
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100550/** 16 bit fixed point vector saturating subtraction (4 elements)
551 *
552 * @param[in] a First 16 bit fixed point input vector
553 * @param[in] b Second 16 bit fixed point input vector
554 *
555 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
556 */
557qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
558
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100559/** 8 bit fixed point vector saturating subtraction (16 elements)
560 *
561 * @param[in] a First 8 bit fixed point input vector
562 * @param[in] b Second 8 bit fixed point input vector
563 *
564 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
565 */
566qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
567
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100568/** 16 bit fixed point vector saturating subtraction (8 elements)
569 *
570 * @param[in] a First 16 bit fixed point input vector
571 * @param[in] b Second 16 bit fixed point input vector
572 *
573 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
574 */
575qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
576
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100577/** 8 bit fixed point vector multiply (8 elements)
578 *
579 * @param[in] a First 8 bit fixed point input vector
580 * @param[in] b Second 8 bit fixed point input vector
581 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
582 *
583 * @return The result of the 8 bit fixed point vector multiplication.
584 */
585qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
586
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100587/** 16 bit fixed point vector multiply (4 elements)
588 *
589 * @param[in] a First 16 bit fixed point input vector
590 * @param[in] b Second 16 bit fixed point input vector
591 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
592 *
593 * @return The result of the 16 bit fixed point vector multiplication.
594 */
595qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
596
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100597/** 8 bit fixed point vector multiply (16 elements)
598 *
599 * @param[in] a First 8 bit fixed point input vector
600 * @param[in] b Second 8 bit fixed point input vector
601 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
602 *
603 * @return The result of the 8 bit fixed point vector multiplication.
604 */
605qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
606
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100607/** 16 bit fixed point vector multiply (8 elements)
608 *
609 * @param[in] a First 16 bit fixed point input vector
610 * @param[in] b Second 16 bit fixed point input vector
611 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
612 *
613 * @return The result of the 16 bit fixed point vector multiplication.
614 */
615qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
616
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100617/** 8 bit fixed point vector saturating multiply (8 elements)
618 *
619 * @param[in] a First 8 bit fixed point input vector
620 * @param[in] b Second 8 bit fixed point input vector
621 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
622 *
623 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
624 */
625qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
626
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100627/** 16 bit fixed point vector saturating multiply (4 elements)
628 *
629 * @param[in] a First 16 bit fixed point input vector
630 * @param[in] b Second 16 bit fixed point input vector
631 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
632 *
633 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
634 */
635qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
636
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100637/** 8 bit fixed point vector saturating multiply (16 elements)
638 *
639 * @param[in] a First 8 bit fixed point input vector
640 * @param[in] b Second 8 bit fixed point input vector
641 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
642 *
643 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
644 */
645qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
646
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100647/** 16 bit fixed point vector saturating multiply (8 elements)
648 *
649 * @param[in] a First 16 bit fixed point input vector
650 * @param[in] b Second 16 bit fixed point input vector
651 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
652 *
653 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
654 */
655qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
656
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100657/** 8 bit fixed point vector long multiply (8 elements)
658 *
659 * @param[in] a First 8 bit fixed point input vector
660 * @param[in] b Second 8 bit fixed point input vector
661 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
662 *
663 * @return The result of the 8 bit fixed point long vector multiplication.
664 */
665qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
666
667/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
668 *
669 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
670 * @param[in] b Second 8 bit fixed point input vector
671 * @param[in] c Third 8 bit fixed point input vector
672 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
673 *
674 * @return The result of the 8 bit fixed point vector multiply-accumulate
675 */
676qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
677
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100678/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
679 *
680 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
681 * @param[in] b Second 16 bit fixed point input vector
682 * @param[in] c Third 16 bit fixed point input vector
683 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
684 *
685 * @return The result of the 16 bit fixed point vector multiply-accumulate
686 */
687qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
688
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100689/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
690 *
691 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
692 * @param[in] b Second 8 bit fixed point input vector
693 * @param[in] c Third 8 bit fixed point input vector
694 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
695 *
696 * @return The result of the 8 bit fixed point vector multiply-accumulate
697 */
698qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
699
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100700/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and add the result to @p a (a + b * c).
701 *
702 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
703 * @param[in] b Second 16 bit fixed point input vector
704 * @param[in] c Third 16 bit fixed point input vector
705 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
706 *
707 * @return The result of the 16 bit fixed point vector multiply-accumulate
708 */
709qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
710
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100711/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
712 *
713 * @param[in] a First 8 bit fixed point input vector, to which the result of the multiplication is added
714 * @param[in] b Second 8 bit fixed point input vector
715 * @param[in] c Third 8 bit fixed point input vector
716 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
717 *
718 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
719 */
720qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
721
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100722/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
723 *
724 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
725 * @param[in] b Second 16 bit fixed point input vector
726 * @param[in] c Third 16 bit fixed point input vector
727 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
728 *
729 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
730 */
731qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
732
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100733/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
734 *
735 * @param[in] a First 8 bit fixed point input vector, to which the result of the multiplication is added
736 * @param[in] b Second 8 bit fixed point input vector
737 * @param[in] c Third 8 bit fixed point input vector
738 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
739 *
740 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
741 */
742qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
743
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100744/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
745 *
746 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
747 * @param[in] b Second 16 bit fixed point input vector
748 * @param[in] c Third 16 bit fixed point input vector
749 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
750 *
751 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
752 */
753qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
754
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100755/** 8 bit fixed point vector multiply-accumulate long (8 elements).
756 * This operation computes the product of @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c).
757 *
758 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
759 * @param[in] b Second 8 bit fixed point input vector
760 * @param[in] c Third 8 bit fixed point input vector
761 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
762 *
763 * @return The result of the 8 bit fixed point vector multiply-accumulate long, as a 16 bit fixed point vector
764 */
765qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
766
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100767/** 16 bit fixed point vector multiply-accumulate long (4 elements).
768 * This operation computes the product of @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c).
769 *
770 * @param[in] a First 32 bit fixed point input vector, to which the result of the multiplication is added
771 * @param[in] b Second 16 bit fixed point input vector
772 * @param[in] c Third 16 bit fixed point input vector
773 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
774 *
775 * @return The result of the 16 bit fixed point vector multiply-accumulate long, as a 32 bit fixed point vector
776 */
777qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
778
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100779/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
780 * This operation computes the product of @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c).
781 *
782 * @param[in] a First 16 bit fixed point input vector, to which the result of the multiplication is added
783 * @param[in] b Second 8 bit fixed point input vector
784 * @param[in] c Third 8 bit fixed point input vector
785 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
786 *
787 * @return The result of the 8 bit fixed point vector multiply-accumulate long, as a 16 bit fixed point vector
788 */
789qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
790
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100791/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
792 * This operation computes the product of @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c).
793 *
794 * @param[in] a First 32 bit fixed point input vector, to which the result of the multiplication is added
795 * @param[in] b Second 16 bit fixed point input vector
796 * @param[in] c Third 16 bit fixed point input vector
797 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
798 *
799 * @return The result of the 16 bit fixed point vector multiply-accumulate long, as a 32 bit fixed point vector
800 */
801qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
802
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100803/** Convert a float vector with 4x2 elements to an 8 bit fixed point vector with 8 elements
804 *
805 * @param[in] a Float input vector (float32x4x2_t, passed by value)
806 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
807 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100808 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100809 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100810qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100811
812/** Convert a float vector with 4 elements to a 16 bit fixed point vector with 4 elements
813 *
814 * @param[in] a Float input vector (float32x4_t, passed by value)
815 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
816 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100817 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100818 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100819qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100820
821/** Convert a float vector with 4x4 elements to an 8 bit fixed point vector with 16 elements
822 *
823 * @param[in] a Float input vector (float32x4x4_t, passed by const reference)
824 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
825 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100826 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100827 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100828qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100829
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100830/** Convert a float vector with 4x2 elements to a 16 bit fixed point vector with 8 elements
831 *
832 * @param[in] a Float input vector (float32x4x2_t, passed by const reference)
833 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
834 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100835 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100836 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100837qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100838
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100839/** Convert an 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
840 *
841 * @param[in] a 8 bit fixed point input vector
842 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
843 *
844 * @return The result of the conversion 8 bit fixed point -> float32x4x2
845 */
846float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
847
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100848/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
849 *
850 * @param[in] a 16 bit fixed point input vector
851 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
852 *
853 * @return The result of the conversion 16 bit fixed point -> float32x4
854 */
855float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
856
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100857/** Convert an 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
858 *
 * NOTE(review): the name reads "qs8_f32" although the conversion is 8 bit fixed point -> float,
 * which is the reverse of the order used by vcvt_f32_qs8; confirm whether the naming is intentional.
 *
859 * @param[in] a 8 bit fixed point input vector
860 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
861 *
862 * @return The result of the conversion 8 bit fixed point -> float32x4x4
863 */
864float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
865
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100866/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements
867 *
 * NOTE(review): the name reads "qs16_f32" although the conversion is 16 bit fixed point -> float,
 * which is the reverse of the order used by vcvt_f32_qs16; confirm whether the naming is intentional.
 *
868 * @param[in] a 16 bit fixed point input vector
869 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
870 *
871 * @return The result of the conversion 16 bit fixed point -> float32x4x2
872 */
873float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
874
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100875/** Calculate the reciprocal of an 8 bit fixed point number using the Newton-Raphson method. (8 elements)
876 *
877 * @param[in] a 8 bit fixed point input vector
878 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
879 *
880 * @return The result of the 8 bit reciprocal (1/a).
881 */
882qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
883
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100884/** Calculate the reciprocal of a 16 bit fixed point number using the Newton-Raphson method. (4 elements)
885 *
886 * @param[in] a 16 bit fixed point input vector
887 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
888 *
889 * @return The result of the 16 bit reciprocal (1/a).
890 */
891qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
892
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100893/** Calculate the reciprocal of an 8 bit fixed point number using the Newton-Raphson method. (16 elements)
894 *
895 * @param[in] a 8 bit fixed point input vector
896 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
897 *
898 * @return The result of the 8 bit reciprocal (1/a).
899 */
900qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
901
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100902/** Calculate the reciprocal of a 16 bit fixed point number using the Newton-Raphson method. (8 elements)
903 *
904 * @param[in] a 16 bit fixed point input vector
905 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
906 *
907 * @return The result of the 16 bit reciprocal (1/a).
908 */
909qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
910
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100911/** Division fixed point 8 bit (8 elements)
912 *
913 * @param[in] a First 8 bit fixed point input vector
914 * @param[in] b Second 8 bit fixed point input vector (declared as int8x8_t, the type that qint8x8_t aliases)
915 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
916 *
917 * @return The quotient and remainder number in 8 bit fixed point format.
918 */
919qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position);
920
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100921/** Division fixed point 16 bit (4 elements)
922 *
923 * @param[in] a First 16 bit fixed point input vector
924 * @param[in] b Second 16 bit fixed point input vector
925 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
926 *
927 * @return The quotient and remainder number in 16 bit fixed point format.
928 */
929qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
930
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100931/** Division fixed point 8 bit (16 elements)
932 *
933 * @param[in] a First 8 bit fixed point input vector
934 * @param[in] b Second 8 bit fixed point input vector
935 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
936 *
937 * @return The quotient and remainder number in 8 bit fixed point format.
938 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100939qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
940
941/** Division fixed point 16 bit (8 elements)
942 *
943 * @param[in] a First 16 bit fixed point input vector
944 * @param[in] b Second 16 bit fixed point input vector
945 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
946 *
947 * @return The quotient and remainder number in 16 bit fixed point format.
948 */
949qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100950
951/** Perform a 4th degree polynomial approximation. (8 elements)
952 *
 * @tparam islog Compile-time boolean flag. NOTE(review): presumably selects the logarithm variant of the polynomial; confirm against the implementation.
 *
953 * @param[in] a 8 bit fixed point input vector
954 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
955 *
956 * @return The result of the 8 bit Taylor approximation.
957 */
958template <bool islog>
959qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
960
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100961/** Perform a 4th degree polynomial approximation. (4 elements)
962 *
 * @tparam islog Compile-time boolean flag. NOTE(review): presumably selects the logarithm variant of the polynomial; confirm against the implementation.
 *
963 * @param[in] a 16 bit fixed point input vector
964 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
965 *
966 * @return The result of the 16 bit Taylor approximation.
967 */
968template <bool islog>
969qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
970
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100971/** Perform a 4th degree polynomial approximation. (16 elements)
972 *
 * @tparam islog Compile-time boolean flag. NOTE(review): presumably selects the logarithm variant of the polynomial; confirm against the implementation.
 *
973 * @param[in] a 8 bit fixed point input vector
974 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
975 *
976 * @return The result of the 8 bit Taylor approximation.
977 */
978template <bool islog>
979qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
980
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100981/** Perform a 4th degree polynomial approximation. (8 elements)
982 *
 * @tparam islog Compile-time boolean flag. NOTE(review): presumably selects the logarithm variant of the polynomial; confirm against the implementation.
 *
983 * @param[in] a 16 bit fixed point input vector
984 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
985 *
986 * @return The result of the 16 bit Taylor approximation.
987 */
988template <bool islog>
989qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
990
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100991/** Calculate saturating exponential fixed point 8 bit (8 elements)
992 *
993 * @param[in] a 8 bit fixed point input vector
994 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
995 *
996 * @return The result of the 8 bit saturating exponential
997 */
998qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
999
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001000/** Calculate saturating exponential fixed point 16 bit (4 elements)
1001 *
1002 * @param[in] a 16 bit fixed point input vector
1003 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1004 *
1005 * @return The result of the 16 bit saturating exponential
1006 */
1007qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
1008
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001009/** Calculate saturating exponential fixed point 8 bit (16 elements)
1010 *
1011 * @param[in] a 8 bit fixed point input vector
1012 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1013 *
1014 * @return The result of the 8 bit saturating exponential
1015 */
1016qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
1017
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001018/** Calculate saturating exponential fixed point 16 bit (8 elements)
1019 *
1020 * @param[in] a 16 bit fixed point input vector
1021 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1022 *
1023 * @return The result of the 16 bit saturating exponential, as a 16 bit fixed point vector with 8 elements
1024 */
1025qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
1026
1027/** Calculate logarithm fixed point 8 bit (8 elements)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001028 *
1029 * @param[in] a 8 bit fixed point input vector
1030 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1031 *
1032 * @return The result of the 8 bit logarithm.
1033 */
1034qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
1035
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001036/** Calculate logarithm fixed point 16 bit (4 elements)
1037 *
1038 * @param[in] a 16 bit fixed point input vector
1039 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1040 *
1041 * @return The result of the 16 bit logarithm, as a 16 bit fixed point vector with 4 elements.
1042 */
1043qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
1044
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001045/** Calculate logarithm fixed point 8 bit (16 elements)
1046 *
1047 * @param[in] a 8 bit fixed point input vector
1048 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1049 *
1050 * @return The result of the 8 bit logarithm.
1051 */
1052qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
1053
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001054/** Calculate logarithm fixed point 16 bit (8 elements)
1055 *
1056 * @param[in] a 16 bit fixed point input vector
1057 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1058 *
1059 * @return The result of the 16 bit logarithm, as a 16 bit fixed point vector with 8 elements.
1060 */
1061qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
1062
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001063/** Calculate inverse square root for fixed point 8 bit using the Newton-Raphson method (8 elements)
1064 *
1065 * @param[in] a 8 bit fixed point input vector
1066 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1067 *
1068 * @return The result of the 8 bit inverse sqrt.
1069 */
1070qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1071
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001072/** Calculate inverse square root for fixed point 16 bit using the Newton-Raphson method (4 elements)
1073 *
1074 * @param[in] a 16 bit fixed point input vector
1075 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1076 *
1077 * @return The result of the 16 bit inverse sqrt.
1078 */
1079qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1080
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001081/** Calculate saturating inverse square root for fixed point 8 bit using the Newton-Raphson method (8 elements)
1082 *
1083 * @param[in] a 8 bit fixed point input vector
1084 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1085 *
1086 * @return The result of the 8 bit inverse sqrt.
1087 */
1088qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1089
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001090/** Calculate saturating inverse square root for fixed point 16 bit using the Newton-Raphson method (4 elements)
1091 *
1092 * @param[in] a 16 bit fixed point input vector
1093 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1094 *
1095 * @return The result of the 16 bit inverse sqrt.
1096 */
1097qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1098
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001099/** Calculate inverse square root for fixed point 8 bit using the Newton-Raphson method (16 elements)
1100 *
1101 * @param[in] a 8 bit fixed point input vector
1102 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1103 *
1104 * @return The result of the 8 bit inverse sqrt.
1105 */
1106qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1107
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001108/** Calculate inverse square root for fixed point 16 bit using the Newton-Raphson method (8 elements)
1109 *
1110 * @param[in] a 16 bit fixed point input vector
1111 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1112 *
1113 * @return The result of the 16 bit inverse sqrt.
1114 */
1115qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1116
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001117/** Calculate saturating inverse square root for fixed point 8 bit using the Newton-Raphson method (16 elements)
1118 *
1119 * @param[in] a 8 bit fixed point input vector
1120 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1121 *
1122 * @return The result of the 8 bit inverse sqrt.
1123 */
1124qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1125
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001126/** Calculate saturating inverse square root for fixed point 16 bit using the Newton-Raphson method (8 elements)
1127 *
1128 * @param[in] a 16 bit fixed point input vector
1129 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1130 *
1131 * @return The result of the 16 bit inverse sqrt.
1132 */
1133qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1134
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001135/** Calculate hyperbolic tangent for fixed point 8 bit (8 elements)
1136 *
1137 * @param[in] a 8 bit fixed point input vector
1138 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1139 *
1140 * @return The calculated hyperbolic tangent, as an 8 bit fixed point vector with 8 elements.
1141 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001142qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001143
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001144/** Calculate hyperbolic tangent for fixed point 16 bit (4 elements)
1145 *
1146 * @param[in] a 16 bit fixed point input vector
1147 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1148 *
1149 * @return The calculated hyperbolic tangent, as a 16 bit fixed point vector with 4 elements.
1150 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001151qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001152
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001153/** Calculate hyperbolic tangent for fixed point 8 bit (16 elements)
1154 *
1155 * @param[in] a 8 bit fixed point input vector
1156 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1157 *
1158 * @return The calculated hyperbolic tangent, as an 8 bit fixed point vector with 16 elements.
1159 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001160qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position);
1161
1162/** Calculate hyperbolic tangent for fixed point 16 bit (8 elements)
1163 *
1164 * @param[in] a 16 bit fixed point input vector
1165 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1166 *
1167 * @return The calculated hyperbolic tangent, as a 16 bit fixed point vector with 8 elements.
1168 */
1169qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001170
1171/** Calculate saturating n power for fixed point 8 bit (16 elements).
1172 *
1173 * pow(a,b) = e^(b*log(a))
1174 *
 * NOTE(review): this brief says 16 elements and @p b is a qint8x16_t, but @p a and the
 * return value are declared as qint8x8_t (8 elements). Confirm the intended signature
 * against the implementation before relying on this declaration.
 *
1175 * @param[in] a 8 bit fixed point input vector
1176 * @param[in] b 8 bit fixed point power vector
1177 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1178 *
1179 * @return The result of the 8 bit power.
1180 */
1181qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001182
1183/** Compute the lane-by-lane maximum of two float vectors with 4x2 elements each
1184 *
1185 * @param[in] a First float input vector
1186 * @param[in] b Second float input vector
1187 *
1188 * @return The lane-by-lane maximum -> float32x4x2
1189 */
1190float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001191}
1192#include "arm_compute/core/NEON/NEFixedPoint.inl"
1193#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */