blob: 08f680801dd4813ec4c9ec6b208290ce362d506b [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
25#define __ARM_COMPUTE_NEFIXEDPOINT_H__
26
27#include "arm_compute/core/FixedPoint.h"
28
29#include <arm_neon.h>
30
31namespace arm_compute
32{
/* Fixed point vector types: each alias below names the <arm_neon.h> integer vector type on its right-hand side. */
33using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
34using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
35using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
36using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
37using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
38using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
39using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
40using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
41using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
42using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
43using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
44using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
45using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
46using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
47using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
48using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
Georgios Pinitas9247c922017-06-28 18:29:47 +010049using qint32x2_t = int32x2_t; /**< 32 bit fixed point vector with 2 elements */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010050using qint32x4_t = int32x4_t; /**< 32 bit fixed point vector with 4 elements */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010051
52/** Get the lower half of a 16 elements vector
53 *
54 * @param[in] a vector of 16 elements
55 *
56 * @return 8 bit fixed point vector (8 elements)
57 */
58qint8x8_t vget_low_qs8(qint8x16_t a);
59
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010060/** Get the lower half of an 8 elements vector
61 *
62 * @param[in] a vector of 8 elements
63 *
64 * @return 16 bit fixed point vector (4 elements)
65 */
66qint16x4_t vget_low_qs16(qint16x8_t a);
67
Anthony Barbier6ff3b192017-09-04 18:44:23 +010068/** Get the higher half of a 16 elements vector
69 *
70 * @param[in] a vector of 16 elements
71 *
72 * @return 8 bit fixed point vector (8 elements)
73 */
74qint8x8_t vget_high_qs8(qint8x16_t a);
75
Michalis Spyrou0a8334c2017-06-14 18:00:05 +010076/** Get the higher half of an 8 elements vector
77 *
78 * @param[in] a vector of 8 elements
79 *
80 * @return 16 bit fixed point vector (4 elements)
81 */
82qint16x4_t vget_high_qs16(qint16x8_t a);
83
Anthony Barbier6ff3b192017-09-04 18:44:23 +010084/** Load a single 8 bit fixed point vector from memory (8 elements)
85 *
86 * @param[in] addr Memory address of the 8 bit fixed point vector to load
87 *
88 * @return 8 bit fixed point vector (8 elements)
89 */
90qint8x8_t vld1_qs8(const qint8_t *addr);
91
Anthony Barbier6ff3b192017-09-04 18:44:23 +010092/** Load a single 16 bit fixed point vector from memory (4 elements)
93 *
94 * @param[in] addr Memory address of the 16 bit fixed point vector to load
95 *
96 * @return 16 bit fixed point vector (4 elements)
97 */
98qint16x4_t vld1_qs16(const qint16_t *addr);
99
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100100/** Load a single 8 bit fixed point vector from memory (16 elements)
101 *
102 * @param[in] addr Memory address of the 8 bit fixed point vector to load
103 *
104 * @return 8 bit fixed point vector (16 elements)
105 */
106qint8x16_t vld1q_qs8(const qint8_t *addr);
107
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100108/** Load a single 16 bit fixed point vector from memory (8 elements)
109 *
110 * @param[in] addr Memory address of the 16 bit fixed point vector to load
111 *
112 * @return 16 bit fixed point vector (8 elements)
113 */
114qint16x8_t vld1q_qs16(const qint16_t *addr);
115
116/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
117 *
118 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
119 *
120 * @return 8 bit fixed point vector (8 elements)
121 */
122qint8x8_t vld1_dup_qs8(const qint8_t *addr);
123
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100124/** Load all lanes of 16 bit fixed point vector with same value from memory (4 elements)
125 *
126 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
127 *
128 * @return 16 bit fixed point vector (4 elements)
129 */
130qint16x4_t vld1_dup_qs16(const qint16_t *addr);
131
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100132/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
133 *
134 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
135 *
136 * @return 8 bit fixed point vector (16 elements)
137 */
138qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
139
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100140/** Load all lanes of 16 bit fixed point vector with same value from memory (8 elements)
141 *
142 * @param[in] addr Memory address of the 16 bit fixed point scalar value to load
143 *
144 * @return 16 bit fixed point vector (8 elements)
145 */
146qint16x8_t vld1q_dup_qs16(const qint16_t *addr);
147
Michele Di Giorgio81f0d152017-07-11 15:00:52 +0100148/** Load two 16 bit fixed point vectors from memory (8x2 elements)
149 *
150 * @param[in] addr Memory address of the 16 bit fixed point vectors to load
151 *
152 * @return 16 bit fixed point vectors (8x2 elements)
 *
 * @note NOTE(review): unlike the other load declarations in this header, @p addr is not
 *       const-qualified here - confirm whether this is intentional before relying on it.
153 */
154qint16x8x2_t vld2q_qs16(qint16_t *addr);
155
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100156/** Store a single 8 bit fixed point vector to memory (8 elements)
157 *
158 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
159 * @param[in] b 8 bit fixed point vector to store
160 *
161 */
162void vst1_qs8(qint8_t *addr, qint8x8_t b);
163
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100164/** Store a single 16 bit fixed point vector to memory (4 elements)
165 *
166 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
167 * @param[in] b 16 bit fixed point vector to store
168 *
169 */
170void vst1_qs16(qint16_t *addr, qint16x4_t b);
171
172/** Store a single 8 bit fixed point vector to memory (16 elements)
173 *
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100174 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
175 * @param[in] b 8 bit fixed point vector to store
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100176 *
177 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100178void vst1q_qs8(qint8_t *addr, qint8x16_t b);
179
180/** Store a single 16 bit fixed point vector to memory (8 elements)
181*
182* @param[in] addr Memory address where the 16 bit fixed point vector should be stored
183* @param[in] b 16 bit fixed point vector to store
184*
185*/
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100186void vst1q_qs16(qint16_t *addr, qint16x8_t b);
187
Georgios Pinitasccc65d42017-06-27 17:39:11 +0100188/** Store two 16 bit fixed point vectors to memory (8x2 elements)
189*
190* @param[in] addr Memory address where the 16 bit fixed point vectors should be stored
191* @param[in] b 16 bit fixed point vectors to store
192*
193*/
194void vst2q_qs16(qint16_t *addr, qint16x8x2_t b);
195
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100196/** 16 bit fixed point vector saturating narrow (8 elements)
197 *
198 * @param[in] a 16 bit fixed point vector to convert
199 *
200 * @return 8 bit fixed point vector
201 */
202qint8x8_t vqmovn_q16(qint16x8_t a);
203
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100204/** 32 bit fixed point vector saturating narrow (4 elements)
205 *
206 * @param[in] a 32 bit fixed point vector to convert
207 *
208 * @return 16 bit fixed point vector
209 */
210qint16x4_t vqmovn_q32(qint32x4_t a);
211
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100212/** 8 bit fixed point vector duplicate (8 elements)
213 *
214 * @param[in] a 8 bit fixed point to duplicate
215 *
216 * @return The result of the vector duplication
217 */
218qint8x8_t vdup_n_qs8(qint8_t a);
219
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100220/** 16 bit fixed point vector duplicate (4 elements)
221 *
222 * @param[in] a 16 bit fixed point to duplicate
223 *
224 * @return The result of the vector duplication
225 */
226qint16x4_t vdup_n_qs16(qint16_t a);
227
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100228/** 8 bit fixed point vector duplicate (16 elements)
229 *
230 * @param[in] a 8 bit fixed point to duplicate
231 *
232 * @return The result of the vector duplication
233 */
234qint8x16_t vdupq_n_qs8(qint8_t a);
235
236/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
237 *
Michele Di Giorgiod5e65c72017-07-26 17:09:17 +0100238 * @param[in] a floating point value to convert and duplicate
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100239 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
240 *
241 * @return The result of the vector duplication
242 */
243qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
244
Michele Di Giorgiod5e65c72017-07-26 17:09:17 +0100245/** Duplicate a float and convert it to 16 bit fixed point vector (8 elements)
246 *
247 * @param[in] a floating point value to convert and duplicate
248 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
249 *
250 * @return The result of the vector duplication
251 */
252qint16x8_t vdupq_n_qs16_f32(float a, int fixed_point_position);
253
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100254/** 16 bit fixed point vector duplicate (8 elements)
255 *
256 * @param[in] a 16 bit fixed point to duplicate
257 *
258 * @return The result of the vector duplication
 *
 * @note NOTE(review): @p a is declared with the vector type qint16x8_t, whereas the sibling
 *       declarations vdup_n_qs16 and vdupq_n_qs8 take a scalar (qint16_t / qint8_t). This looks
 *       like it should be qint16_t - confirm against the implementation.
259 */
260qint16x8_t vdupq_n_qs16(qint16x8_t a);
261
262/** Absolute value of 8 bit fixed point vector (8 elements)
263 *
264 * @param[in] a 8 bit fixed point input vector
265 *
266 * @return The result of the 8 bit fixed point vector absolute value
267 */
268qint8x8_t vabs_qs8(qint8x8_t a);
269
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100270/** Absolute value of 16 bit fixed point vector (4 elements)
271 *
272 * @param[in] a 16 bit fixed point input vector
273 *
274 * @return The result of the 16 bit fixed point vector absolute value
275 */
276qint16x4_t vabs_qs16(qint16x4_t a);
277
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100278/** Absolute value of 8 bit fixed point vector (16 elements)
279 *
280 * @param[in] a 8 bit fixed point input vector
281 *
282 * @return The result of the 8 bit fixed point vector absolute value
283 */
284qint8x16_t vabsq_qs8(qint8x16_t a);
285
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100286/** Absolute value of 16 bit fixed point vector (8 elements)
287 *
288 * @param[in] a 16 bit fixed point input vector
289 *
290 * @return The result of the 16 bit fixed point vector absolute value
291 */
292qint16x8_t vabsq_qs16(qint16x8_t a);
293
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100294/** Saturating absolute value of 8 bit fixed point vector (8 elements)
295 *
296 * @param[in] a 8 bit fixed point input vector
297 *
298 * @return The result of the 8 bit fixed point vector absolute value
299 */
300qint8x8_t vqabs_qs8(qint8x8_t a);
301
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100302/** Saturating absolute value of 16 bit fixed point vector (4 elements)
303 *
304 * @param[in] a 16 bit fixed point input vector
305 *
306 * @return The result of the 16 bit fixed point vector absolute value
307 */
308qint16x4_t vqabs_qs16(qint16x4_t a);
309
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100310/** Saturating absolute value of 8 bit fixed point vector (16 elements)
311 *
312 * @param[in] a 8 bit fixed point input vector
313 *
314 * @return The result of the 8 bit fixed point vector absolute value
315 */
316qint8x16_t vqabsq_qs8(qint8x16_t a);
317
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100318/** Saturating absolute value of 16 bit fixed point vector (8 elements)
319 *
320 * @param[in] a 16 bit fixed point input vector
321 *
322 * @return The result of the 16 bit fixed point vector absolute value
323 */
324qint16x8_t vqabsq_qs16(qint16x8_t a);
325
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100326/** 8 bit fixed point vector max (8 elements)
327 *
328 * @param[in] a First 8 bit fixed point input vector
329 * @param[in] b Second 8 bit fixed point input vector
330 *
331 * @return The result of the 8 bit fixed point vector max operation
332 */
333qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
334
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100335/** 16 bit fixed point vector max (4 elements)
336 *
337 * @param[in] a First 16 bit fixed point input vector
338 * @param[in] b Second 16 bit fixed point input vector
339 *
340 * @return The result of the 16 bit fixed point vector max operation
341 */
342qint16x4_t vmax_qs16(qint16x4_t a, qint16x4_t b);
343
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100344/** 8 bit fixed point vector max (16 elements)
345 *
346 * @param[in] a First 8 bit fixed point input vector
347 * @param[in] b Second 8 bit fixed point input vector
348 *
349 * @return The result of the 8 bit fixed point vector max operation
350 */
351qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
352
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100353/** 16 bit fixed point vector max (8 elements)
354 *
355 * @param[in] a First 16 bit fixed point input vector
356 * @param[in] b Second 16 bit fixed point input vector
357 *
358 * @return The result of the 16 bit fixed point vector max operation
359 */
360qint16x8_t vmaxq_qs16(qint16x8_t a, qint16x8_t b);
361
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100362/** 8 bit fixed point vector pairwise max (8 elements)
363 *
364 * @param[in] a First 8 bit fixed point input vector
365 * @param[in] b Second 8 bit fixed point input vector
366 *
367 * @return The result of the 8 bit fixed point vector pairwise max operation
368 */
369qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
370
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100371/** 16 bit fixed point vector pairwise max (4 elements)
372 *
373 * @param[in] a First 16 bit fixed point input vector
374 * @param[in] b Second 16 bit fixed point input vector
375 *
376 * @return The result of the 16 bit fixed point vector pairwise max operation
377 */
378qint16x4_t vpmax_qs16(qint16x4_t a, qint16x4_t b);
379
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100380/** 8 bit fixed point vector min (8 elements)
381 *
382 * @param[in] a First 8 bit fixed point input vector
383 * @param[in] b Second 8 bit fixed point input vector
384 *
385 * @return The result of the 8 bit fixed point vector min operation
386 */
387qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
388
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100389/** 16 bit fixed point vector min (4 elements)
390 *
391 * @param[in] a First 16 bit fixed point input vector
392 * @param[in] b Second 16 bit fixed point input vector
393 *
394 * @return The result of the 16 bit fixed point vector min operation
395 */
396qint16x4_t vmin_qs16(qint16x4_t a, qint16x4_t b);
397
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100398/** 8 bit fixed point vector min (16 elements)
399 *
400 * @param[in] a First 8 bit fixed point input vector
401 * @param[in] b Second 8 bit fixed point input vector
402 *
403 * @return The result of the 8 bit fixed point vector min operation
404 */
405qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
406
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100407/** 16 bit fixed point vector min (8 elements)
408 *
409 * @param[in] a First 16 bit fixed point input vector
410 * @param[in] b Second 16 bit fixed point input vector
411 *
412 * @return The result of the 16 bit fixed point vector min operation
413 */
414qint16x8_t vminq_qs16(qint16x8_t a, qint16x8_t b);
415
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100416/** 8 bit fixed point vector pairwise min (8 elements)
417 *
418 * @param[in] a First 8 bit fixed point input vector
419 * @param[in] b Second 8 bit fixed point input vector
420 *
421 * @return The result of the 8 bit fixed point vector pairwise min operation
422 */
423qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
424
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100425/** 16 bit fixed point vector pairwise min (4 elements)
426 *
427 * @param[in] a First 16 bit fixed point input vector
428 * @param[in] b Second 16 bit fixed point input vector
429 *
430 * @return The result of the 16 bit fixed point vector pairwise min operation
431 */
432qint16x4_t vpmin_qs16(qint16x4_t a, qint16x4_t b);
433
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100434/** 8 bit fixed point vector add (8 elements)
435 *
436 * @param[in] a First 8 bit fixed point input vector
437 * @param[in] b Second 8 bit fixed point input vector
438 *
439 * @return The result of the 8 bit fixed point vector addition
440 */
441qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
442
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100443/** 16 bit fixed point vector add (4 elements)
444 *
445 * @param[in] a First 16 bit fixed point input vector
446 * @param[in] b Second 16 bit fixed point input vector
447 *
448 * @return The result of the 16 bit fixed point vector addition
449 */
450qint16x4_t vadd_qs16(qint16x4_t a, qint16x4_t b);
451
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100452/** 8 bit fixed point vector add (16 elements)
453 *
454 * @param[in] a First 8 bit fixed point input vector
455 * @param[in] b Second 8 bit fixed point input vector
456 *
457 * @return The result of the 8 bit fixed point vector addition
458 */
459qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
460
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100461/** 16 bit fixed point vector add (8 elements)
462 *
463 * @param[in] a First 16 bit fixed point input vector
464 * @param[in] b Second 16 bit fixed point input vector
465 *
466 * @return The result of the 16 bit fixed point vector addition
467 */
468qint16x8_t vaddq_qs16(qint16x8_t a, qint16x8_t b);
469
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100470/** 8 bit fixed point vector saturating add (8 elements)
471 *
472 * @param[in] a First 8 bit fixed point input vector
473 * @param[in] b Second 8 bit fixed point input vector
474 *
475 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
476 */
477qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
478
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100479/** 16 bit fixed point vector saturating add (4 elements)
480 *
481 * @param[in] a First 16 bit fixed point input vector
482 * @param[in] b Second 16 bit fixed point input vector
483 *
484 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
485 */
486qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
487
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100488/** 8 bit fixed point vector saturating add (16 elements)
489 *
490 * @param[in] a First 8 bit fixed point input vector
491 * @param[in] b Second 8 bit fixed point input vector
492 *
493 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
494 */
495qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
496
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100497/** 16 bit fixed point vector saturating add (8 elements)
498 *
499 * @param[in] a First 16 bit fixed point input vector
500 * @param[in] b Second 16 bit fixed point input vector
501 *
502 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
503 */
504qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
505
506/** 8 bit fixed point vector saturating pairwise add (8 elements)
507 *
508 * @param[in] a 8 bit fixed point input vector
509 *
510 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
 *
 * @note NOTE(review): the name follows the NEON "vpaddl" (pairwise add long) convention, which
 *       presumably widens the result rather than saturating it - confirm the "saturating" wording
 *       against the implementation. The return type is also the raw int16x4_t rather than the
 *       qint16x4_t alias used by the other declarations in this header.
511 */
512int16x4_t vpaddl_qs8(qint8x8_t a);
513
514/** 8 bit fixed point vector subtraction (8 elements)
515 *
516 * @param[in] a First 8 bit fixed point input vector
517 * @param[in] b Second 8 bit fixed point input vector
518 *
519 * @return The result of the 8 bit fixed point vector subtraction
520 */
521qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
522
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100523/** 16 bit fixed point vector subtraction (4 elements)
524 *
525 * @param[in] a First 16 bit fixed point input vector
526 * @param[in] b Second 16 bit fixed point input vector
527 *
528 * @return The result of the 16 bit fixed point vector subtraction
529 */
530qint16x4_t vsub_qs16(qint16x4_t a, qint16x4_t b);
531
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100532/** 8 bit fixed point vector subtraction (16 elements)
533 *
534 * @param[in] a First 8 bit fixed point input vector
535 * @param[in] b Second 8 bit fixed point input vector
536 *
537 * @return The result of the 8 bit fixed point vector subtraction
538 */
539qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
540
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100541/** 16 bit fixed point vector subtraction (8 elements)
542 *
543 * @param[in] a First 16 bit fixed point input vector
544 * @param[in] b Second 16 bit fixed point input vector
545 *
546 * @return The result of the 16 bit fixed point vector subtraction
547 */
548qint16x8_t vsubq_qs16(qint16x8_t a, qint16x8_t b);
549
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100550/** 8 bit fixed point vector saturating subtraction (8 elements)
551 *
552 * @param[in] a First 8 bit fixed point input vector
553 * @param[in] b Second 8 bit fixed point input vector
554 *
555 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
556 */
557qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
558
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100559/** 16 bit fixed point vector saturating subtraction (4 elements)
560 *
561 * @param[in] a First 16 bit fixed point input vector
562 * @param[in] b Second 16 bit fixed point input vector
563 *
564 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
565 */
566qint16x4_t vqsub_qs16(qint16x4_t a, qint16x4_t b);
567
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100568/** 8 bit fixed point vector saturating subtraction (16 elements)
569 *
570 * @param[in] a First 8 bit fixed point input vector
571 * @param[in] b Second 8 bit fixed point input vector
572 *
573 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
574 */
575qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
576
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100577/** 16 bit fixed point vector saturating subtraction (8 elements)
578 *
579 * @param[in] a First 16 bit fixed point input vector
580 * @param[in] b Second 16 bit fixed point input vector
581 *
582 * @return The result of the 16 bit fixed point vector subtraction. The result is saturated in case of overflow
583 */
584qint16x8_t vqsubq_qs16(qint16x8_t a, qint16x8_t b);
585
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100586/** 8 bit fixed point vector multiply (8 elements)
587 *
588 * @param[in] a First 8 bit fixed point input vector
589 * @param[in] b Second 8 bit fixed point input vector
590 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
591 *
592 * @return The result of the 8 bit fixed point vector multiplication.
593 */
594qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
595
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100596/** 16 bit fixed point vector multiply (4 elements)
597 *
598 * @param[in] a First 16 bit fixed point input vector
599 * @param[in] b Second 16 bit fixed point input vector
600 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
601 *
602 * @return The result of the 16 bit fixed point vector multiplication.
603 */
604qint16x4_t vmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
605
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100606/** 8 bit fixed point vector multiply (16 elements)
607 *
608 * @param[in] a First 8 bit fixed point input vector
609 * @param[in] b Second 8 bit fixed point input vector
610 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
611 *
612 * @return The result of the 8 bit fixed point vector multiplication.
613 */
614qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
615
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100616/** 16 bit fixed point vector multiply (8 elements)
617 *
618 * @param[in] a First 16 bit fixed point input vector
619 * @param[in] b Second 16 bit fixed point input vector
620 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
621 *
622 * @return The result of the 16 bit fixed point vector multiplication.
623 */
624qint16x8_t vmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
625
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100626/** 8 bit fixed point vector saturating multiply (8 elements)
627 *
628 * @param[in] a First 8 bit fixed point input vector
629 * @param[in] b Second 8 bit fixed point input vector
630 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
631 *
632 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
633 */
634qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
635
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100636/** 16 bit fixed point vector saturating multiply (4 elements)
637 *
638 * @param[in] a First 16 bit fixed point input vector
639 * @param[in] b Second 16 bit fixed point input vector
640 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
641 *
642 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
643 */
644qint16x4_t vqmul_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
645
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100646/** 8 bit fixed point vector saturating multiply (16 elements)
647 *
648 * @param[in] a First 8 bit fixed point input vector
649 * @param[in] b Second 8 bit fixed point input vector
650 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
651 *
652 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
653 */
654qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
655
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100656/** 16 bit fixed point vector saturating multiply (8 elements)
657 *
658 * @param[in] a First 16 bit fixed point input vector
659 * @param[in] b Second 16 bit fixed point input vector
660 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
661 *
662 * @return The result of the 16 bit fixed point vector multiplication. The result is saturated in case of overflow
663 */
664qint16x8_t vqmulq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
665
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100666/** 8 bit fixed point vector long multiply (8 elements)
667 *
668 * @param[in] a First 8 bit fixed point input vector
669 * @param[in] b Second 8 bit fixed point input vector
670 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
671 *
672 * @return The result of the 8 bit fixed point long vector multiplication.
673 */
674qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
675
676/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
677 *
678 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
679 * @param[in] b Second 8 bit fixed point input vector
680 * @param[in] c Third 8 bit fixed point input vector
681 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
682 *
683 * @return The result of the 8 bit fixed point vector multiply-accumulate
684 */
685qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
686
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100687/** 16 bit fixed point vector multiply-accumulate (4 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
688 *
689 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
690 * @param[in] b Second 16 bit fixed point input vector
691 * @param[in] c Third 16 bit fixed point input vector
692 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
693 *
694 * @return The result of the 16 bit fixed point vector multiply-accumulate
695 */
696qint16x4_t vmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
697
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100698/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
699 *
700 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
701 * @param[in] b Second 8 bit fixed point input vector
702 * @param[in] c Third 8 bit fixed point input vector
703 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
704 *
705 * @return The result of the 8 bit fixed point vector multiply-accumulate
706 */
707qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
708
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100709/** 16 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
710 *
711 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
712 * @param[in] b Second 16 bit fixed point input vector
713 * @param[in] c Third 16 bit fixed point input vector
714 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
715 *
716 * @return The result of the 16 bit fixed point vector multiply-accumulate
717 */
718qint16x8_t vmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
719
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100720/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
721 *
722 * @param[in] a First 8 bit fixed point input vector, to which the product of @p b and @p c is added
723 * @param[in] b Second 8 bit fixed point input vector
724 * @param[in] c Third 8 bit fixed point input vector
725 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
726 *
727 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
728 */
729qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
730
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100731/** 16 bit fixed point vector saturating multiply-accumulate (4 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
732 *
733 * @param[in] a First 16 bit fixed point input vector, to which the product of @p b and @p c is added
734 * @param[in] b Second 16 bit fixed point input vector
735 * @param[in] c Third 16 bit fixed point input vector
736 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
737 *
738 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
739 */
740qint16x4_t vqmla_qs16(qint16x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
741
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100742/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
743 *
744 * @param[in] a First 8 bit fixed point input vector, to which the product of @p b and @p c is added
745 * @param[in] b Second 8 bit fixed point input vector
746 * @param[in] c Third 8 bit fixed point input vector
747 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
748 *
749 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
750 */
751qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
752
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100753/** 16 bit fixed point vector saturating multiply-accumulate (8 elements). This operation computes the product of @p b and @p c and adds the result to @p a (a + b * c).
754 *
755 * @param[in] a First 16 bit fixed point input vector, to which the product of @p b and @p c is added
756 * @param[in] b Second 16 bit fixed point input vector
757 * @param[in] c Third 16 bit fixed point input vector
758 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
759 *
760 * @return The result of the 16 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
761 */
762qint16x8_t vqmlaq_qs16(qint16x8_t a, qint16x8_t b, qint16x8_t c, int fixed_point_position);
763
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100764/** 8 bit fixed point vector multiply-accumulate long (8 elements).
765 * This operation computes the product of @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c).
766 *
767 * @param[in] a First 16 bit fixed point input vector, to which the product of @p b and @p c is added
768 * @param[in] b Second 8 bit fixed point input vector
769 * @param[in] c Third 8 bit fixed point input vector
770 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
771 *
772 * @return The result of the 8 bit fixed point vector multiply-accumulate long, as a 16 bit fixed point vector
773 */
774qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
775
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100776/** 16 bit fixed point vector multiply-accumulate long (4 elements).
777 * This operation computes the product of @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c).
778 *
779 * @param[in] a First 32 bit fixed point input vector, to which the product of @p b and @p c is added
780 * @param[in] b Second 16 bit fixed point input vector
781 * @param[in] c Third 16 bit fixed point input vector
782 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
783 *
784 * @return The result of the 16 bit fixed point vector multiply-accumulate long, as a 32 bit fixed point vector
785 */
786qint32x4_t vmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
787
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100788/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
789 * This operation computes the product of @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c).
790 *
791 * @param[in] a First 16 bit fixed point input vector, to which the product of @p b and @p c is added
792 * @param[in] b Second 8 bit fixed point input vector
793 * @param[in] c Third 8 bit fixed point input vector
794 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
795 *
796 * @return The result of the 8 bit fixed point vector saturating multiply-accumulate long, as a 16 bit fixed point vector
797 */
798qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
799
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100800/** 16 bit fixed point vector saturating multiply-accumulate long (4 elements). The saturation is performed on the 32 bit fixed point output vector.
801 * This operation computes the product of @p b and @p c and adds the result to the 32 bit fixed point vector @p a (a + b * c).
802 *
803 * @param[in] a First 32 bit fixed point input vector, to which the product of @p b and @p c is added
804 * @param[in] b Second 16 bit fixed point input vector
805 * @param[in] c Third 16 bit fixed point input vector
806 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
807 *
808 * @return The result of the 16 bit fixed point vector saturating multiply-accumulate long, as a 32 bit fixed point vector
809 */
810qint32x4_t vqmlal_qs16(qint32x4_t a, qint16x4_t b, qint16x4_t c, int fixed_point_position);
811
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100812/** Convert a float vector with 4x2 elements to an 8 bit fixed point vector with 8 elements
813 *
814 * @param[in] a Float input vector (two groups of 4 floats)
815 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
816 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100817 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100818 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100819qint8x8_t vqcvt_qs8_f32(const float32x4x2_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100820
821/** Convert a float vector with 4 elements to a 16 bit fixed point vector with 4 elements
822 *
823 * @param[in] a Float input vector (4 floats)
824 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
825 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100826 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100827 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100828qint16x4_t vqcvt_qs16_f32(const float32x4_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100829
830/** Convert a float vector with 4x4 elements to an 8 bit fixed point vector with 16 elements
831 *
832 * @param[in] a Float input vector (four groups of 4 floats)
833 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
834 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100835 * @return The result of the conversion float -> 8 bit fixed point. The result is saturated in case of overflow
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100836 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100837qint8x16_t vqcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100838
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100839/** Convert a float vector with 4x2 elements to a 16 bit fixed point vector with 8 elements
840 *
841 * @param[in] a Float input vector (two groups of 4 floats)
842 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
843 *
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100844 * @return The result of the conversion float -> 16 bit fixed point. The result is saturated in case of overflow
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100845 */
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100846qint16x8_t vqcvtq_qs16_f32(const float32x4x2_t &a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100847
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100848/** Convert an 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
849 *
850 * @param[in] a 8 bit fixed point input vector
851 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
852 *
853 * @return The result of the conversion 8 bit fixed point -> float32x4x2
854 */
855float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
856
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100857/** Convert a 16 bit fixed point vector with 4 elements to a float vector with 4 elements
858 *
859 * @param[in] a 16 bit fixed point input vector
860 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
861 *
862 * @return The result of the conversion 16 bit fixed point -> float32x4
863 */
864float32x4_t vcvt_f32_qs16(qint16x4_t a, int fixed_point_position);
865
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100866/** Convert an 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
867 *
868 * @param[in] a 8 bit fixed point input vector
869 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
870 *
871 * @return The result of the conversion 8 bit fixed point -> float32x4x4
872 */
873float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
874
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100875/** Convert a 16 bit fixed point vector with 8 elements to a float vector with 4x2 elements
876 *
877 * @param[in] a 16 bit fixed point input vector (8 elements)
878 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
879 *
880 * @return The result of the conversion 16 bit fixed point -> float32x4x2
881 */
882float32x4x2_t vcvtq_qs16_f32(qint16x8_t a, int fixed_point_position);
883
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100884/** Calculate the reciprocal of an 8 bit fixed point vector using the Newton-Raphson method. (8 elements)
885 *
886 * @param[in] a 8 bit fixed point input vector
887 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
888 *
889 * @return The result of the 8 bit reciprocal (1/a).
890 */
891qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
892
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100893/** Calculate the reciprocal of a 16 bit fixed point vector using the Newton-Raphson method. (4 elements)
894 *
895 * @param[in] a 16 bit fixed point input vector
896 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
897 *
898 * @return The result of the 16 bit reciprocal (1/a).
899 */
900qint16x4_t vrecip_qs16(qint16x4_t a, int fixed_point_position);
901
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100902/** Calculate the reciprocal of an 8 bit fixed point vector using the Newton-Raphson method. (16 elements)
903 *
904 * @param[in] a 8 bit fixed point input vector
905 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
906 *
907 * @return The result of the 8 bit reciprocal (1/a).
908 */
909qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
910
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100911/** Calculate the reciprocal of a 16 bit fixed point vector using the Newton-Raphson method. (8 elements)
912 *
913 * @param[in] a 16 bit fixed point input vector
914 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
915 *
916 * @return The result of the 16 bit reciprocal (1/a).
917 */
918qint16x8_t vrecipq_qs16(qint16x8_t a, int fixed_point_position);
919
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100920/** Division of 8 bit fixed point vectors (8 elements)
921 *
922 * @param[in] a First 8 bit fixed point input vector
923 * @param[in] b Second 8 bit fixed point input vector
924 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
925 *
926 * @return The quotient and remainder number in fixed point format. NOTE(review): a single 8 bit vector is returned; confirm against the implementation whether any remainder is actually represented.
927 */
928qint8x8_t vdiv_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
929
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100930/** Division of 16 bit fixed point vectors (4 elements)
931 *
932 * @param[in] a First 16 bit fixed point input vector
933 * @param[in] b Second 16 bit fixed point input vector
934 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
935 *
936 * @return The quotient and remainder number in fixed point format. NOTE(review): a single 16 bit vector is returned; confirm against the implementation whether any remainder is actually represented.
937 */
938qint16x4_t vdiv_qs16(qint16x4_t a, qint16x4_t b, int fixed_point_position);
939
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100940/** Division of 8 bit fixed point vectors (16 elements)
941 *
942 * @param[in] a First 8 bit fixed point input vector
943 * @param[in] b Second 8 bit fixed point input vector
944 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
945 *
946 * @return The quotient and remainder number in 8 bit fixed point format. NOTE(review): a single 8 bit vector is returned; confirm against the implementation whether any remainder is actually represented.
947 */
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100948qint8x16_t vdivq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
949
950/** Division of 16 bit fixed point vectors (8 elements)
951 *
952 * @param[in] a First 16 bit fixed point input vector
953 * @param[in] b Second 16 bit fixed point input vector
954 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
955 *
956 * @return The quotient and remainder number in 16 bit fixed point format. NOTE(review): a single 16 bit vector is returned; confirm against the implementation whether any remainder is actually represented.
957 */
958qint16x8_t vdivq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100959
960/** Perform a 4th degree polynomial approximation. (8 elements) NOTE(review): the @p islog template flag is not described here; presumably it selects the logarithm variant of the polynomial - confirm against the definition.
961 *
962 * @param[in] a 8 bit fixed point input vector
963 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
964 *
965 * @return The result of the 8 bit Taylor approximation.
966 */
967template <bool islog>
968qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
969
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100970/** Perform a 4th degree polynomial approximation. (4 elements) NOTE(review): the @p islog template flag is not described here; presumably it selects the logarithm variant of the polynomial - confirm against the definition.
971 *
972 * @param[in] a 16 bit fixed point input vector
973 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
974 *
975 * @return The result of the 16 bit Taylor approximation.
976 */
977template <bool islog>
978qint16x4_t vtaylor_poly_qs16(qint16x4_t a, int fixed_point_position);
979
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100980/** Perform a 4th degree polynomial approximation. (16 elements) NOTE(review): the @p islog template flag is not described here; presumably it selects the logarithm variant of the polynomial - confirm against the definition.
981 *
982 * @param[in] a 8 bit fixed point input vector
983 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
984 *
985 * @return The result of the 8 bit Taylor approximation.
986 */
987template <bool islog>
988qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
989
Michalis Spyrou0a8334c2017-06-14 18:00:05 +0100990/** Perform a 4th degree polynomial approximation. (8 elements) NOTE(review): the @p islog template flag is not described here; presumably it selects the logarithm variant of the polynomial - confirm against the definition.
991 *
992 * @param[in] a 16 bit fixed point input vector
993 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
994 *
995 * @return The result of the 16 bit Taylor approximation.
996 */
997template <bool islog>
998qint16x8_t vtaylor_polyq_qs16(qint16x8_t a, int fixed_point_position);
999
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001000/** Calculate the saturating exponential of an 8 bit fixed point vector (8 elements)
1001 *
1002 * @param[in] a 8 bit fixed point input vector
1003 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1004 *
1005 * @return The result of the 8 bit saturating exponential
1006 */
1007qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
1008
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001009/** Calculate the saturating exponential of a 16 bit fixed point vector (4 elements)
1010 *
1011 * @param[in] a 16 bit fixed point input vector
1012 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1013 *
1014 * @return The result of the 16 bit saturating exponential
1015 */
1016qint16x4_t vqexp_qs16(qint16x4_t a, int fixed_point_position);
1017
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001018/** Calculate the saturating exponential of an 8 bit fixed point vector (16 elements)
1019 *
1020 * @param[in] a 8 bit fixed point input vector
1021 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1022 *
1023 * @return The result of the 8 bit saturating exponential
1024 */
1025qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
1026
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001027/** Calculate the saturating exponential of a 16 bit fixed point vector (8 elements)
1028 *
1029 * @param[in] a 16 bit fixed point input vector
1030 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1031 *
1032 * @return The result of the 16 bit saturating exponential
1033 */
1034qint16x8_t vqexpq_qs16(qint16x8_t a, int fixed_point_position);
1035
1036/** Calculate the logarithm of an 8 bit fixed point vector (8 elements)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001037 *
1038 * @param[in] a 8 bit fixed point input vector
1039 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1040 *
1041 * @return The result of the 8 bit logarithm.
1042 */
1043qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
1044
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001045/** Calculate the logarithm of a 16 bit fixed point vector (4 elements)
1046 *
1047 * @param[in] a 16 bit fixed point input vector
1048 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1049 *
1050 * @return The result of the 16 bit logarithm.
1051 */
1052qint16x4_t vlog_qs16(qint16x4_t a, int fixed_point_position);
1053
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001054/** Calculate the logarithm of an 8 bit fixed point vector (16 elements)
1055 *
1056 * @param[in] a 8 bit fixed point input vector
1057 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1058 *
1059 * @return The result of the 8 bit logarithm.
1060 */
1061qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
1062
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001063/** Calculate the logarithm of a 16 bit fixed point vector (8 elements)
1064 *
1065 * @param[in] a 16 bit fixed point input vector
1066 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1067 *
1068 * @return The result of the 16 bit logarithm.
1069 */
1070qint16x8_t vlogq_qs16(qint16x8_t a, int fixed_point_position);
1071
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001072/** Calculate the inverse square root of an 8 bit fixed point vector using the Newton-Raphson method (8 elements)
1073 *
1074 * @param[in] a 8 bit fixed point input vector
1075 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1076 *
1077 * @return The result of the 8 bit inverse sqrt.
1078 */
1079qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1080
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001081/** Calculate the inverse square root of a 16 bit fixed point vector using the Newton-Raphson method (4 elements)
1082 *
1083 * @param[in] a 16 bit fixed point input vector
1084 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1085 *
1086 * @return The result of the 16 bit inverse sqrt.
1087 */
1088qint16x4_t vinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1089
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001090/** Calculate the saturating inverse square root of an 8 bit fixed point vector using the Newton-Raphson method (8 elements)
1091 *
1092 * @param[in] a 8 bit fixed point input vector
1093 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1094 *
1095 * @return The result of the 8 bit saturating inverse sqrt.
1096 */
1097qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
1098
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001099/** Calculate the saturating inverse square root of a 16 bit fixed point vector using the Newton-Raphson method (4 elements)
1100 *
1101 * @param[in] a 16 bit fixed point input vector
1102 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1103 *
1104 * @return The result of the 16 bit saturating inverse sqrt.
1105 */
1106qint16x4_t vqinvsqrt_qs16(qint16x4_t a, int fixed_point_position);
1107
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001108/** Calculate the inverse square root of an 8 bit fixed point vector using the Newton-Raphson method (16 elements)
1109 *
1110 * @param[in] a 8 bit fixed point input vector
1111 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1112 *
1113 * @return The result of the 8 bit inverse sqrt.
1114 */
1115qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1116
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001117/** Calculate the inverse square root of a 16 bit fixed point vector using the Newton-Raphson method (8 elements)
1118 *
1119 * @param[in] a 16 bit fixed point input vector
1120 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1121 *
1122 * @return The result of the 16 bit inverse sqrt.
1123 */
1124qint16x8_t vinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1125
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001126/** Calculate the saturating inverse square root of an 8 bit fixed point vector using the Newton-Raphson method (16 elements)
1127 *
1128 * @param[in] a 8 bit fixed point input vector
1129 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1130 *
1131 * @return The result of the 8 bit saturating inverse sqrt.
1132 */
1133qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
1134
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001135/** Calculate the saturating inverse square root of a 16 bit fixed point vector using the Newton-Raphson method (8 elements)
1136 *
1137 * @param[in] a 16 bit fixed point input vector
1138 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1139 *
1140 * @return The result of the 16 bit saturating inverse sqrt.
1141 */
1142qint16x8_t vqinvsqrtq_qs16(qint16x8_t a, int fixed_point_position);
1143
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001144/** Calculate the hyperbolic tangent of an 8 bit fixed point vector (8 elements)
1145 *
1146 * @param[in] a 8 bit fixed point input vector
1147 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1148 *
1149 * @return The calculated hyperbolic tangent, as an 8 bit fixed point vector.
1150 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001151qint8x8_t vqtanh_qs8(qint8x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001152
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001153/** Calculate the hyperbolic tangent of a 16 bit fixed point vector (4 elements)
1154 *
1155 * @param[in] a 16 bit fixed point input vector
1156 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1157 *
1158 * @return The calculated hyperbolic tangent, as a 16 bit fixed point vector.
1159 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001160qint16x4_t vqtanh_qs16(qint16x4_t a, int fixed_point_position);
Michalis Spyrou0a8334c2017-06-14 18:00:05 +01001161
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001162/** Calculate the hyperbolic tangent of an 8 bit fixed point vector (16 elements)
1163 *
1164 * @param[in] a 8 bit fixed point input vector
1165 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1166 *
1167 * @return The calculated hyperbolic tangent, as an 8 bit fixed point vector.
1168 */
Georgios Pinitasccc65d42017-06-27 17:39:11 +01001169qint8x16_t vqtanhq_qs8(qint8x16_t a, int fixed_point_position);
1170
1171/** Calculate the hyperbolic tangent of a 16 bit fixed point vector (8 elements)
1172 *
1173 * @param[in] a 16 bit fixed point input vector
1174 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1175 *
1176 * @return The calculated hyperbolic tangent, as a 16 bit fixed point vector.
1177 */
1178qint16x8_t vqtanhq_qs16(qint16x8_t a, int fixed_point_position);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001179
1180/** Calculate the saturating power (@p a raised to @p b) of 8 bit fixed point vectors (16 elements).
1181 *
1182 * pow(a,b) = e^(b*log(a))
1183 *
1184 * @param[in] a 8 bit fixed point input vector (the base)
1185 * @param[in] b 8 bit fixed point power vector (the exponent)
1186 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1187 *
1188 * @return The result of the 8 bit saturating power.
1189 */
Michele Di Giorgiod5e65c72017-07-26 17:09:17 +01001190qint8x16_t vqpowq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
1191
1192/** Calculate the saturating power (@p a raised to @p b) of 16 bit fixed point vectors (8 elements).
1193 *
1194 * pow(a,b) = e^(b*log(a))
1195 *
1196 * @param[in] a 16 bit fixed point input vector (the base)
1197 * @param[in] b 16 bit fixed point power vector (the exponent)
1198 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
1199 *
1200 * @return The result of the 16 bit saturating power.
1201 */
1202qint16x8_t vqpowq_qs16(qint16x8_t a, qint16x8_t b, int fixed_point_position);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +01001203
1204/** Compute the lane-by-lane maximum of two float vectors with 4x2 elements each
1205 *
1206 * @param[in] a First float input vector
1207 * @param[in] b Second float input vector
1208 *
1209 * @return The lane-by-lane maximum -> float32x4x2
1210 */
1211float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001212}
1213#include "arm_compute/core/NEON/NEFixedPoint.inl"
1214#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */