blob: 201c5b5e7efce7cc338a40312eb00c464d0a3aed [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEFIXEDPOINT_H__
25#define __ARM_COMPUTE_NEFIXEDPOINT_H__
26
27#include "arm_compute/core/FixedPoint.h"
28
29#include <arm_neon.h>
30
31namespace arm_compute
32{
33using qint8x8_t = int8x8_t; /**< 8 bit fixed point vector with 8 elements */
34using qint8x8x2_t = int8x8x2_t; /**< 8 bit fixed point vector with 16 elements */
35using qint8x8x3_t = int8x8x3_t; /**< 8 bit fixed point vector with 24 elements */
36using qint8x8x4_t = int8x8x4_t; /**< 8 bit fixed point vector with 32 elements */
37using qint8x16_t = int8x16_t; /**< 8 bit fixed point vector with 16 elements */
38using qint8x16x2_t = int8x16x2_t; /**< 8 bit fixed point vector with 32 elements */
39using qint8x16x3_t = int8x16x3_t; /**< 8 bit fixed point vector with 48 elements */
40using qint8x16x4_t = int8x16x4_t; /**< 8 bit fixed point vector with 64 elements */
41using qint16x4_t = int16x4_t; /**< 16 bit fixed point vector with 4 elements */
42using qint16x4x2_t = int16x4x2_t; /**< 16 bit fixed point vector with 8 elements */
43using qint16x4x3_t = int16x4x3_t; /**< 16 bit fixed point vector with 12 elements */
44using qint16x4x4_t = int16x4x4_t; /**< 16 bit fixed point vector with 16 elements */
45using qint16x8_t = int16x8_t; /**< 16 bit fixed point vector with 8 elements */
46using qint16x8x2_t = int16x8x2_t; /**< 16 bit fixed point vector with 16 elements */
47using qint16x8x3_t = int16x8x3_t; /**< 16 bit fixed point vector with 24 elements */
48using qint16x8x4_t = int16x8x4_t; /**< 16 bit fixed point vector with 32 elements */
49
50/** Get the lower half of a 16 elements vector
51 *
52 * @param[in] a vector of 16 elements
53 *
54 * @return 8 bit fixed point vector (8 elements)
55 */
56qint8x8_t vget_low_qs8(qint8x16_t a);
57
58/** Get the higher half of a 16 elements vector
59 *
60 * @param[in] a vector of 16 elements
61 *
62 * @return 8 bit fixed point vector (8 elements)
63 */
64qint8x8_t vget_high_qs8(qint8x16_t a);
65
66/** Load a single 8 bit fixed point vector from memory (8 elements)
67 *
68 * @param[in] addr Memory address of the 8 bit fixed point vector to load
69 *
70 * @return 8 bit fixed point vector (8 elements)
71 */
72qint8x8_t vld1_qs8(const qint8_t *addr);
73
74/** Load a single 8 bit fixed point vector from memory (16 elements)
75 *
76 * @param[in] addr Memory address of the 8 bit fixed point vector to load
77 *
78 * @return 8 bit fixed point vector (16 elements)
79 */
80qint8x16_t vld1q_qs8(const qint8_t *addr);
81
82/** Load a single 16 bit fixed point vector from memory (4 elements)
83 *
84 * @param[in] addr Memory address of the 16 bit fixed point vector to load
85 *
86 * @return 16 bit fixed point vector (4 elements)
87 */
88qint16x4_t vld1_qs16(const qint16_t *addr);
89
90/** Load a single 16 bit fixed point vector from memory (8 elements)
91 *
92 * @param[in] addr Memory address of the 16 bit fixed point vector to load
93 *
94 * @return 16 bit fixed point vector (8 elements)
95 */
96qint16x8_t vld1q_qs16(const qint16_t *addr);
97
98/** Load all lanes of 8 bit fixed point vector with same value from memory (8 elements)
99 *
100 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
101 *
102 * @return 8 bit fixed point vector (8 elements)
103 */
104qint8x8_t vld1_dup_qs8(const qint8_t *addr);
105
106/** Load all lanes of 8 bit fixed point vector with same value from memory (16 elements)
107 *
108 * @param[in] addr Memory address of the 8 bit fixed point scalar value to load
109 *
110 * @return 8 bit fixed point vector (16 elements)
111 */
112qint8x16_t vld1q_dup_qs8(const qint8_t *addr);
113
114/** Store a single 8 bit fixed point vector to memory (8 elements)
115 *
116 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
117 * @param[in] b 8 bit fixed point vector to store
118 *
119 */
120void vst1_qs8(qint8_t *addr, qint8x8_t b);
121
122/** Store a single 8 bit fixed point vector to memory (16 elements)
123 *
124 * @param[in] addr Memory address where the 8 bit fixed point vector should be stored
125 * @param[in] b 8 bit fixed point vector to store
126 *
127 */
128void vst1q_qs8(qint8_t *addr, qint8x16_t b);
129
130/** Store a single 16 bit fixed point vector to memory (4 elements)
131 *
132 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
133 * @param[in] b 16 bit fixed point vector to store
134 *
135 */
136void vst1_qs16(qint16_t *addr, qint16x4_t b);
137
138/** Store a single 16 bit fixed point vector to memory (8 elements)
139 *
140 * @param[in] addr Memory address where the 16 bit fixed point vector should be stored
141 * @param[in] b 16 bit fixed point vector to store
142 *
143 */
144void vst1q_qs16(qint16_t *addr, qint16x8_t b);
145
146/** 16 bit fixed point vector saturating narrow (8 elements)
147 *
148 * @param[in] a 16 bit fixed point vector to convert
149 *
150 * @return 8 bit fixed point vector
151 */
152qint8x8_t vqmovn_q16(qint16x8_t a);
153
154/** 8 bit fixed point vector duplicate (8 elements)
155 *
156 * @param[in] a 8 bit fixed point to duplicate
157 *
158 * @return The result of the vector duplication
159 */
160qint8x8_t vdup_n_qs8(qint8_t a);
161
162/** 8 bit fixed point vector duplicate (16 elements)
163 *
164 * @param[in] a 8 bit fixed point to duplicate
165 *
166 * @return The result of the vector duplication
167 */
168qint8x16_t vdupq_n_qs8(qint8_t a);
169
170/** Duplicate a float and convert it to 8 bit fixed point vector (16 elements)
171 *
172 * @param[in] a Floating point value to convert to 8 bit fixed point and duplicate
173 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
174 *
175 * @return The result of the vector duplication
176 */
177qint8x16_t vdupq_n_qs8_f32(float a, int fixed_point_position);
178
179/** 16 bit fixed point vector duplicate (8 elements)
180 *
181 * @param[in] a 16 bit fixed point to duplicate
182 *
183 * @return The result of the vector duplication
 *
 * NOTE(review): the parameter is declared as a qint16x8_t vector, whereas the
 * 8 bit siblings vdup_n_qs8 / vdupq_n_qs8 take a scalar (qint8_t). A scalar
 * qint16_t looks intended here - confirm against the definition in
 * NEFixedPoint.inl before changing the declaration.
184 */
185qint16x8_t vdupq_n_qs16(qint16x8_t a);
186
187/** Absolute value of 8 bit fixed point vector (8 elements)
188 *
189 * @param[in] a 8 bit fixed point input vector
190 *
191 * @return The result of the 8 bit fixed point vector absolute value
192 */
193qint8x8_t vabs_qs8(qint8x8_t a);
194
195/** Absolute value of 8 bit fixed point vector (16 elements)
196 *
197 * @param[in] a 8 bit fixed point input vector
198 *
199 * @return The result of the 8 bit fixed point vector absolute value
200 */
201qint8x16_t vabsq_qs8(qint8x16_t a);
202
203/** Saturating absolute value of 8 bit fixed point vector (8 elements)
204 *
205 * @param[in] a 8 bit fixed point input vector
206 *
207 * @return The result of the 8 bit fixed point vector absolute value
208 */
209qint8x8_t vqabs_qs8(qint8x8_t a);
210
211/** Saturating absolute value of 8 bit fixed point vector (16 elements)
212 *
213 * @param[in] a 8 bit fixed point input vector
214 *
215 * @return The result of the 8 bit fixed point vector absolute value
216 */
217qint8x16_t vqabsq_qs8(qint8x16_t a);
218
219/** 8 bit fixed point vector max (8 elements)
220 *
221 * @param[in] a First 8 bit fixed point input vector
222 * @param[in] b Second 8 bit fixed point input vector
223 *
224 * @return The result of the 8 bit fixed point vector max operation
225 */
226qint8x8_t vmax_qs8(qint8x8_t a, qint8x8_t b);
227
228/** 8 bit fixed point vector max (16 elements)
229 *
230 * @param[in] a First 8 bit fixed point input vector
231 * @param[in] b Second 8 bit fixed point input vector
232 *
233 * @return The result of the 8 bit fixed point vector max operation
234 */
235qint8x16_t vmaxq_qs8(qint8x16_t a, qint8x16_t b);
236
237/** 8 bit fixed point vector pairwise max (8 elements)
238 *
239 * @param[in] a First 8 bit fixed point input vector
240 * @param[in] b Second 8 bit fixed point input vector
241 *
242 * @return The result of the 8 bit fixed point vector pairwise max operation
243 */
244qint8x8_t vpmax_qs8(qint8x8_t a, qint8x8_t b);
245
246/** 8 bit fixed point vector min (8 elements)
247 *
248 * @param[in] a First 8 bit fixed point input vector
249 * @param[in] b Second 8 bit fixed point input vector
250 *
251 * @return The result of the 8 bit fixed point vector min operation
252 */
253qint8x8_t vmin_qs8(qint8x8_t a, qint8x8_t b);
254
255/** 8 bit fixed point vector min (16 elements)
256 *
257 * @param[in] a First 8 bit fixed point input vector
258 * @param[in] b Second 8 bit fixed point input vector
259 *
260 * @return The result of the 8 bit fixed point vector min operation
261 */
262qint8x16_t vminq_qs8(qint8x16_t a, qint8x16_t b);
263
264/** 8 bit fixed point vector pairwise min (8 elements)
265 *
266 * @param[in] a First 8 bit fixed point input vector
267 * @param[in] b Second 8 bit fixed point input vector
268 *
269 * @return The result of the 8 bit fixed point vector pairwise min operation
270 */
271qint8x8_t vpmin_qs8(qint8x8_t a, qint8x8_t b);
272
273/** 8 bit fixed point vector add (8 elements)
274 *
275 * @param[in] a First 8 bit fixed point input vector
276 * @param[in] b Second 8 bit fixed point input vector
277 *
278 * @return The result of the 8 bit fixed point vector addition
279 */
280qint8x8_t vadd_qs8(qint8x8_t a, qint8x8_t b);
281
282/** 8 bit fixed point vector add (16 elements)
283 *
284 * @param[in] a First 8 bit fixed point input vector
285 * @param[in] b Second 8 bit fixed point input vector
286 *
287 * @return The result of the 8 bit fixed point vector addition
288 */
289qint8x16_t vaddq_qs8(qint8x16_t a, qint8x16_t b);
290
291/** 8 bit fixed point vector saturating add (8 elements)
292 *
293 * @param[in] a First 8 bit fixed point input vector
294 * @param[in] b Second 8 bit fixed point input vector
295 *
296 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
297 */
298qint8x8_t vqadd_qs8(qint8x8_t a, qint8x8_t b);
299
300/** 8 bit fixed point vector saturating add (16 elements)
301 *
302 * @param[in] a First 8 bit fixed point input vector
303 * @param[in] b Second 8 bit fixed point input vector
304 *
305 * @return The result of the 8 bit fixed point vector addition. The result is saturated in case of overflow
306 */
307qint8x16_t vqaddq_qs8(qint8x16_t a, qint8x16_t b);
308
309/** 16 bit fixed point vector saturating add (4 elements)
310 *
311 * @param[in] a First 16 bit fixed point input vector
312 * @param[in] b Second 16 bit fixed point input vector
313 *
314 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
315 */
316qint16x4_t vqadd_qs16(qint16x4_t a, qint16x4_t b);
317
318/** 16 bit fixed point vector saturating add (8 elements)
319 *
320 * @param[in] a First 16 bit fixed point input vector
321 * @param[in] b Second 16 bit fixed point input vector
322 *
323 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
324 */
325qint16x8_t vqaddq_qs16(qint16x8_t a, qint16x8_t b);
326
327/** 8 bit fixed point vector saturating pairwise add (8 elements)
328 *
329 * @param[in] a 8 bit fixed point input vector
330 *
331 * @return The result of the 16 bit fixed point vector addition. The result is saturated in case of overflow
 *
 * NOTE(review): the "l" suffix and the widened return type (4 lanes of 16 bit
 * from 8 lanes of 8 bit) suggest a pairwise add long, in which case the
 * "saturating" wording may be a copy-paste leftover - confirm against the
 * implementation. The return type is also spelled int16x4_t rather than the
 * qint16x4_t alias used by the other declarations in this header.
332 */
333int16x4_t vpaddl_qs8(qint8x8_t a);
334
335/** 8 bit fixed point vector subtraction (8 elements)
336 *
337 * @param[in] a First 8 bit fixed point input vector
338 * @param[in] b Second 8 bit fixed point input vector
339 *
340 * @return The result of the 8 bit fixed point vector subtraction
341 */
342qint8x8_t vsub_qs8(qint8x8_t a, qint8x8_t b);
343
344/** 8 bit fixed point vector subtraction (16 elements)
345 *
346 * @param[in] a First 8 bit fixed point input vector
347 * @param[in] b Second 8 bit fixed point input vector
348 *
349 * @return The result of the 8 bit fixed point vector subtraction
350 */
351qint8x16_t vsubq_qs8(qint8x16_t a, qint8x16_t b);
352
353/** 8 bit fixed point vector saturating subtraction (8 elements)
354 *
355 * @param[in] a First 8 bit fixed point input vector
356 * @param[in] b Second 8 bit fixed point input vector
357 *
358 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
359 */
360qint8x8_t vqsub_qs8(qint8x8_t a, qint8x8_t b);
361
362/** 8 bit fixed point vector saturating subtraction (16 elements)
363 *
364 * @param[in] a First 8 bit fixed point input vector
365 * @param[in] b Second 8 bit fixed point input vector
366 *
367 * @return The result of the 8 bit fixed point vector subtraction. The result is saturated in case of overflow
368 */
369qint8x16_t vqsubq_qs8(qint8x16_t a, qint8x16_t b);
370
371/** 8 bit fixed point vector multiply (8 elements)
372 *
373 * @param[in] a First 8 bit fixed point input vector
374 * @param[in] b Second 8 bit fixed point input vector
375 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
376 *
377 * @return The result of the 8 bit fixed point vector multiplication.
378 */
379qint8x8_t vmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
380
381/** 8 bit fixed point vector multiply (16 elements)
382 *
383 * @param[in] a First 8 bit fixed point input vector
384 * @param[in] b Second 8 bit fixed point input vector
385 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
386 *
387 * @return The result of the 8 bit fixed point vector multiplication.
388 */
389qint8x16_t vmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
390
391/** 8 bit fixed point vector saturating multiply (8 elements)
392 *
393 * @param[in] a First 8 bit fixed point input vector
394 * @param[in] b Second 8 bit fixed point input vector
395 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
396 *
397 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
398 */
399qint8x8_t vqmul_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
400
401/** 8 bit fixed point vector saturating multiply (16 elements)
402 *
403 * @param[in] a First 8 bit fixed point input vector
404 * @param[in] b Second 8 bit fixed point input vector
405 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
406 *
407 * @return The result of the 8 bit fixed point vector multiplication. The result is saturated in case of overflow
408 */
409qint8x16_t vqmulq_qs8(qint8x16_t a, qint8x16_t b, int fixed_point_position);
410
411/** 8 bit fixed point vector long multiply (8 elements)
412 *
413 * @param[in] a First 8 bit fixed point input vector
414 * @param[in] b Second 8 bit fixed point input vector
415 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
416 *
417 * @return The result of the 8 bit fixed point long vector multiplication.
418 */
419qint16x8_t vmull_qs8(qint8x8_t a, qint8x8_t b, int fixed_point_position);
420
421/** 8 bit fixed point vector multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
422 *
423 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
424 * @param[in] b Second 8 bit fixed point input vector
425 * @param[in] c Third 8 bit fixed point input vector
426 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
427 *
428 * @return The result of the 8 bit fixed point vector multiply-accumulate
429 */
430qint8x8_t vmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
431
432/** 8 bit fixed point vector multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
433 *
434 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
435 * @param[in] b Second 8 bit fixed point input vector
436 * @param[in] c Third 8 bit fixed point input vector
437 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
438 *
439 * @return The result of the 8 bit fixed point vector multiply-accumulate
440 */
441qint8x16_t vmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
442
443/** 8 bit fixed point vector saturating multiply-accumulate (8 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
444 *
445 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
446 * @param[in] b Second 8 bit fixed point input vector
447 * @param[in] c Third 8 bit fixed point input vector
448 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
449 *
450 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
451 */
452qint8x8_t vqmla_qs8(qint8x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
453
454/** 8 bit fixed point vector saturating multiply-accumulate (16 elements). This operation performs the product between @p b and @p c and adds the result to @p a (a + b * c).
455 *
456 * @param[in] a First 8 bit fixed point input vector where the result of multiplication must be added to
457 * @param[in] b Second 8 bit fixed point input vector
458 * @param[in] c Third 8 bit fixed point input vector
459 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
460 *
461 * @return The result of the 8 bit fixed point vector multiply-accumulate. The result is saturated in case of overflow
462 */
463qint8x16_t vqmlaq_qs8(qint8x16_t a, qint8x16_t b, qint8x16_t c, int fixed_point_position);
464
465/** 8 bit fixed point vector multiply-accumulate long (8 elements).
466 * This operation performs the product between @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
467 *
468 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
469 * @param[in] b Second 8 bit fixed point input vector
470 * @param[in] c Third 8 bit fixed point input vector
471 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
472 *
473 * @return The result of the 8 bit fixed point vector multiply-accumulate long
474 */
475qint16x8_t vmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
476
477/** 8 bit fixed point vector saturating multiply-accumulate long (8 elements). The saturation is performed on the 16 bit fixed point output vector.
478 * This operation performs the product between @p b and @p c and adds the result to the 16 bit fixed point vector @p a (a + b * c). 8 elements
479 *
480 * @param[in] a First 16 bit fixed point input vector where the result of multiplication must be added to
481 * @param[in] b Second 8 bit fixed point input vector
482 * @param[in] c Third 8 bit fixed point input vector
483 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
484 *
485 * @return The result of the 8 bit fixed point vector multiply-accumulate long
486 */
487qint16x8_t vqmlal_qs8(qint16x8_t a, qint8x8_t b, qint8x8_t c, int fixed_point_position);
488
489/** Convert a float vector with 4x2 elements to 8 bit fixed point vector with 8 elements
490 *
491 * @param[in] a Float input vector
492 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
493 *
494 * @return The result of the conversion float -> 8 bit fixed point
495 */
496qint8x8_t vcvt_qs8_f32(const float32x4x2_t &a, int fixed_point_position);
497
498/** Convert a float vector with 4x4 elements to 8 bit fixed point vector with 16 elements
499 *
500 * @param[in] a Float input vector
501 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
502 *
503 * @return The result of the conversion float -> 8 bit fixed point
504 */
505qint8x16_t vcvtq_qs8_f32(const float32x4x4_t &a, int fixed_point_position);
506
507/** Convert a 8 bit fixed point vector with 8 elements to a float vector with 4x2 elements
508 *
509 * @param[in] a 8 bit fixed point input vector
510 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
511 *
512 * @return The result of the conversion 8 bit fixed point -> float32x4x2
513 */
514float32x4x2_t vcvt_f32_qs8(qint8x8_t a, int fixed_point_position);
515
516/** Convert a 8 bit fixed point vector with 16 elements to a float vector with 4x4 elements
517 *
518 * @param[in] a 8 bit fixed point input vector
519 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
520 *
521 * @return The result of the conversion 8 bit fixed point -> float32x4x4
 *
 * NOTE(review): this fixed point -> float conversion is declared under the name
 * vcvtq_qs8_f32, i.e. as an overload of the float -> fixed point conversion that
 * takes a float32x4x4_t, while its 8 element counterpart is named vcvt_f32_qs8.
 * The name vcvtq_f32_qs8 may have been intended - confirm against the definition
 * and existing callers before renaming.
522 */
523float32x4x4_t vcvtq_qs8_f32(qint8x16_t a, int fixed_point_position);
524
525/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (8 elements)
526 *
527 * @param[in] a 8bit fixed point input vector
528 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
529 *
530 * @return The result of the 8bit reciprocal (1/a).
531 */
532qint8x8_t vrecip_qs8(qint8x8_t a, int fixed_point_position);
533
534/** Calculate reciprocal of a fixed point 8bit number using the Newton-Raphson method. (16 elements)
535 *
536 * @param[in] a 8bit fixed point input vector
537 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
538 *
539 * @return The result of the 8bit reciprocal (1/a).
540 */
541qint8x16_t vrecipq_qs8(qint8x16_t a, int fixed_point_position);
542
543/** Division fixed point 8bit (8 elements)
544 *
545 * @param[in] a First 8bit fixed point input vector
546 * @param[in] b Second 8bit fixed point input vector
547 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
548 *
549 * @return The quotient and remainder number in fixed point format.
550 */
551qint8x8_t vdiv_qs8(qint8x8_t a, int8x8_t b, int fixed_point_position);
552
553/** Division fixed point 8bit (16 elements)
554 *
555 * @param[in] a First 8bit fixed point input vector
556 * @param[in] b Second 8bit fixed point input vector
557 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
558 *
559 * @return The quotient and remainder number in 8bit fixed point format.
560 */
561qint8x16_t vdivq_qs8(qint8x16_t a, int8x16_t b, int fixed_point_position);
562
563/** Perform a 4th degree polynomial approximation. (8 elements)
564 *
 * @tparam islog Compile-time flag; presumably selects the logarithm variant of the approximation - TODO confirm against the definition
 *
565 * @param[in] a 8bit fixed point input vector
566 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
567 *
568 * @return The result of the 8bit taylor approximation.
569 */
570template <bool islog>
571qint8x8_t vtaylor_poly_qs8(qint8x8_t a, int fixed_point_position);
572
573/** Perform a 4th degree polynomial approximation. (16 elements)
574 *
 * @tparam islog Compile-time flag; presumably selects the logarithm variant of the approximation - TODO confirm against the definition
 *
575 * @param[in] a 8bit fixed point input vector
576 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
577 *
578 * @return The result of the 8bit taylor approximation.
579 */
580template <bool islog>
581qint8x16_t vtaylor_polyq_qs8(qint8x16_t a, int fixed_point_position);
582
583/** Calculate saturating exponential fixed point 8bit (8 elements)
584 *
585 * @param[in] a 8bit fixed point input vector
586 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
587 *
588 * @return The result of the 8bit saturating exponential
589 */
590qint8x8_t vqexp_qs8(qint8x8_t a, int fixed_point_position);
591
592/** Calculate saturating exponential fixed point 8bit (16 elements)
593 *
594 * @param[in] a 8bit fixed point input vector
595 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
596 *
597 * @return The result of the 8bit saturating exponential
598 */
599qint8x16_t vqexpq_qs8(qint8x16_t a, int fixed_point_position);
600
601/** Calculate logarithm fixed point 8bit (8 elements)
602 *
603 * @param[in] a 8bit fixed point input vector
604 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
605 *
606 * @return The result of the 8bit logarithm.
607 */
608qint8x8_t vlog_qs8(qint8x8_t a, int fixed_point_position);
609
610/** Calculate logarithm fixed point 8bit (16 elements)
611 *
612 * @param[in] a 8bit fixed point input vector
613 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
614 *
615 * @return The result of the 8bit logarithm.
616 */
617qint8x16_t vlogq_qs8(qint8x16_t a, int fixed_point_position);
618
619/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
620 *
621 * @param[in] a 8bit fixed point input vector
622 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
623 *
624 * @return The result of the 8bit inverse sqrt.
625 */
626qint8x8_t vinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
627
628/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (8 elements)
629 *
630 * @param[in] a 8bit fixed point input vector
631 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
632 *
633 * @return The result of the 8bit inverse sqrt.
634 */
635qint8x8_t vqinvsqrt_qs8(qint8x8_t a, int fixed_point_position);
636
637/** Calculate inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
638 *
639 * @param[in] a 8bit fixed point input vector
640 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
641 *
642 * @return The result of the 8bit inverse sqrt.
643 */
644qint8x16_t vinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
645
646/** Calculate saturating inverse square root for fixed point 8bit using Newton-Raphson method (16 elements)
647 *
648 * @param[in] a 8bit fixed point input vector
649 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
650 *
651 * @return The result of the 8bit inverse sqrt.
652 */
653qint8x16_t vqinvsqrtq_qs8(qint8x16_t a, int fixed_point_position);
654
655/** Calculate hyperbolic tangent for fixed point 8bit (8 elements)
656 *
657 * @param[in] a 8bit fixed point input vector
658 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
659 *
660 * @return The calculated Hyperbolic Tangent.
661 */
662qint8x8_t vtanh_qs8(qint8x8_t a, int fixed_point_position);
663
664/** Calculate hyperbolic tangent for fixed point 8bit (16 elements)
665 *
666 * @param[in] a 8bit fixed point input vector
667 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
668 *
669 * @return The calculated Hyperbolic Tangent.
670 */
671qint8x16_t vtanhq_qs8(qint8x16_t a, int fixed_point_position);
672
673/** Calculate saturating n power for fixed point 8bit (16 elements).
674 *
675 * pow(a,b) = e^(b*log(a))
676 *
677 * @param[in] a 8bit fixed point input vector
678 * @param[in] b 8bit fixed point power vector
679 * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number
680 *
681 * @return The result of the 8bit power.
 *
 * NOTE(review): the brief says 16 elements and @p b is a qint8x16_t, but @p a
 * and the return value are declared as 8 element qint8x8_t vectors. The mixed
 * widths look unintentional (qint8x16_t throughout seems likely) - confirm
 * against the definition in NEFixedPoint.inl before changing the declaration.
682 */
683qint8x8_t vqpowq_qs8(qint8x8_t a, qint8x16_t b, int fixed_point_position);
Michele Di Giorgio8af2dd62017-06-19 15:19:29 +0100684
685/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements
686 *
687 * @param[in] a Float input vector
688 * @param[in] b Float input vector
689 *
690 * @return The lane-by-lane maximum -> float32x4x2
691 */
692float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100693}
694#include "arm_compute/core/NEON/NEFixedPoint.inl"
695#endif /* __ARM_COMPUTE_NEFIXEDPOINT_H__ */