blob: b467a600d6875e4ac7e0ab2e1a43c7586c352b75 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEMATH_H__
25#define __ARM_COMPUTE_NEMATH_H__
26
27#include <arm_neon.h>
28
29namespace arm_compute
30{
31/** Calculate the inverse square root (1/sqrt(x)) of an F32 vector.
32 *
33 * @param[in] x Input vector value in F32 format.
34 *
35 * @return The calculated inverse square root, as an F32 vector.
36 */
37float32x4_t vinvsqrtq_f32(float32x4_t x);
38
39/** Calculate the reciprocal (1/x) of an F32 vector.
40 *
41 * @param[in] x Input vector value in F32 format.
42 *
43 * @return The calculated reciprocal, as an F32 vector.
44 */
45float32x4_t vinvq_f32(float32x4_t x);
46
47/** Perform a 7th degree polynomial approximation using Estrin's method.
48 *
 * @note The order in which the eight coefficients are consumed is defined by the
 *       implementation, which is not visible in this header - TODO confirm before
 *       building a new coefficients table.
 * @note NOTE(review): std::array is used in this prototype, but the only header
 *       included ahead of it here is <arm_neon.h>; presumably <array> is expected
 *       to arrive through the includer - verify.
 *
49 * @param[in] x      Input vector value in F32 format.
50 * @param[in] coeffs Polynomial coefficients table (8 F32 vectors, one per coefficient).
51 *
52 * @return The calculated approximation, as an F32 vector.
53 */
54float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array<float32x4_t, 8> &coeffs);
55
56/** Calculate the exponential (e^x) of an F32 vector.
57 *
58 * @param[in] x Input vector value in F32 format.
59 *
60 * @return The calculated exponential, as an F32 vector.
61 */
62float32x4_t vexpq_f32(float32x4_t x);
63
64/** Calculate the logarithm of an F32 vector.
65 *
 * @note Presumably the natural logarithm (vpowq_f32 documents pow(x,n) = e^(n*log(x)));
 *       the base is not stated in this header - TODO confirm against the implementation.
 *
66 * @param[in] x Input vector value in F32 format.
67 *
68 * @return The calculated logarithm, as an F32 vector.
69 */
70float32x4_t vlogq_f32(float32x4_t x);
71
72/** Calculate the hyperbolic tangent of an F32 vector.
73 *
74 * tanh(x) = (e^2x - 1)/(e^2x + 1)
75 *
76 * @note We clamp x to [-5,5] to avoid overflow issues.
77 *
78 * @param[in] val Input vector value in F32 format.
79 *
80 * @return The calculated hyperbolic tangent, as an F32 vector.
81 */
82float32x4_t vtanhq_f32(float32x4_t val);
83
83/** Calculate the n-th power of a number.
84 *
85 * pow(x,n) = e^(n*log(x))
86 *
87 * @param[in] val Input vector value in F32 format.
88 * @param[in] n   Powers to raise the input to, as an F32 vector.
89 *
90 * @return The calculated power, as an F32 vector.
91 */
92float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
Pablo Tellodf246182017-07-03 16:25:09 +010094
95#ifdef ARM_COMPUTE_ENABLE_FP16
Pablo Tello91654c42017-07-05 11:32:17 +010096/** Calculate the hyperbolic tangent of an F16 vector.
97 *
98 * tanh(x) = (e^2x - 1)/(e^2x + 1)
99 *
100 * @note We clamp x to [-5,5] to avoid overflow issues.
 * @note Only declared when ARM_COMPUTE_ENABLE_FP16 is defined.
101 *
102 * @param[in] val Input vector value in F16 format.
103 *
104 * @return The calculated hyperbolic tangent, as an F16 vector.
105 */
106float16x8_t vtanhq_f16(float16x8_t val);
107/** Calculate the inverse square root (1/sqrt(x)) of an F16 vector.
108 *
 * @note Only declared when ARM_COMPUTE_ENABLE_FP16 is defined.
 *
109 * @param[in] x Input vector value in F16 format.
110 *
111 * @return The calculated inverse square root, as an F16 vector.
112 */
113float16x8_t vinvsqrtq_f16(float16x8_t x);
Pablo Tellodf246182017-07-03 16:25:09 +0100114/** Calculate the exponential (e^x) of an F16 vector.
115 *
 * @note Only declared when ARM_COMPUTE_ENABLE_FP16 is defined.
 *
116 * @param[in] x Input vector value in F16 format.
117 *
118 * @return The calculated exponential, as an F16 vector.
119 */
120float16x8_t vexpq_f16(float16x8_t x);
121/** Calculate the n-th power of a number.
122 *
123 * pow(x,n) = e^(n*log(x))
124 *
 * @note Only declared when ARM_COMPUTE_ENABLE_FP16 is defined.
 *
125 * @param[in] val Input vector value in F16 format.
126 * @param[in] n   Powers to raise the input to, as an F16 vector.
127 *
128 * @return The calculated power, as an F16 vector.
129 */
130float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
131#endif /* ARM_COMPUTE_ENABLE_FP16 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100132}
133#include "arm_compute/core/NEON/NEMath.inl"
134#endif /* __ARM_COMPUTE_NEMATH_H__ */