blob: 6134d75b29e8e7a63f4a1286caed098641d13a7f [file] [log] [blame]
/*
 * Copyright (c) 2018-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouf4643372019-11-29 16:17:13 +000024#ifndef ARM_COMPUTE_WRAPPER_ADD_H
25#define ARM_COMPUTE_WRAPPER_ADD_H
Michalis Spyroua6825a42018-09-13 12:24:03 +010026
27#include <arm_neon.h>
28
29namespace arm_compute
30{
31namespace wrapper
32{
33#define VADD_IMPL(stype, vtype, prefix, postfix) \
34 inline vtype vadd(const vtype &a, const vtype &b) \
35 { \
36 return prefix##_##postfix(a, b); \
37 }
38
39VADD_IMPL(uint8x8_t, uint8x8_t, vadd, u8)
40VADD_IMPL(int8x8_t, int8x8_t, vadd, s8)
41VADD_IMPL(uint16x4_t, uint16x4_t, vadd, u16)
42VADD_IMPL(int16x4_t, int16x4_t, vadd, s16)
43VADD_IMPL(uint32x2_t, uint32x2_t, vadd, u32)
44VADD_IMPL(int32x2_t, int32x2_t, vadd, s32)
45VADD_IMPL(uint64x1_t, uint64x1_t, vadd, u64)
46VADD_IMPL(int64x1_t, int64x1_t, vadd, s64)
47VADD_IMPL(float32x2_t, float32x2_t, vadd, f32)
48#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
49VADD_IMPL(float16x4_t, float16x4_t, vadd, f16)
50#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
51
52VADD_IMPL(uint8x16_t, uint8x16_t, vaddq, u8)
53VADD_IMPL(int8x16_t, int8x16_t, vaddq, s8)
54VADD_IMPL(uint16x8_t, uint16x8_t, vaddq, u16)
55VADD_IMPL(int16x8_t, int16x8_t, vaddq, s16)
56VADD_IMPL(uint32x4_t, uint32x4_t, vaddq, u32)
57VADD_IMPL(int32x4_t, int32x4_t, vaddq, s32)
58VADD_IMPL(uint64x2_t, uint64x2_t, vaddq, u64)
59VADD_IMPL(int64x2_t, int64x2_t, vaddq, s64)
60VADD_IMPL(float32x4_t, float32x4_t, vaddq, f32)
61#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
62VADD_IMPL(float16x8_t, float16x8_t, vaddq, f16)
63#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Michalis Spyroua6825a42018-09-13 12:24:03 +010064#undef VADD_IMPL
Georgios Pinitas5a594532018-12-03 14:30:05 +000065
Georgios Pinitasdbdea0d2019-10-16 19:21:40 +010066// VQADD: Vector saturating add (No notion of saturation for floating point)
Georgios Pinitas5a594532018-12-03 14:30:05 +000067#define VQADD_IMPL(stype, vtype, prefix, postfix) \
68 inline vtype vqadd(const vtype &a, const vtype &b) \
69 { \
70 return prefix##_##postfix(a, b); \
71 }
72
Georgios Pinitas5a594532018-12-03 14:30:05 +000073VQADD_IMPL(uint8x8_t, uint8x8_t, vqadd, u8)
74VQADD_IMPL(int8x8_t, int8x8_t, vqadd, s8)
75VQADD_IMPL(uint16x4_t, uint16x4_t, vqadd, u16)
76VQADD_IMPL(int16x4_t, int16x4_t, vqadd, s16)
77VQADD_IMPL(uint32x2_t, uint32x2_t, vqadd, u32)
78VQADD_IMPL(int32x2_t, int32x2_t, vqadd, s32)
79VQADD_IMPL(uint64x1_t, uint64x1_t, vqadd, u64)
80VQADD_IMPL(int64x1_t, int64x1_t, vqadd, s64)
81VQADD_IMPL(float32x2_t, float32x2_t, vadd, f32)
82#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
83VQADD_IMPL(float16x4_t, float16x4_t, vadd, f16)
84#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
85
86VQADD_IMPL(uint8x16_t, uint8x16_t, vqaddq, u8)
87VQADD_IMPL(int8x16_t, int8x16_t, vqaddq, s8)
88VQADD_IMPL(uint16x8_t, uint16x8_t, vqaddq, u16)
89VQADD_IMPL(int16x8_t, int16x8_t, vqaddq, s16)
90VQADD_IMPL(uint32x4_t, uint32x4_t, vqaddq, u32)
91VQADD_IMPL(int32x4_t, int32x4_t, vqaddq, s32)
92VQADD_IMPL(uint64x2_t, uint64x2_t, vqaddq, u64)
93VQADD_IMPL(int64x2_t, int64x2_t, vqaddq, s64)
94VQADD_IMPL(float32x4_t, float32x4_t, vaddq, f32)
95#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
96VQADD_IMPL(float16x8_t, float16x8_t, vaddq, f16)
97#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
98#undef VQADD_IMPL
Georgios Pinitasdbdea0d2019-10-16 19:21:40 +010099
100// VADDW: Vector widening add
101#define VADDW_IMPL(wtype, vtype, prefix, postfix) \
102 inline wtype vaddw(const wtype &a, const vtype &b) \
103 { \
104 return prefix##_##postfix(a, b); \
105 }
106
107VADDW_IMPL(uint16x8_t, uint8x8_t, vaddw, u8)
108VADDW_IMPL(int16x8_t, int8x8_t, vaddw, s8)
109VADDW_IMPL(uint32x4_t, uint16x4_t, vaddw, u16)
110VADDW_IMPL(int32x4_t, int16x4_t, vaddw, s16)
111VADDW_IMPL(uint64x2_t, uint32x2_t, vaddw, u32)
112VADDW_IMPL(int64x2_t, int32x2_t, vaddw, s32)
113#undef VADDW_IMPL
114
115// VADDL: Vector long add
116#define VADDL_IMPL(wtype, vtype, prefix, postfix) \
117 inline wtype vaddl(const vtype &a, const vtype &b) \
118 { \
119 return prefix##_##postfix(a, b); \
120 }
121
122VADDL_IMPL(uint16x8_t, uint8x8_t, vaddl, u8)
123VADDL_IMPL(int16x8_t, int8x8_t, vaddl, s8)
124VADDL_IMPL(uint32x4_t, uint16x4_t, vaddl, u16)
125VADDL_IMPL(int32x4_t, int16x4_t, vaddl, s16)
126VADDL_IMPL(uint64x2_t, uint32x2_t, vaddl, u32)
127VADDL_IMPL(int64x2_t, int32x2_t, vaddl, s32)
128#undef VADDL_IMPL
129
// The across-vector add wrappers are only declared when building for AArch64.
#if defined(__aarch64__)
// VADDV: Across vector add
// Generates one overload of wrapper::vaddv(). It takes a single vector of
// type `vtype` and returns a scalar of type `stype`, forwarding to the
// intrinsic named prefix##_##postfix (e.g. vaddv_u8, vaddvq_f32).
#define VADDV_IMPL(stype, vtype, prefix, postfix) \
    inline stype vaddv(const vtype &a)            \
    {                                             \
        return prefix##_##postfix(a);             \
    }

// Overloads forwarding to the vaddv_* intrinsics
VADDV_IMPL(uint8_t, uint8x8_t, vaddv, u8)
VADDV_IMPL(int8_t, int8x8_t, vaddv, s8)
VADDV_IMPL(uint16_t, uint16x4_t, vaddv, u16)
VADDV_IMPL(int16_t, int16x4_t, vaddv, s16)
VADDV_IMPL(uint32_t, uint32x2_t, vaddv, u32)
VADDV_IMPL(int32_t, int32x2_t, vaddv, s32)
VADDV_IMPL(float, float32x2_t, vaddv, f32)

// Overloads forwarding to the vaddvq_* intrinsics
VADDV_IMPL(uint8_t, uint8x16_t, vaddvq, u8)
VADDV_IMPL(int8_t, int8x16_t, vaddvq, s8)
VADDV_IMPL(uint16_t, uint16x8_t, vaddvq, u16)
VADDV_IMPL(int16_t, int16x8_t, vaddvq, s16)
VADDV_IMPL(uint32_t, uint32x4_t, vaddvq, u32)
VADDV_IMPL(int32_t, int32x4_t, vaddvq, s32)
VADDV_IMPL(uint64_t, uint64x2_t, vaddvq, u64)
VADDV_IMPL(int64_t, int64x2_t, vaddvq, s64)
VADDV_IMPL(float, float32x4_t, vaddvq, f32)
// The generator macro is private to this header; do not leak it to includers.
#undef VADDV_IMPL
#endif // defined(__aarch64__)

158// VPADDL: Signed add long pairwise
159#define VPADDL_IMPL(ltype, vtype, prefix, postfix) \
160 inline ltype vpaddl(const vtype &a) \
161 { \
162 return prefix##_##postfix(a); \
163 }
164
165VPADDL_IMPL(uint16x4_t, uint8x8_t, vpaddl, u8)
166VPADDL_IMPL(int16x4_t, int8x8_t, vpaddl, s8)
167VPADDL_IMPL(uint32x2_t, uint16x4_t, vpaddl, u16)
168VPADDL_IMPL(int32x2_t, int16x4_t, vpaddl, s16)
169VPADDL_IMPL(uint64x1_t, uint32x2_t, vpaddl, u32)
170VPADDL_IMPL(int64x1_t, int32x2_t, vpaddl, s32)
171
172VPADDL_IMPL(uint16x8_t, uint8x16_t, vpaddlq, u8)
173VPADDL_IMPL(int16x8_t, int8x16_t, vpaddlq, s8)
174VPADDL_IMPL(uint32x4_t, uint16x8_t, vpaddlq, u16)
175VPADDL_IMPL(int32x4_t, int16x8_t, vpaddlq, s16)
176VPADDL_IMPL(uint64x2_t, uint32x4_t, vpaddlq, u32)
177VPADDL_IMPL(int64x2_t, int32x4_t, vpaddlq, s32)
178#undef VPADDL_IMPL
Manuel Bottini581f1782019-11-13 17:24:43 +0000179
180// VPADD: Add pairwise
181#define VPADD_IMPL(stype, vtype, prefix, postfix) \
182 inline vtype vpadd(const vtype &a, const vtype &b) \
183 { \
184 return prefix##_##postfix(a, b); \
185 }
186
187VPADD_IMPL(uint8x8_t, uint8x8_t, vpadd, u8)
188VPADD_IMPL(int8x8_t, int8x8_t, vpadd, s8)
189VPADD_IMPL(uint16x4_t, uint16x4_t, vpadd, u16)
190VPADD_IMPL(int16x4_t, int16x4_t, vpadd, s16)
191VPADD_IMPL(uint32x2_t, uint32x2_t, vpadd, u32)
192VPADD_IMPL(int32x2_t, int32x2_t, vpadd, s32)
193VPADD_IMPL(float32x2_t, float32x2_t, vpadd, f32)
194#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
195VPADD_IMPL(float16x4_t, float16x4_t, vpadd, f16)
196#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
197
198#undef VPADD_IMPL
Michalis Spyroua6825a42018-09-13 12:24:03 +0100199} // namespace wrapper
200} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000201#endif /* ARM_COMPUTE_WRAPPER_ADD_H */