blob: 6452393ca0941573b8e23eb357bf8e30582d25a8 [file] [log] [blame]
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IAccessWindow.h"
29#include "arm_compute/core/ITensor.h"
Michele Di Giorgio81f0d152017-07-11 15:00:52 +010030#include "arm_compute/core/NEON/NEFixedPoint.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031#include "arm_compute/core/TensorInfo.h"
32#include "arm_compute/core/Validate.h"
33
34#include <algorithm>
35#include <arm_neon.h>
36#include <cstdint>
37#include <map>
38#include <string>
39
40using namespace arm_compute;
41
42namespace arm_compute
43{
44class Coordinates;
45} // namespace arm_compute
46
47namespace
48{
Michele Di Giorgio81f0d152017-07-11 15:00:52 +010049void add_wrap_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
50{
51 Iterator input1(in1, window);
52 Iterator input2(in2, window);
53 Iterator output(out, window);
54
55 execute_window_loop(window, [&](const Coordinates & id)
56 {
57 const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr()));
58 const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr()));
59
60 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vaddq_qs8(a, b));
61 },
62 input1, input2, output);
63}
64
65void add_saturate_QS8_QS8_QS8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
66{
67 Iterator input1(in1, window);
68 Iterator input2(in2, window);
69 Iterator output(out, window);
70
71 execute_window_loop(window, [&](const Coordinates & id)
72 {
73 const qint8x16_t a = vld1q_qs8(reinterpret_cast<const qint8_t *>(input1.ptr()));
74 const qint8x16_t b = vld1q_qs8(reinterpret_cast<const qint8_t *>(input2.ptr()));
75
76 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqaddq_qs8(a, b));
77 },
78 input1, input2, output);
79}
80
Anthony Barbier6ff3b192017-09-04 18:44:23 +010081void add_wrap_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
82{
83 Iterator input1(in1, window);
84 Iterator input2(in2, window);
85 Iterator output(out, window);
86
87 execute_window_loop(window, [&](const Coordinates & id)
88 {
89 vst1q_u8(output.ptr(), vaddq_u8(vld1q_u8(input1.ptr()), vld1q_u8(input2.ptr())));
90 },
91 input1, input2, output);
92}
93
94void add_saturate_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
95{
96 Iterator input1(in1, window);
97 Iterator input2(in2, window);
98 Iterator output(out, window);
99
100 execute_window_loop(window, [&](const Coordinates & id)
101 {
102 vst1q_u8(output.ptr(), vqaddq_u8(vld1q_u8(input1.ptr()), vld1q_u8(input2.ptr())));
103 },
104 input1, input2, output);
105}
106
107inline int16x8x2_t vadd2q_s16(const int16x8x2_t &a, const int16x8x2_t &b)
108{
109 const int16x8x2_t res =
110 {
111 {
112 vaddq_s16(a.val[0], b.val[0]),
113 vaddq_s16(a.val[1], b.val[1])
114 }
115 };
116
117 return res;
118}
119
120inline float32x4x4_t vadd4q_f32(const float32x4x4_t &a, const float32x4x4_t &b)
121{
122 const float32x4x4_t res =
123 {
124 {
125 vaddq_f32(a.val[0], b.val[0]),
126 vaddq_f32(a.val[1], b.val[1]),
127 vaddq_f32(a.val[2], b.val[2]),
128 vaddq_f32(a.val[3], b.val[3])
129 }
130 };
131
132 return res;
133}
134
135inline int16x8x2_t vqadd2q_s16(const int16x8x2_t &a, const int16x8x2_t &b)
136{
137 const int16x8x2_t res =
138 {
139 {
140 vqaddq_s16(a.val[0], b.val[0]),
141 vqaddq_s16(a.val[1], b.val[1])
142 }
143 };
144
145 return res;
146}
147
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Lane-wise addition of two pairs of float16x8 vectors.
 *
 * Only compiled when the target provides FP16 vector arithmetic.
 *
 * @param[in] a First operand (two 8-lane F16 vectors).
 * @param[in] b Second operand (two 8-lane F16 vectors).
 *
 * @return A pair whose val[i] is vaddq_f16(a.val[i], b.val[i]).
 */
inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
{
    float16x8x2_t sum;
    sum.val[0] = vaddq_f16(a.val[0], b.val[0]);
    sum.val[1] = vaddq_f16(a.val[1], b.val[1]);
    return sum;
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod1b0ecc2017-07-11 11:27:04 +0100162
163void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
164{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000165#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tellod1b0ecc2017-07-11 11:27:04 +0100166 Iterator input1(in1, window);
167 Iterator input2(in2, window);
168 Iterator output(out, window);
169
170 execute_window_loop(window, [&](const Coordinates & id)
171 {
172 const float16x8x2_t a = vld2q_f16(reinterpret_cast<const float16_t *>(input1.ptr()));
173 const float16x8x2_t b = vld2q_f16(reinterpret_cast<const float16_t *>(input2.ptr()));
174
175 vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vadd2q_f16(a, b));
176 },
177 input1, input2, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000178#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod1b0ecc2017-07-11 11:27:04 +0100179 ARM_COMPUTE_UNUSED(in1);
180 ARM_COMPUTE_UNUSED(in2);
181 ARM_COMPUTE_UNUSED(out);
182 ARM_COMPUTE_UNUSED(window);
183 ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000184#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod1b0ecc2017-07-11 11:27:04 +0100185}
186
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100187void add_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
188{
189 Iterator input1(in1, window);
190 Iterator input2(in2, window);
191 Iterator output(out, window);
192
193 execute_window_loop(window, [&](const Coordinates & id)
194 {
195 const float32x4x4_t a = vld4q_f32(reinterpret_cast<const float *>(input1.ptr()));
196 const float32x4x4_t b = vld4q_f32(reinterpret_cast<const float *>(input2.ptr()));
197
198 vst4q_f32(reinterpret_cast<float *>(output.ptr()), vadd4q_f32(a, b));
199 },
200 input1, input2, output);
201}
202
203void add_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
204{
205 Iterator input1(in1, window);
206 Iterator input2(in2, window);
207 Iterator output(out, window);
208
209 execute_window_loop(window, [&](const Coordinates & id)
210 {
211 const int16x8x2_t a = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
212 const int16x8x2_t b = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
213
214 vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), vadd2q_s16(a, b));
215 },
216 input1, input2, output);
217}
218
219void add_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
220{
221 Iterator input1(in1, window);
222 Iterator input2(in2, window);
223 Iterator output(out, window);
224
225 execute_window_loop(window, [&](const Coordinates & id)
226 {
227 const int16x8x2_t a = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
228 const int16x8x2_t b = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
229
230 vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), vqadd2q_s16(a, b));
231 },
232 input1, input2, output);
233}
234
235void add_wrap_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
236{
237 Iterator input1(in1, window);
238 Iterator input2(in2, window);
239 Iterator output(out, window);
240
241 execute_window_loop(window, [&](const Coordinates & id)
242 {
243 const int16x8x2_t a =
244 {
245 {
246 vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr())),
247 vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()) + 8)
248 }
249 };
250 const uint8x16_t b = vld1q_u8(input2.ptr());
251
252 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), vaddq_s16(a.val[0], vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b)))));
253 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, vaddq_s16(a.val[1], vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(b)))));
254 },
255 input1, input2, output);
256}
257
258void add_saturate_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
259{
260 Iterator input1(in1, window);
261 Iterator input2(in2, window);
262 Iterator output(out, window);
263
264 execute_window_loop(window, [&](const Coordinates & id)
265 {
266 const int16x8x2_t a =
267 {
268 {
269 vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr())),
270 vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()) + 8)
271 }
272 };
273 const uint8x16_t b = vld1q_u8(input2.ptr());
274
275 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), vqaddq_s16(a.val[0], vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b)))));
276 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, vqaddq_s16(a.val[1], vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(b)))));
277 },
278 input1, input2, output);
279}
280
281inline void add_wrap_U8_S16_S16(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)
282{
283 //Simply swap the two input buffers:
284 add_wrap_S16_U8_S16(input2, input1, output, window);
285}
286
287inline void add_saturate_U8_S16_S16(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)
288{
289 //Simply swap the two input buffers:
290 add_saturate_S16_U8_S16(input2, input1, output, window);
291}
292
293void add_wrap_U8_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
294{
295 Iterator input1(in1, window);
296 Iterator input2(in2, window);
297 Iterator output(out, window);
298
299 execute_window_loop(window, [&](const Coordinates & id)
300 {
301 const uint8x16_t a = vld1q_u8(input1.ptr());
302 const uint8x16_t b = vld1q_u8(input2.ptr());
303
304 const int16x8x2_t a_s16 =
305 {
306 {
307 vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a))),
308 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a)))
309 }
310 };
311
312 const int16x8x2_t b_s16 =
313 {
314 {
315 vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b))),
316 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(b)))
317 }
318 };
319
320 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), vaddq_s16(a_s16.val[0], b_s16.val[0]));
321 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, vaddq_s16(a_s16.val[1], b_s16.val[1]));
322 },
323 input1, input2, output);
324}
325
326void add_saturate_U8_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
327{
328 Iterator input1(in1, window);
329 Iterator input2(in2, window);
330 Iterator output(out, window);
331
332 execute_window_loop(window, [&](const Coordinates & id)
333 {
334 const uint8x16_t a = vld1q_u8(input1.ptr());
335 const uint8x16_t b = vld1q_u8(input2.ptr());
336
337 const int16x8x2_t a_s16 =
338 {
339 {
340 vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a))),
341 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a)))
342 }
343 };
344
345 const int16x8x2_t b_s16 =
346 {
347 {
348 vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b))),
349 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(b)))
350 }
351 };
352
353 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), vqaddq_s16(a_s16.val[0], b_s16.val[0]));
354 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, vqaddq_s16(a_s16.val[1], b_s16.val[1]));
355 },
356 input1, input2, output);
357}
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000358
359inline Error validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
360{
361 ARM_COMPUTE_UNUSED(policy);
362 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
363 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
364 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
365 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::QS16, DataType::S16, DataType::F16, DataType::F32);
366
367 if(is_data_type_fixed_point(input1->data_type()) || is_data_type_fixed_point(input2->data_type()) || is_data_type_fixed_point(output->data_type()))
368 {
369 // Check that all data types are the same and all fixed-point positions are the same
370 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input1, input2, output);
371 }
372
373 ARM_COMPUTE_RETURN_ERROR_ON_MSG(
374 !(input1->data_type() == DataType::QS8 && input2->data_type() == DataType::QS8 && output->data_type() == DataType::QS8)
375 && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::U8)
376 && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
377 && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
378 && !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
379 && !(input1->data_type() == DataType::QS16 && input2->data_type() == DataType::QS16 && output->data_type() == DataType::QS16)
380 && !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
381 && !(input1->data_type() == DataType::F32 && input2->data_type() == DataType::F32 && output->data_type() == DataType::F32)
382 && !(input1->data_type() == DataType::F16 && input2->data_type() == DataType::F16 && output->data_type() == DataType::F16),
383 "You called addition with the wrong image formats");
384
385 return Error{};
386}
387
388inline std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
389{
390 constexpr unsigned int num_elems_processed_per_iteration = 16;
391
392 // Configure kernel window
393 Window win = calculate_max_window(*input1, Steps(num_elems_processed_per_iteration));
394 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
395
396 bool window_changed = update_window_and_padding(win,
397 AccessWindowHorizontal(input1, 0, num_elems_processed_per_iteration),
398 AccessWindowHorizontal(input2, 0, num_elems_processed_per_iteration),
399 output_access);
400
401 ValidRegion valid_region = intersect_valid_regions(input1->valid_region(),
402 input2->valid_region());
403
404 output_access.set_valid_region(win, valid_region);
405
406 Error err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Error{};
407 return std::make_pair(err, win);
408}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100409} // namespace
410
411NEArithmeticAdditionKernel::NEArithmeticAdditionKernel()
412 : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr)
413{
414}
415
416void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
417{
418 ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
419
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100420 // Auto initialize output if not initialized
421 {
422 set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100423
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100424 if(input1->info()->data_type() == DataType::S16 || input2->info()->data_type() == DataType::S16)
425 {
426 set_format_if_unknown(*output->info(), Format::S16);
427 }
Pablo Tellod1b0ecc2017-07-11 11:27:04 +0100428 else if(input1->info()->data_type() == DataType::F16 || input2->info()->data_type() == DataType::F16)
429 {
430 set_format_if_unknown(*output->info(), Format::F16);
431 }
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100432 else if(input1->info()->data_type() == DataType::F32 || input2->info()->data_type() == DataType::F32)
433 {
434 set_format_if_unknown(*output->info(), Format::F32);
435 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100436 }
437
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000438 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), policy));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100439
440 static std::map<std::string, AddFunction *> map_function =
441 {
Michele Di Giorgio81f0d152017-07-11 15:00:52 +0100442 { "add_wrap_QS8_QS8_QS8", &add_wrap_QS8_QS8_QS8 },
443 { "add_saturate_QS8_QS8_QS8", &add_saturate_QS8_QS8_QS8 },
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100444 { "add_wrap_U8_U8_U8", &add_wrap_U8_U8_U8 },
445 { "add_saturate_U8_U8_U8", &add_saturate_U8_U8_U8 },
446 { "add_wrap_S16_U8_S16", &add_wrap_S16_U8_S16 },
447 { "add_saturate_S16_U8_S16", &add_saturate_S16_U8_S16 },
448 { "add_wrap_U8_S16_S16", &add_wrap_U8_S16_S16 },
449 { "add_saturate_U8_S16_S16", &add_saturate_U8_S16_S16 },
450 { "add_wrap_U8_U8_S16", &add_wrap_U8_U8_S16 },
451 { "add_saturate_U8_U8_S16", &add_saturate_U8_U8_S16 },
Michele Di Giorgio81f0d152017-07-11 15:00:52 +0100452 { "add_wrap_QS16_QS16_QS16", &add_wrap_S16_S16_S16 },
453 { "add_saturate_QS16_QS16_QS16", &add_saturate_S16_S16_S16 },
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100454 { "add_wrap_S16_S16_S16", &add_wrap_S16_S16_S16 },
455 { "add_saturate_S16_S16_S16", &add_saturate_S16_S16_S16 },
456 { "add_wrap_F32_F32_F32", &add_F32_F32_F32 },
457 { "add_saturate_F32_F32_F32", &add_F32_F32_F32 },
Pablo Tellod1b0ecc2017-07-11 11:27:04 +0100458 { "add_wrap_F16_F16_F16", &add_F16_F16_F16 },
459 { "add_saturate_F16_F16_F16", &add_F16_F16_F16 },
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100460 };
461
462 _input1 = input1;
463 _input2 = input2;
464 _output = output;
465
466 std::string function_to_call("add_");
467 function_to_call += policy == ConvertPolicy::WRAP ? "wrap_" : "saturate_";
468 function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
469 function_to_call += string_from_data_type(input2->info()->data_type()) + "_";
470 function_to_call += string_from_data_type(output->info()->data_type());
471
472 auto it = map_function.find(function_to_call);
473
474 if(it != map_function.end())
475 {
476 _func = it->second;
477 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100478
479 // Configure kernel window
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000480 auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
481 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
482 INEKernel::configure(win_config.second);
483}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100484
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000485Error NEArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
486{
487 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, policy));
488 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get()).first);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100489
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000490 return Error{};
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100491}
492
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100493void NEArithmeticAdditionKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100494{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100495 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100496 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
497 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
498 ARM_COMPUTE_ERROR_ON(_func == nullptr);
499
500 (*_func)(_input1, _input2, _output, window);
501}