blob: 5a162e3b2cdbfc9c2c4d67221c357d74d1eea4a1 [file] [log] [blame]
/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
Michele Di Giorgio81f0d152017-07-11 15:00:52 +010029#include "arm_compute/core/NEON/NEFixedPoint.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010030#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Validate.h"
32
33#include <algorithm>
34#include <arm_neon.h>
35#include <cstdint>
36#include <map>
37#include <string>
38
39using namespace arm_compute;
40
// Forward declaration: Coordinates is only used by reference as the
// per-iteration argument of the window-loop lambdas in this file.
namespace arm_compute
{
class Coordinates;
}
45
46namespace
47{
48void sub_wrap_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
49{
50 Iterator input1(in1, window);
51 Iterator input2(in2, window);
52 Iterator output(out, window);
53
54 execute_window_loop(window, [&](const Coordinates & id)
55 {
56 const uint8x16_t ta1 = vld1q_u8(input1.ptr());
57 const uint8x16_t ta2 = vld1q_u8(input2.ptr());
58
59 vst1q_u8(output.ptr(), vsubq_u8(ta1, ta2));
60 },
61 input1, input2, output);
62}
63
64void sub_saturate_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
65{
66 Iterator input1(in1, window);
67 Iterator input2(in2, window);
68 Iterator output(out, window);
69
70 execute_window_loop(window, [&](const Coordinates & id)
71 {
72 const uint8x16_t ta1 = vld1q_u8(input1.ptr());
73 const uint8x16_t ta2 = vld1q_u8(input2.ptr());
74
75 vst1q_u8(output.ptr(), vqsubq_u8(ta1, ta2));
76 },
77 input1, input2, output);
78}
79
80void sub_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
81{
82 Iterator input1(in1, window);
83 Iterator input2(in2, window);
84 Iterator output(out, window);
85
86 execute_window_loop(window, [&](const Coordinates & id)
87 {
88 const int16x8x2_t ta1 = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
89 const int16x8x2_t ta2 = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
90
91 const int16x8x2_t ta3 =
92 {
93 {
94 vsubq_s16(ta1.val[0], ta2.val[0]),
95 vsubq_s16(ta1.val[1], ta2.val[1])
96 }
97 };
98
99 vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), ta3);
100 },
101 input1, input2, output);
102}
103
104void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
105{
106 Iterator input1(in1, window);
107 Iterator input2(in2, window);
108 Iterator output(out, window);
109
110 execute_window_loop(window, [&](const Coordinates & id)
111 {
112 const int16x8x2_t ta1 = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
113 const int16x8x2_t ta2 = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
114
115 const int16x8x2_t ta3 =
116 {
117 {
118 vqsubq_s16(ta1.val[0], ta2.val[0]),
119 vqsubq_s16(ta1.val[1], ta2.val[1])
120 }
121 };
122
123 vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), ta3);
124 },
125 input1, input2, output);
126}
127
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Subtract two pairs of 8-lane F16 vectors, pair element by pair element.
 *
 * @param[in] a Minuend vector pair.
 * @param[in] b Subtrahend vector pair.
 *
 * @return Vector pair whose val[i] equals a.val[i] - b.val[i].
 */
inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
{
    float16x8x2_t difference;
    difference.val[0] = vsubq_f16(a.val[0], b.val[0]);
    difference.val[1] = vsubq_f16(a.val[1], b.val[1]);
    return difference;
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod7a5d222017-07-11 13:54:43 +0100142
143void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
144{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000145#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tellod7a5d222017-07-11 13:54:43 +0100146 Iterator input1(in1, window);
147 Iterator input2(in2, window);
148 Iterator output(out, window);
149
150 execute_window_loop(window, [&](const Coordinates & id)
151 {
152 const float16x8x2_t a = vld2q_f16(reinterpret_cast<const float16_t *>(input1.ptr()));
153 const float16x8x2_t b = vld2q_f16(reinterpret_cast<const float16_t *>(input2.ptr()));
154
155 vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vsub2q_f16(a, b));
156 },
157 input1, input2, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000158#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod7a5d222017-07-11 13:54:43 +0100159 ARM_COMPUTE_UNUSED(in1);
160 ARM_COMPUTE_UNUSED(in2);
161 ARM_COMPUTE_UNUSED(out);
162 ARM_COMPUTE_UNUSED(window);
163 ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000164#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod7a5d222017-07-11 13:54:43 +0100165}
166
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100167void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
168{
169 Iterator input1(in1, window);
170 Iterator input2(in2, window);
171 Iterator output(out, window);
172
173 execute_window_loop(window, [&](const Coordinates & id)
174 {
175 const float32x4x4_t ta1 = vld4q_f32(reinterpret_cast<const float *>(input1.ptr()));
176 const float32x4x4_t ta2 = vld4q_f32(reinterpret_cast<const float *>(input2.ptr()));
177
178 const float32x4x4_t ta3 =
179 {
180 {
181 vsubq_f32(ta1.val[0], ta2.val[0]),
182 vsubq_f32(ta1.val[1], ta2.val[1]),
183 vsubq_f32(ta1.val[2], ta2.val[2]),
184 vsubq_f32(ta1.val[3], ta2.val[3]),
185 }
186 };
187
188 vst4q_f32(reinterpret_cast<float *>(output.ptr()), ta3);
189 },
190 input1, input2, output);
191}
192void sub_wrap_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
193{
194 Iterator input1(in1, window);
195 Iterator input2(in2, window);
196 Iterator output(out, window);
197
198 execute_window_loop(window, [&](const Coordinates & id)
199 {
200 const uint8x16_t bv_0 = vld1q_u8(input2.ptr());
201 int16x8_t a1_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
202 int16x8_t a2_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()) + 8);
203
204 a1_0 = vsubq_s16(a1_0, vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))));
205 a2_0 = vsubq_s16(a2_0, vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))));
206
207 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
208 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
209 },
210 input1, input2, output);
211}
212
213void sub_saturate_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
214{
215 Iterator input1(in1, window);
216 Iterator input2(in2, window);
217 Iterator output(out, window);
218
219 execute_window_loop(window, [&](const Coordinates & id)
220 {
221 const uint8x16_t bv_0 = vld1q_u8(input2.ptr());
222 int16x8_t a1_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
223 int16x8_t a2_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()) + 8);
224
225 a1_0 = vqsubq_s16(a1_0, vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))));
226 a2_0 = vqsubq_s16(a2_0, vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))));
227
228 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
229 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
230 },
231 input1, input2, output);
232}
233
234void sub_wrap_U8_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
235{
236 Iterator input1(in1, window);
237 Iterator input2(in2, window);
238 Iterator output(out, window);
239
240 execute_window_loop(window, [&](const Coordinates & id)
241 {
242 const uint8x16_t bv_0 = vld1q_u8(input1.ptr());
243 int16x8_t a1_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
244 int16x8_t a2_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8);
245
246 a1_0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))), a1_0);
247 a2_0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))), a2_0);
248
249 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
250 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
251 },
252 input1, input2, output);
253}
254
255void sub_saturate_U8_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
256{
257 Iterator input1(in1, window);
258 Iterator input2(in2, window);
259 Iterator output(out, window);
260
261 execute_window_loop(window, [&](const Coordinates & id)
262 {
263 const uint8x16_t bv_0 = vld1q_u8(input1.ptr());
264 int16x8_t a1_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
265 int16x8_t a2_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8);
266
267 a1_0 = vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))), a1_0);
268 a2_0 = vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))), a2_0);
269
270 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
271 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
272 },
273 input1, input2, output);
274}
275
276void sub_wrap_U8_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
277{
278 Iterator input1(in1, window);
279 Iterator input2(in2, window);
280 Iterator output(out, window);
281
282 execute_window_loop(window, [&](const Coordinates & id)
283 {
284 const uint8x16_t av_0 = vld1q_u8(input1.ptr());
285 const uint8x16_t bv_0 = vld1q_u8(input2.ptr());
286
287 const int16x8_t a1_0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(av_0))),
288 vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))));
289 const int16x8_t a2_0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(av_0))),
290 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))));
291
292 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
293 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
294 },
295 input1, input2, output);
296}
297
298void sub_saturate_U8_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
299{
300 Iterator input1(in1, window);
301 Iterator input2(in2, window);
302 Iterator output(out, window);
303
304 execute_window_loop(window, [&](const Coordinates & id)
305 {
306 const uint8x16_t av_0 = vld1q_u8(input1.ptr());
307 const uint8x16_t bv_0 = vld1q_u8(input2.ptr());
308
309 const int16x8_t a1_0 = vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(av_0))),
310 vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))));
311 const int16x8_t a2_0 = vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(av_0))),
312 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))));
313
314 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
315 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
316 },
317 input1, input2, output);
318}
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000319
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000320inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000321{
322 ARM_COMPUTE_UNUSED(policy);
323 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100324 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
325 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
326 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000327
328 ARM_COMPUTE_RETURN_ERROR_ON_MSG(
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100329 !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::U8)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000330 && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
331 && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
332 && !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000333 && !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
334 && !(input1->data_type() == DataType::F32 && input2->data_type() == DataType::F32 && output->data_type() == DataType::F32)
335 && !(input1->data_type() == DataType::F16 && input2->data_type() == DataType::F16 && output->data_type() == DataType::F16),
336 "You called subtract with the wrong image formats");
337
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000338 return Status{};
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000339}
340
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000341inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000342{
343 constexpr unsigned int num_elems_processed_per_iteration = 16;
344
345 // Configure kernel window
346 Window win = calculate_max_window(*input1, Steps(num_elems_processed_per_iteration));
347 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
348
349 bool window_changed = update_window_and_padding(win,
350 AccessWindowHorizontal(input1, 0, num_elems_processed_per_iteration),
351 AccessWindowHorizontal(input2, 0, num_elems_processed_per_iteration),
352 output_access);
353
354 ValidRegion valid_region = intersect_valid_regions(input1->valid_region(),
355 input2->valid_region());
356
357 output_access.set_valid_region(win, valid_region);
358
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000359 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000360 return std::make_pair(err, win);
361}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100362} // namespace
363
364NEArithmeticSubtractionKernel::NEArithmeticSubtractionKernel()
365 : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr)
366{
367}
368
369void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
370{
371 ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
372
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100373 // Auto initialize output if not initialized
374 {
375 set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100376
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100377 if(input1->info()->data_type() == DataType::S16 || input2->info()->data_type() == DataType::S16)
378 {
379 set_format_if_unknown(*output->info(), Format::S16);
380 }
Pablo Tellod7a5d222017-07-11 13:54:43 +0100381 else if(input1->info()->data_type() == DataType::F16 || input2->info()->data_type() == DataType::F16)
382 {
383 set_format_if_unknown(*output->info(), Format::F16);
384 }
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100385 else if(input1->info()->data_type() == DataType::F32 || input2->info()->data_type() == DataType::F32)
386 {
387 set_format_if_unknown(*output->info(), Format::F32);
388 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100389 }
390
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000391 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), policy));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100392
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000393 static std::map<std::string, NEArithmeticSubtractionKernel::SubFunction *> map_function =
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100394 {
395 { "sub_wrap_U8_U8_U8", &sub_wrap_U8_U8_U8 },
396 { "sub_wrap_U8_U8_S16", &sub_wrap_U8_U8_S16 },
397 { "sub_saturate_U8_U8_U8", &sub_saturate_U8_U8_U8 },
398 { "sub_saturate_U8_U8_S16", &sub_saturate_U8_U8_S16 },
399 { "sub_wrap_U8_S16_S16", &sub_wrap_U8_S16_S16 },
400 { "sub_wrap_S16_U8_S16", &sub_wrap_S16_U8_S16 },
401 { "sub_saturate_U8_S16_S16", &sub_saturate_U8_S16_S16 },
402 { "sub_saturate_S16_U8_S16", &sub_saturate_S16_U8_S16 },
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100403 { "sub_wrap_S16_S16_S16", &sub_wrap_S16_S16_S16 },
404 { "sub_saturate_S16_S16_S16", &sub_saturate_S16_S16_S16 },
405 { "sub_wrap_F32_F32_F32", &sub_F32_F32_F32 },
406 { "sub_saturate_F32_F32_F32", &sub_F32_F32_F32 },
Pablo Tellod7a5d222017-07-11 13:54:43 +0100407 { "sub_wrap_F16_F16_F16", &sub_F16_F16_F16 },
408 { "sub_saturate_F16_F16_F16", &sub_F16_F16_F16 },
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100409 };
410
411 _input1 = input1;
412 _input2 = input2;
413 _output = output;
414
415 std::string function_to_call("sub_");
416 function_to_call += policy == ConvertPolicy::WRAP ? "wrap_" : "saturate_";
417 function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
418 function_to_call += string_from_data_type(input2->info()->data_type()) + "_";
419 function_to_call += string_from_data_type(output->info()->data_type());
420
421 auto it = map_function.find(function_to_call);
422
423 if(it != map_function.end())
424 {
425 _func = it->second;
426 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100427
428 // Configure kernel window
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000429 auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
430 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
431 INEKernel::configure(win_config.second);
432}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100433
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000434Status NEArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000435{
436 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, policy));
437 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get()).first);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100438
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000439 return Status{};
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100440}
441
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100442void NEArithmeticSubtractionKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100443{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100444 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100445 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
446 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
447 ARM_COMPUTE_ERROR_ON(_func == nullptr);
448
449 (*_func)(_input1, _input2, _output, window);
450}