/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
25
Anthony Barbiereaefd002018-07-20 17:49:35 +010026#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010027#include "arm_compute/core/Error.h"
28#include "arm_compute/core/Helpers.h"
29#include "arm_compute/core/ITensor.h"
Michele Di Giorgio81f0d152017-07-11 15:00:52 +010030#include "arm_compute/core/NEON/NEFixedPoint.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031#include "arm_compute/core/TensorInfo.h"
32#include "arm_compute/core/Validate.h"
33
34#include <algorithm>
35#include <arm_neon.h>
36#include <cstdint>
37#include <map>
38#include <string>
39
40using namespace arm_compute;
41
42namespace arm_compute
43{
44class Coordinates;
45} // namespace arm_compute
46
47namespace
48{
49void sub_wrap_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
50{
51 Iterator input1(in1, window);
52 Iterator input2(in2, window);
53 Iterator output(out, window);
54
55 execute_window_loop(window, [&](const Coordinates & id)
56 {
57 const uint8x16_t ta1 = vld1q_u8(input1.ptr());
58 const uint8x16_t ta2 = vld1q_u8(input2.ptr());
59
60 vst1q_u8(output.ptr(), vsubq_u8(ta1, ta2));
61 },
62 input1, input2, output);
63}
64
65void sub_saturate_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
66{
67 Iterator input1(in1, window);
68 Iterator input2(in2, window);
69 Iterator output(out, window);
70
71 execute_window_loop(window, [&](const Coordinates & id)
72 {
73 const uint8x16_t ta1 = vld1q_u8(input1.ptr());
74 const uint8x16_t ta2 = vld1q_u8(input2.ptr());
75
76 vst1q_u8(output.ptr(), vqsubq_u8(ta1, ta2));
77 },
78 input1, input2, output);
79}
80
81void sub_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
82{
83 Iterator input1(in1, window);
84 Iterator input2(in2, window);
85 Iterator output(out, window);
86
87 execute_window_loop(window, [&](const Coordinates & id)
88 {
89 const int16x8x2_t ta1 = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
90 const int16x8x2_t ta2 = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
91
92 const int16x8x2_t ta3 =
93 {
94 {
95 vsubq_s16(ta1.val[0], ta2.val[0]),
96 vsubq_s16(ta1.val[1], ta2.val[1])
97 }
98 };
99
100 vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), ta3);
101 },
102 input1, input2, output);
103}
104
105void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
106{
107 Iterator input1(in1, window);
108 Iterator input2(in2, window);
109 Iterator output(out, window);
110
111 execute_window_loop(window, [&](const Coordinates & id)
112 {
113 const int16x8x2_t ta1 = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
114 const int16x8x2_t ta2 = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
115
116 const int16x8x2_t ta3 =
117 {
118 {
119 vqsubq_s16(ta1.val[0], ta2.val[0]),
120 vqsubq_s16(ta1.val[1], ta2.val[1])
121 }
122 };
123
124 vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), ta3);
125 },
126 input1, input2, output);
127}
128
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Subtract two pairs of float16x8 vectors lane by lane.
 *
 * @param[in] a Minuend pair.
 * @param[in] b Subtrahend pair.
 *
 * @return A pair whose val[i] is a.val[i] - b.val[i] for i in {0, 1}.
 */
inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
{
    float16x8x2_t diff;
    diff.val[0] = vsubq_f16(a.val[0], b.val[0]);
    diff.val[1] = vsubq_f16(a.val[1], b.val[1]);

    return diff;
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod7a5d222017-07-11 13:54:43 +0100143
144void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
145{
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000146#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tellod7a5d222017-07-11 13:54:43 +0100147 Iterator input1(in1, window);
148 Iterator input2(in2, window);
149 Iterator output(out, window);
150
151 execute_window_loop(window, [&](const Coordinates & id)
152 {
153 const float16x8x2_t a = vld2q_f16(reinterpret_cast<const float16_t *>(input1.ptr()));
154 const float16x8x2_t b = vld2q_f16(reinterpret_cast<const float16_t *>(input2.ptr()));
155
156 vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vsub2q_f16(a, b));
157 },
158 input1, input2, output);
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000159#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod7a5d222017-07-11 13:54:43 +0100160 ARM_COMPUTE_UNUSED(in1);
161 ARM_COMPUTE_UNUSED(in2);
162 ARM_COMPUTE_UNUSED(out);
163 ARM_COMPUTE_UNUSED(window);
164 ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000165#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tellod7a5d222017-07-11 13:54:43 +0100166}
167
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100168void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
169{
170 Iterator input1(in1, window);
171 Iterator input2(in2, window);
172 Iterator output(out, window);
173
174 execute_window_loop(window, [&](const Coordinates & id)
175 {
176 const float32x4x4_t ta1 = vld4q_f32(reinterpret_cast<const float *>(input1.ptr()));
177 const float32x4x4_t ta2 = vld4q_f32(reinterpret_cast<const float *>(input2.ptr()));
178
179 const float32x4x4_t ta3 =
180 {
181 {
182 vsubq_f32(ta1.val[0], ta2.val[0]),
183 vsubq_f32(ta1.val[1], ta2.val[1]),
184 vsubq_f32(ta1.val[2], ta2.val[2]),
185 vsubq_f32(ta1.val[3], ta2.val[3]),
186 }
187 };
188
189 vst4q_f32(reinterpret_cast<float *>(output.ptr()), ta3);
190 },
191 input1, input2, output);
192}
193void sub_wrap_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
194{
195 Iterator input1(in1, window);
196 Iterator input2(in2, window);
197 Iterator output(out, window);
198
199 execute_window_loop(window, [&](const Coordinates & id)
200 {
201 const uint8x16_t bv_0 = vld1q_u8(input2.ptr());
202 int16x8_t a1_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
203 int16x8_t a2_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()) + 8);
204
205 a1_0 = vsubq_s16(a1_0, vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))));
206 a2_0 = vsubq_s16(a2_0, vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))));
207
208 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
209 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
210 },
211 input1, input2, output);
212}
213
214void sub_saturate_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
215{
216 Iterator input1(in1, window);
217 Iterator input2(in2, window);
218 Iterator output(out, window);
219
220 execute_window_loop(window, [&](const Coordinates & id)
221 {
222 const uint8x16_t bv_0 = vld1q_u8(input2.ptr());
223 int16x8_t a1_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
224 int16x8_t a2_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()) + 8);
225
226 a1_0 = vqsubq_s16(a1_0, vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))));
227 a2_0 = vqsubq_s16(a2_0, vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))));
228
229 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
230 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
231 },
232 input1, input2, output);
233}
234
235void sub_wrap_U8_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
236{
237 Iterator input1(in1, window);
238 Iterator input2(in2, window);
239 Iterator output(out, window);
240
241 execute_window_loop(window, [&](const Coordinates & id)
242 {
243 const uint8x16_t bv_0 = vld1q_u8(input1.ptr());
244 int16x8_t a1_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
245 int16x8_t a2_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8);
246
247 a1_0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))), a1_0);
248 a2_0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))), a2_0);
249
250 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
251 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
252 },
253 input1, input2, output);
254}
255
256void sub_saturate_U8_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
257{
258 Iterator input1(in1, window);
259 Iterator input2(in2, window);
260 Iterator output(out, window);
261
262 execute_window_loop(window, [&](const Coordinates & id)
263 {
264 const uint8x16_t bv_0 = vld1q_u8(input1.ptr());
265 int16x8_t a1_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
266 int16x8_t a2_0 = vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8);
267
268 a1_0 = vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))), a1_0);
269 a2_0 = vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))), a2_0);
270
271 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
272 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
273 },
274 input1, input2, output);
275}
276
277void sub_wrap_U8_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
278{
279 Iterator input1(in1, window);
280 Iterator input2(in2, window);
281 Iterator output(out, window);
282
283 execute_window_loop(window, [&](const Coordinates & id)
284 {
285 const uint8x16_t av_0 = vld1q_u8(input1.ptr());
286 const uint8x16_t bv_0 = vld1q_u8(input2.ptr());
287
288 const int16x8_t a1_0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(av_0))),
289 vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))));
290 const int16x8_t a2_0 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(av_0))),
291 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))));
292
293 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
294 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
295 },
296 input1, input2, output);
297}
298
299void sub_saturate_U8_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
300{
301 Iterator input1(in1, window);
302 Iterator input2(in2, window);
303 Iterator output(out, window);
304
305 execute_window_loop(window, [&](const Coordinates & id)
306 {
307 const uint8x16_t av_0 = vld1q_u8(input1.ptr());
308 const uint8x16_t bv_0 = vld1q_u8(input2.ptr());
309
310 const int16x8_t a1_0 = vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(av_0))),
311 vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bv_0))));
312 const int16x8_t a2_0 = vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(av_0))),
313 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bv_0))));
314
315 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), a1_0);
316 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, a2_0);
317 },
318 input1, input2, output);
319}
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000320
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000321inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000322{
323 ARM_COMPUTE_UNUSED(policy);
Anthony Barbiereaefd002018-07-20 17:49:35 +0100324 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input1);
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000325 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100326 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
327 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
328 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000329
330 ARM_COMPUTE_RETURN_ERROR_ON_MSG(
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100331 !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::U8)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000332 && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
333 && !(input1->data_type() == DataType::U8 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
334 && !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::U8 && output->data_type() == DataType::S16)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000335 && !(input1->data_type() == DataType::S16 && input2->data_type() == DataType::S16 && output->data_type() == DataType::S16)
336 && !(input1->data_type() == DataType::F32 && input2->data_type() == DataType::F32 && output->data_type() == DataType::F32)
337 && !(input1->data_type() == DataType::F16 && input2->data_type() == DataType::F16 && output->data_type() == DataType::F16),
338 "You called subtract with the wrong image formats");
339
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000340 return Status{};
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000341}
342
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000343inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000344{
345 constexpr unsigned int num_elems_processed_per_iteration = 16;
346
347 // Configure kernel window
348 Window win = calculate_max_window(*input1, Steps(num_elems_processed_per_iteration));
349 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
350
351 bool window_changed = update_window_and_padding(win,
352 AccessWindowHorizontal(input1, 0, num_elems_processed_per_iteration),
353 AccessWindowHorizontal(input2, 0, num_elems_processed_per_iteration),
354 output_access);
355
356 ValidRegion valid_region = intersect_valid_regions(input1->valid_region(),
357 input2->valid_region());
358
359 output_access.set_valid_region(win, valid_region);
360
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000361 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000362 return std::make_pair(err, win);
363}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100364} // namespace
365
366NEArithmeticSubtractionKernel::NEArithmeticSubtractionKernel()
367 : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr)
368{
369}
370
371void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)
372{
373 ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
374
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100375 // Auto initialize output if not initialized
376 {
377 set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100378
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100379 if(input1->info()->data_type() == DataType::S16 || input2->info()->data_type() == DataType::S16)
380 {
381 set_format_if_unknown(*output->info(), Format::S16);
382 }
Pablo Tellod7a5d222017-07-11 13:54:43 +0100383 else if(input1->info()->data_type() == DataType::F16 || input2->info()->data_type() == DataType::F16)
384 {
385 set_format_if_unknown(*output->info(), Format::F16);
386 }
Georgios Pinitasf0dea702017-07-03 18:17:28 +0100387 else if(input1->info()->data_type() == DataType::F32 || input2->info()->data_type() == DataType::F32)
388 {
389 set_format_if_unknown(*output->info(), Format::F32);
390 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100391 }
392
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000393 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), policy));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100394
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000395 static std::map<std::string, NEArithmeticSubtractionKernel::SubFunction *> map_function =
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100396 {
397 { "sub_wrap_U8_U8_U8", &sub_wrap_U8_U8_U8 },
398 { "sub_wrap_U8_U8_S16", &sub_wrap_U8_U8_S16 },
399 { "sub_saturate_U8_U8_U8", &sub_saturate_U8_U8_U8 },
400 { "sub_saturate_U8_U8_S16", &sub_saturate_U8_U8_S16 },
401 { "sub_wrap_U8_S16_S16", &sub_wrap_U8_S16_S16 },
402 { "sub_wrap_S16_U8_S16", &sub_wrap_S16_U8_S16 },
403 { "sub_saturate_U8_S16_S16", &sub_saturate_U8_S16_S16 },
404 { "sub_saturate_S16_U8_S16", &sub_saturate_S16_U8_S16 },
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100405 { "sub_wrap_S16_S16_S16", &sub_wrap_S16_S16_S16 },
406 { "sub_saturate_S16_S16_S16", &sub_saturate_S16_S16_S16 },
407 { "sub_wrap_F32_F32_F32", &sub_F32_F32_F32 },
408 { "sub_saturate_F32_F32_F32", &sub_F32_F32_F32 },
Pablo Tellod7a5d222017-07-11 13:54:43 +0100409 { "sub_wrap_F16_F16_F16", &sub_F16_F16_F16 },
410 { "sub_saturate_F16_F16_F16", &sub_F16_F16_F16 },
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100411 };
412
413 _input1 = input1;
414 _input2 = input2;
415 _output = output;
416
417 std::string function_to_call("sub_");
418 function_to_call += policy == ConvertPolicy::WRAP ? "wrap_" : "saturate_";
419 function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
420 function_to_call += string_from_data_type(input2->info()->data_type()) + "_";
421 function_to_call += string_from_data_type(output->info()->data_type());
422
423 auto it = map_function.find(function_to_call);
424
425 if(it != map_function.end())
426 {
427 _func = it->second;
428 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100429
430 // Configure kernel window
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000431 auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
432 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
433 INEKernel::configure(win_config.second);
434}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100435
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000436Status NEArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
Ioan-Cristian Szabo397d58a2017-11-30 15:19:11 +0000437{
438 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, policy));
439 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get()).first);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100440
Georgios Pinitas631c41a2017-12-06 11:53:03 +0000441 return Status{};
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100442}
443
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100444void NEArithmeticSubtractionKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100445{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100446 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100447 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
448 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
449 ARM_COMPUTE_ERROR_ON(_func == nullptr);
450
451 (*_func)(_input1, _input2, _output, window);
452}