blob: 3d4800fe151b2faa65770bf13dc17d5ac2d6c1d2 [file] [log] [blame]
/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IAccessWindow.h"
29#include "arm_compute/core/ITensor.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Types.h"
32#include "arm_compute/core/Validate.h"
33
34#include <arm_neon.h>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010035
36namespace arm_compute
37{
Anthony Barbier6ff3b192017-09-04 18:44:23 +010038namespace
39{
40void abs_diff_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
41{
42 Iterator input1(in1, window);
43 Iterator input2(in2, window);
44 Iterator output(out, window);
45
Michalis Spyroua4f378d2019-04-26 14:54:54 +010046 execute_window_loop(window, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010047 {
48 const uint8x16_t input1_val = vld1q_u8(input1.ptr());
49 const uint8x16_t input2_val = vld1q_u8(input2.ptr());
50
51 vst1q_u8(output.ptr(), vabdq_u8(input1_val, input2_val));
52 },
53 input1, input2, output);
54}
55
56inline int16x8x2_t vqabd2q_s16(const int16x8x2_t &v1, const int16x8x2_t &v2)
57{
58 const int16x8x2_t res =
59 {
60 {
61 vqabsq_s16(vqsubq_s16(v1.val[0], v2.val[0])),
62 vqabsq_s16(vqsubq_s16(v1.val[1], v2.val[1]))
63 }
64 };
65
66 return res;
67}
68
69void abs_diff_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
70{
71 Iterator input1(in1, window);
72 Iterator input2(in2, window);
73 Iterator output(out, window);
74
Michalis Spyroua4f378d2019-04-26 14:54:54 +010075 execute_window_loop(window, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010076 {
77 int16x8x2_t input1_val = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
78 int16x8x2_t input2_val = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
79 vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), vqabd2q_s16(input1_val, input2_val));
80 },
81 input1, input2, output);
82}
83
84void abs_diff_U8_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
85{
86 Iterator input1(in1, window);
87 Iterator input2(in2, window);
88 Iterator output(out, window);
89
Michalis Spyroua4f378d2019-04-26 14:54:54 +010090 execute_window_loop(window, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010091 {
92 const uint8x16_t input1_val = vld1q_u8(input1.ptr());
93 const int16x8x2_t input2_val =
94 {
95 {
96 vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr())),
97 vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8)
98 }
99 };
100
101 const int16x8x2_t out_val =
102 {
103 {
104 vqabsq_s16(vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(input1_val))), input2_val.val[0])),
105 vqabsq_s16(vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(input1_val))), input2_val.val[1]))
106 }
107 };
108
109 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), out_val.val[0]);
110 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, out_val.val[1]);
111
112 },
113 input1, input2, output);
114}
115
116void abs_diff_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
117{
118 abs_diff_U8_S16_S16(in2, in1, out, window);
119}
120} // namespace
121
122NEAbsoluteDifferenceKernel::NEAbsoluteDifferenceKernel()
123 : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr)
124{
125}
126
127void NEAbsoluteDifferenceKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
128{
129 ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
130
131 set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
132
133 if(input1->info()->data_type() == DataType::S16 || input2->info()->data_type() == DataType::S16)
134 {
135 set_format_if_unknown(*output->info(), Format::S16);
136 }
Michele Di Giorgiof9b595a2020-07-03 13:34:52 +0100137 else if(input1->info()->data_type() == DataType::U8 || input2->info()->data_type() == DataType::U8)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100138 {
139 set_format_if_unknown(*output->info(), Format::U8);
140 }
141
142 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
143 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16);
144 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16);
145 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
146 ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8),
147 "The output image can only be U8 if both input images are U8");
148
149 _input1 = input1;
150 _input2 = input2;
151 _output = output;
152
153 const DataType input1_data_type = input1->info()->data_type();
154 const DataType input2_data_type = input2->info()->data_type();
155
156 if(input1_data_type == input2_data_type)
157 {
158 if(input1_data_type == DataType::U8)
159 {
160 _func = &abs_diff_U8_U8_U8;
161 }
162 else
163 {
164 _func = &abs_diff_S16_S16_S16;
165 }
166 }
167 else
168 {
169 if(input1_data_type == DataType::U8)
170 {
171 _func = &abs_diff_U8_S16_S16;
172 }
173 else
174 {
175 _func = &abs_diff_S16_U8_S16;
176 }
177 }
178
179 constexpr unsigned int num_elems_processed_per_iteration = 16;
180
181 // Configure kernel window
182 Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration));
183 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
184
185 update_window_and_padding(win,
186 AccessWindowHorizontal(input1->info(), 0, num_elems_processed_per_iteration),
187 AccessWindowHorizontal(input2->info(), 0, num_elems_processed_per_iteration),
188 output_access);
189
190 ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(),
191 input2->info()->valid_region());
192
193 output_access.set_valid_region(win, valid_region);
194
195 INEKernel::configure(win);
196}
197
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100198void NEAbsoluteDifferenceKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100199{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100200 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100201 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
202 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
203 ARM_COMPUTE_ERROR_ON(_func == nullptr);
204
205 _func(_input1, _input2, _output, window);
206}
Michele Di Giorgiof9b595a2020-07-03 13:34:52 +0100207} // namespace arm_compute