blob: 61e13049631d7ae01ba863b2251cf0d78f07432f [file] [log] [blame]
/*
 * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IAccessWindow.h"
29#include "arm_compute/core/IMultiImage.h"
30#include "arm_compute/core/ITensor.h"
31#include "arm_compute/core/MultiImageInfo.h"
32#include "arm_compute/core/NEON/INEKernel.h"
33#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Types.h"
35#include "arm_compute/core/Validate.h"
36#include "arm_compute/core/Window.h"
37
38#include <arm_neon.h>
39
40using namespace arm_compute;
41
// Forward declaration only: this file names Coordinates solely as the
// (unused) parameter type of the lambdas passed to execute_window_loop().
namespace arm_compute
{
class Coordinates;
} // namespace arm_compute
46
47NEChannelExtractKernel::NEChannelExtractKernel()
48 : _func(nullptr), _lut_index(0)
49{
50}
51
52void NEChannelExtractKernel::configure(const ITensor *input, Channel channel, ITensor *output)
53{
54 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
55 ARM_COMPUTE_ERROR_ON(input == output);
56
57 set_format_if_unknown(*output->info(), Format::U8);
58
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010059 // Check if input tensor has a valid format
Anthony Barbier6ff3b192017-09-04 18:44:23 +010060 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::RGB888, Format::RGBA8888, Format::UYVY422, Format::YUYV422);
61 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8);
62
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010063 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
64 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010065
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010066 // Check if channel is valid for given format
67 const Format format = input->info()->format();
68 ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(format, channel);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010069
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010070 unsigned int subsampling = 1;
71
72 if(format == Format::YUYV422 || format == Format::UYVY422)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010073 {
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010074 // Check if the width of the tensor shape is even for formats with subsampled channels (UYVY422 and YUYV422)
75 ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(format, input);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010076
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010077 if(channel != Channel::Y)
78 {
79 subsampling = 2;
80 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +010081 }
82
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010083 TensorShape output_shape = calculate_subsampled_shape(input->info()->tensor_shape(), format, channel);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010084 set_shape_if_empty(*output->info(), output_shape);
85
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010086 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output_shape, output->info()->tensor_shape());
Anthony Barbier6ff3b192017-09-04 18:44:23 +010087
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010088 _input = input;
89 _output = output;
90 _lut_index = channel_idx_from_format(format, channel);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010091
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +010092 unsigned int num_elems_processed_per_iteration = 16;
93
94 if(format == Format::YUYV422 || format == Format::UYVY422)
95 {
96 _func = &NEChannelExtractKernel::extract_1C_from_2C_img;
97
98 if(channel != Channel::Y) // Channel::U or Channel::V
99 {
100 num_elems_processed_per_iteration = 32;
101 _func = &NEChannelExtractKernel::extract_YUYV_uv;
102 }
103 }
104 else // Format::RGB888 or Format::RGBA8888
105 {
106 _func = &NEChannelExtractKernel::extract_1C_from_3C_img;
107
108 if(format == Format::RGBA8888)
109 {
110 _func = &NEChannelExtractKernel::extract_1C_from_4C_img;
111 }
112 }
113
114 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
115
Georgios Pinitasb158fba2017-07-05 16:50:24 +0100116 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
117 AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / subsampling, 1.f / subsampling);
Georgios Pinitasb158fba2017-07-05 16:50:24 +0100118 update_window_and_padding(win, input_access, output_access);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100119
120 ValidRegion input_valid_region = input->info()->valid_region();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100121 output_access.set_valid_region(win, ValidRegion(input_valid_region.anchor, output->info()->tensor_shape()));
122
123 INEKernel::configure(win);
124}
125
126void NEChannelExtractKernel::configure(const IMultiImage *input, Channel channel, IImage *output)
127{
128 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
129 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
130
131 set_format_if_unknown(*output->info(), Format::U8);
132
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +0100133 const Format format = input->info()->format();
134 ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(format, channel);
135
136 // Get input plane
137 const IImage *input_plane = input->plane(plane_idx_from_channel(format, channel));
138 ARM_COMPUTE_ERROR_ON_NULLPTR(input_plane);
139
140 if(Channel::Y == channel && format != Format::YUV444)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100141 {
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +0100142 // Check if the width of the tensor shape is even for formats with subsampled channels (UYVY422 and YUYV422)
143 ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(format, input_plane);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100144 }
145
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +0100146 // Calculate 2x2 subsampled tensor shape
147 TensorShape output_shape = calculate_subsampled_shape(input->plane(0)->info()->tensor_shape(), format, channel);
148 set_shape_if_empty(*output->info(), output_shape);
149
150 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output_shape, output->info()->tensor_shape());
151
152 // Check if input tensor has a valid format
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100153 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444);
154 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8);
155
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +0100156 _input = input_plane;
157 _output = output;
158 _lut_index = channel_idx_from_format(format, channel);
159
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100160 unsigned int num_elems_processed_per_iteration = 32;
161
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +0100162 _func = &NEChannelExtractKernel::copy_plane;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100163
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +0100164 if((format == Format::NV12 || format == Format::NV21) && channel != Channel::Y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100165 {
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +0100166 num_elems_processed_per_iteration = 16;
167 _func = &NEChannelExtractKernel::extract_1C_from_2C_img;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100168 }
169
Ioan-Cristian Szabo9414f642017-10-27 17:35:40 +0100170 Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));
171
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100172 AccessWindowHorizontal input_access(_input->info(), 0, num_elems_processed_per_iteration);
Georgios Pinitasb158fba2017-07-05 16:50:24 +0100173 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100174 update_window_and_padding(win, input_access, output_access);
175 output_access.set_valid_region(win, _input->info()->valid_region());
176
177 INEKernel::configure(win);
178}
179
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100180void NEChannelExtractKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100181{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100182 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100183 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
184 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window);
185 ARM_COMPUTE_ERROR_ON(_func == nullptr);
186
187 (this->*_func)(window);
188}
189
190void NEChannelExtractKernel::extract_1C_from_2C_img(const Window &win)
191{
192 Iterator in(_input, win);
193 Iterator out(_output, win);
194
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100195 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100196 {
197 const auto in_ptr = static_cast<uint8_t *>(in.ptr());
198 const auto out_ptr = static_cast<uint8_t *>(out.ptr());
199 const auto pixels = vld2q_u8(in_ptr);
200 vst1q_u8(out_ptr, pixels.val[_lut_index]);
201 },
202 in, out);
203}
204
205void NEChannelExtractKernel::extract_1C_from_3C_img(const Window &win)
206{
207 Iterator in(_input, win);
208 Iterator out(_output, win);
209
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100210 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100211 {
212 const auto in_ptr = static_cast<uint8_t *>(in.ptr());
213 const auto out_ptr = static_cast<uint8_t *>(out.ptr());
214 const auto pixels = vld3q_u8(in_ptr);
215 vst1q_u8(out_ptr, pixels.val[_lut_index]);
216 },
217 in, out);
218}
219
220void NEChannelExtractKernel::extract_1C_from_4C_img(const Window &win)
221{
222 Iterator in(_input, win);
223 Iterator out(_output, win);
224
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100225 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100226 {
227 const auto in_ptr = static_cast<uint8_t *>(in.ptr());
228 const auto out_ptr = static_cast<uint8_t *>(out.ptr());
229 const auto pixels = vld4q_u8(in_ptr);
230 vst1q_u8(out_ptr, pixels.val[_lut_index]);
231 },
232 in, out);
233}
234
235void NEChannelExtractKernel::extract_YUYV_uv(const Window &win)
236{
237 ARM_COMPUTE_ERROR_ON(win.x().step() % 2);
238
239 Window win_out(win);
240 win_out.set_dimension_step(Window::DimX, win.x().step() / 2);
241
242 Iterator in(_input, win);
243 Iterator out(_output, win_out);
244
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100245 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100246 {
247 const auto in_ptr = static_cast<uint8_t *>(in.ptr());
248 const auto out_ptr = static_cast<uint8_t *>(out.ptr());
249 const auto pixels = vld4q_u8(in_ptr);
250 vst1q_u8(out_ptr, pixels.val[_lut_index]);
251 },
252 in, out);
253}
254
255void NEChannelExtractKernel::copy_plane(const Window &win)
256{
257 Iterator in(_input, win);
258 Iterator out(_output, win);
259
260 execute_window_loop(win, [&](const Coordinates &)
261 {
262 const auto in_ptr = static_cast<uint8_t *>(in.ptr());
263 const auto out_ptr = static_cast<uint8_t *>(out.ptr());
264 vst4_u8(out_ptr, vld4_u8(in_ptr));
265 },
266 in, out);
267}