blob: 6bfd4c5bda2ae88f97fdb6cc92850baa68297211 [file] [log] [blame]
/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEChannelCombineKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IAccessWindow.h"
29#include "arm_compute/core/IMultiImage.h"
30#include "arm_compute/core/ITensor.h"
31#include "arm_compute/core/MultiImageInfo.h"
32#include "arm_compute/core/TensorInfo.h"
33#include "arm_compute/core/Types.h"
34#include "arm_compute/core/Validate.h"
35#include "arm_compute/core/Window.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010036#include "src/core/helpers/AutoConfiguration.h"
37#include "src/core/helpers/WindowHelpers.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010038
39#include <arm_neon.h>
40
41using namespace arm_compute;
42
43namespace arm_compute
44{
45class Coordinates;
46} // namespace arm_compute
47
48NEChannelCombineKernel::NEChannelCombineKernel()
49 : _func(nullptr), _planes{ { nullptr } }, _output(nullptr), _output_multi(nullptr), _x_subsampling{ { 1, 1, 1 } }, _y_subsampling{ { 1, 1, 1 } }, _num_elems_processed_per_iteration(8),
50_is_parallelizable(true)
51{
52}
53
54void NEChannelCombineKernel::configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output)
55{
56 ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output);
57 ARM_COMPUTE_ERROR_ON(plane0 == output);
58 ARM_COMPUTE_ERROR_ON(plane1 == output);
59 ARM_COMPUTE_ERROR_ON(plane2 == output);
60
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +000061 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane0, Format::U8);
62 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane1, Format::U8);
63 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane2, Format::U8);
64 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::RGB888, Format::RGBA8888, Format::UYVY422, Format::YUYV422);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010065
66 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane0, 1, DataType::U8);
67 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8);
68 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010069
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +000070 const Format output_format = output->info()->format();
71
72 // Check if horizontal dimension of Y plane is even and validate horizontal sub-sampling dimensions for U and V planes
73 if(Format::YUYV422 == output_format || Format::UYVY422 == output_format)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010074 {
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +000075 // Validate Y plane of input and output
76 ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output);
77
78 // Validate U and V plane of the input
79 ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010080 }
81
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +000082 _planes[0] = plane0;
83 _planes[1] = plane1;
84 _planes[2] = plane2;
85 _planes[3] = nullptr;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010086
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +000087 // Validate the last input tensor only for RGBA format
88 if(Format::RGBA8888 == output_format)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010089 {
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +000090 ARM_COMPUTE_ERROR_ON_NULLPTR(plane3);
91 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane3);
92
93 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane3, Format::U8);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010094 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane3, 1, DataType::U8);
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +000095
96 _planes[3] = plane3;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010097 }
98
Anthony Barbier6ff3b192017-09-04 18:44:23 +010099 _output = output;
100 _output_multi = nullptr;
101
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000102 // Half the processed elements for U and V channels due to horizontal sub-sampling of 2
103 if(Format::YUYV422 == output_format || Format::UYVY422 == output_format)
104 {
105 _x_subsampling[1] = 2;
106 _x_subsampling[2] = 2;
107 }
108
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100109 _num_elems_processed_per_iteration = 8;
110 _is_parallelizable = true;
111
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000112 // Select function and number of elements to process given the output format
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100113 switch(output_format)
114 {
115 case Format::RGB888:
116 _func = &NEChannelCombineKernel::combine_3C;
117 break;
118 case Format::RGBA8888:
119 _func = &NEChannelCombineKernel::combine_4C;
120 break;
121 case Format::UYVY422:
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100122 _num_elems_processed_per_iteration = 16;
123 _func = &NEChannelCombineKernel::combine_YUV_1p<true>;
124 break;
125 case Format::YUYV422:
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100126 _num_elems_processed_per_iteration = 16;
127 _func = &NEChannelCombineKernel::combine_YUV_1p<false>;
128 break;
129 default:
130 ARM_COMPUTE_ERROR("Not supported format.");
131 break;
132 }
133
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100134 Window win = calculate_max_window(*plane0->info(), Steps(_num_elems_processed_per_iteration));
135
136 AccessWindowHorizontal output_access(output->info(), 0, _num_elems_processed_per_iteration);
137 AccessWindowHorizontal plane0_access(plane0->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[0]);
138 AccessWindowHorizontal plane1_access(plane1->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[1]);
139 AccessWindowHorizontal plane2_access(plane2->info(), 0, _num_elems_processed_per_iteration / _x_subsampling[1], 1.f / _x_subsampling[2]);
140 AccessWindowHorizontal plane3_access(plane3 == nullptr ? nullptr : plane3->info(), 0, _num_elems_processed_per_iteration);
141
142 update_window_and_padding(
143 win,
144 plane0_access,
145 plane1_access,
146 plane2_access,
147 plane3_access,
148 output_access);
149
150 ValidRegion valid_region = intersect_valid_regions(plane0->info()->valid_region(),
151 plane1->info()->valid_region(),
152 plane2->info()->valid_region());
153
154 if(plane3 != nullptr)
155 {
156 valid_region = intersect_valid_regions(plane3->info()->valid_region(), valid_region);
157 }
158
159 output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape()));
160
161 INEKernel::configure(win);
162}
163
164void NEChannelCombineKernel::configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output)
165{
166 ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output);
167 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0);
168 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane1);
169 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane2);
170
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000171 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane0, Format::U8);
172 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane1, Format::U8);
173 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane2, Format::U8);
174 ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100175
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100176 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane0, 1, DataType::U8);
177 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8);
178 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100179
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000180 const Format output_format = output->info()->format();
181
182 // Validate shape of Y plane to be even and shape of sub-sampling dimensions for U and V planes
183 // Perform validation only for formats which require sub-sampling.
184 if(Format::YUV444 != output_format)
185 {
186 // Validate Y plane of input and output
187 ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output->plane(0));
188
189 // Validate U and V plane of the input
190 ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2);
191
192 // Validate second plane U (NV12 and NV21 have a UV88 combined plane while IYUV has only the U plane)
193 // MultiImage generates the correct tensor shape but also check in case the tensor shape of planes was changed to a wrong size
194 ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(1));
195
196 // Validate the last plane V of format IYUV
197 if(Format::IYUV == output_format)
198 {
199 // Validate Y plane of the output
200 ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(2));
201 }
202 }
203
204 _planes[0] = plane0;
205 _planes[1] = plane1;
206 _planes[2] = plane2;
207 _planes[3] = nullptr;
208 _output = nullptr;
209 _output_multi = output;
210
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100211 bool has_two_planes = false;
212 unsigned int num_elems_written_plane1 = 8;
213
214 _num_elems_processed_per_iteration = 8;
215 _is_parallelizable = true;
216
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100217 switch(output_format)
218 {
219 case Format::NV12:
220 case Format::NV21:
221 _x_subsampling = { { 1, 2, 2 } };
222 _y_subsampling = { { 1, 2, 2 } };
223 _func = &NEChannelCombineKernel::combine_YUV_2p;
224 has_two_planes = true;
225 num_elems_written_plane1 = 16;
226 break;
227 case Format::IYUV:
228 _is_parallelizable = false;
229 _x_subsampling = { { 1, 2, 2 } };
230 _y_subsampling = { { 1, 2, 2 } };
231 _func = &NEChannelCombineKernel::combine_YUV_3p;
232 break;
233 case Format::YUV444:
234 _is_parallelizable = false;
235 _x_subsampling = { { 1, 1, 1 } };
236 _y_subsampling = { { 1, 1, 1 } };
237 _func = &NEChannelCombineKernel::combine_YUV_3p;
238 break;
239 default:
240 ARM_COMPUTE_ERROR("Not supported format.");
241 break;
242 }
243
244 const unsigned int y_step = *std::max_element(_y_subsampling.begin(), _y_subsampling.end());
245
246 Window win = calculate_max_window(*plane0->info(), Steps(_num_elems_processed_per_iteration, y_step));
247 AccessWindowRectangle output_plane0_access(output->plane(0)->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f, 1.f / _y_subsampling[0]);
248 AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_written_plane1, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
249 AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
250
251 update_window_and_padding(win,
252 AccessWindowHorizontal(plane0->info(), 0, _num_elems_processed_per_iteration),
253 AccessWindowRectangle(plane1->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]),
254 AccessWindowRectangle(plane2->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]),
255 output_plane0_access,
256 output_plane1_access,
257 output_plane2_access);
258
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000259 ValidRegion plane0_valid_region = plane0->info()->valid_region();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100260 ValidRegion output_plane1_region = has_two_planes ? intersect_valid_regions(plane1->info()->valid_region(), plane2->info()->valid_region()) : plane2->info()->valid_region();
261
262 output_plane0_access.set_valid_region(win, ValidRegion(plane0_valid_region.anchor, output->plane(0)->info()->tensor_shape()));
263 output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape()));
264 output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape()));
265
266 INEKernel::configure(win);
267}
268
269bool NEChannelCombineKernel::is_parallelisable() const
270{
271 return _is_parallelizable;
272}
273
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100274void NEChannelCombineKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100275{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100276 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100277 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
278 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
279 ARM_COMPUTE_ERROR_ON(_func == nullptr);
280
281 (this->*_func)(window);
282}
283
284void NEChannelCombineKernel::combine_3C(const Window &win)
285{
286 Iterator p0(_planes[0], win);
287 Iterator p1(_planes[1], win);
288 Iterator p2(_planes[2], win);
289 Iterator out(_output, win);
290
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100291 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100292 {
293 const auto p0_ptr = static_cast<uint8_t *>(p0.ptr());
294 const auto p1_ptr = static_cast<uint8_t *>(p1.ptr());
295 const auto p2_ptr = static_cast<uint8_t *>(p2.ptr());
296 const auto out_ptr = static_cast<uint8_t *>(out.ptr());
297
298 const uint8x8x3_t pixels =
299 {
300 {
301 vld1_u8(p0_ptr),
302 vld1_u8(p1_ptr),
303 vld1_u8(p2_ptr)
304 }
305 };
306
307 vst3_u8(out_ptr, pixels);
308 },
309 p0, p1, p2, out);
310}
311
312void NEChannelCombineKernel::combine_4C(const Window &win)
313{
314 Iterator p0(_planes[0], win);
315 Iterator p1(_planes[1], win);
316 Iterator p2(_planes[2], win);
317 Iterator p3(_planes[3], win);
318 Iterator out(_output, win);
319
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100320 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100321 {
322 const auto p0_ptr = static_cast<uint8_t *>(p0.ptr());
323 const auto p1_ptr = static_cast<uint8_t *>(p1.ptr());
324 const auto p2_ptr = static_cast<uint8_t *>(p2.ptr());
325 const auto p3_ptr = static_cast<uint8_t *>(p3.ptr());
326 const auto out_ptr = static_cast<uint8_t *>(out.ptr());
327
328 const uint8x8x4_t pixels =
329 {
330 {
331 vld1_u8(p0_ptr),
332 vld1_u8(p1_ptr),
333 vld1_u8(p2_ptr),
334 vld1_u8(p3_ptr)
335 }
336 };
337
338 vst4_u8(out_ptr, pixels);
339 },
340 p0, p1, p2, p3, out);
341}
342
343template <bool is_uyvy>
344void NEChannelCombineKernel::combine_YUV_1p(const Window &win)
345{
346 // Create sub-sampled uv window and init uv planes
347 Window win_uv(win);
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000348 win_uv.set_dimension_step(Window::DimX, win.x().step() / _x_subsampling[1]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100349 win_uv.validate();
350
351 Iterator p0(_planes[0], win);
352 Iterator p1(_planes[1], win_uv);
353 Iterator p2(_planes[2], win_uv);
354 Iterator out(_output, win);
355
356 constexpr auto shift = is_uyvy ? 1 : 0;
357
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100358 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100359 {
360 const auto p0_ptr = static_cast<uint8_t *>(p0.ptr());
361 const auto p1_ptr = static_cast<uint8_t *>(p1.ptr());
362 const auto p2_ptr = static_cast<uint8_t *>(p2.ptr());
363 const auto out_ptr = static_cast<uint8_t *>(out.ptr());
364
365 const uint8x8x2_t pixels_y = vld2_u8(p0_ptr);
366 const uint8x8x2_t pixels_uv =
367 {
368 {
369 vld1_u8(p1_ptr),
370 vld1_u8(p2_ptr)
371 }
372 };
373
374 uint8x8x4_t pixels{ {} };
375 pixels.val[0 + shift] = pixels_y.val[0];
376 pixels.val[1 - shift] = pixels_uv.val[0];
377 pixels.val[2 + shift] = pixels_y.val[1];
378 pixels.val[3 - shift] = pixels_uv.val[1];
379
380 vst4_u8(out_ptr, pixels);
381 },
382 p0, p1, p2, out);
383}
384
385void NEChannelCombineKernel::combine_YUV_2p(const Window &win)
386{
387 ARM_COMPUTE_ERROR_ON(win.x().start() % _x_subsampling[1]);
388 ARM_COMPUTE_ERROR_ON(win.y().start() % _y_subsampling[1]);
389
390 // Copy first plane
391 copy_plane(win, 0);
392
393 // Update UV window
394 Window uv_win(win);
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000395 uv_win.set(Window::DimX, Window::Dimension(uv_win.x().start() / _x_subsampling[1], uv_win.x().end() / _x_subsampling[1], uv_win.x().step() / _x_subsampling[1]));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100396 uv_win.set(Window::DimY, Window::Dimension(uv_win.y().start() / _y_subsampling[1], uv_win.y().end() / _y_subsampling[1], 1));
397 uv_win.validate();
398
399 // Update output win
400 Window out_win(win);
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000401 out_win.set(Window::DimX, Window::Dimension(out_win.x().start(), out_win.x().end(), out_win.x().step() / _x_subsampling[1]));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100402 out_win.set(Window::DimY, Window::Dimension(out_win.y().start() / _y_subsampling[1], out_win.y().end() / _y_subsampling[1], 1));
403 out_win.validate();
404
405 // Construct second plane
406 const int shift = (Format::NV12 == _output_multi->info()->format()) ? 0 : 1;
407 Iterator p1(_planes[1 + shift], uv_win);
408 Iterator p2(_planes[2 - shift], uv_win);
409 Iterator out(_output_multi->plane(1), out_win);
410
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000411 // Increase step size after iterator is created to calculate stride correctly for multi channel format
412 out_win.set_dimension_step(Window::DimX, out_win.x().step() * _x_subsampling[1]);
413
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100414 execute_window_loop(out_win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100415 {
416 const uint8x8x2_t pixels =
417 {
418 {
419 vld1_u8(p1.ptr()),
420 vld1_u8(p2.ptr())
421 }
422 };
423
424 vst2_u8(out.ptr(), pixels);
425 },
426 p1, p2, out);
427}
428
429void NEChannelCombineKernel::combine_YUV_3p(const Window &win)
430{
431 copy_plane(win, 0);
432 copy_plane(win, 1);
433 copy_plane(win, 2);
434}
435
436void NEChannelCombineKernel::copy_plane(const Window &win, uint32_t plane_id)
437{
438 ARM_COMPUTE_ERROR_ON(win.x().start() % _x_subsampling[plane_id]);
439 ARM_COMPUTE_ERROR_ON(win.y().start() % _y_subsampling[plane_id]);
440
441 // Update window
442 Window tmp_win(win);
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000443 tmp_win.set(Window::DimX, Window::Dimension(tmp_win.x().start() / _x_subsampling[plane_id], tmp_win.x().end() / _x_subsampling[plane_id], tmp_win.x().step() / _x_subsampling[plane_id]));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100444 tmp_win.set(Window::DimY, Window::Dimension(tmp_win.y().start() / _y_subsampling[plane_id], tmp_win.y().end() / _y_subsampling[plane_id], 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100445
446 Iterator in(_planes[plane_id], tmp_win);
447 Iterator out(_output_multi->plane(plane_id), tmp_win);
448
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100449 execute_window_loop(tmp_win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100450 {
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000451 const uint8x8_t pixels = vld1_u8(in.ptr());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100452
Ioan-Cristian Szaboae3c8ab2017-11-16 17:55:03 +0000453 vst1_u8(out.ptr(), pixels);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100454 },
455 in, out);
456}