/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/core/NEON/kernels/NEColorConvertKernel.h"

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/IMultiImage.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/MultiImageInfo.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"

#include "src/core/NEON/kernels/detail/NEColorConvertHelper.inl"

using namespace arm_compute;

43NEColorConvertKernel::NEColorConvertKernel()
44 : _input(nullptr), _output(nullptr), _func(nullptr)
45{
46}
47
48void NEColorConvertKernel::configure(const ITensor *input, ITensor *output)
49{
50 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
51
52 set_shape_if_empty(*output->info(), input->info()->tensor_shape());
53
54 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
55
56 unsigned int num_elems_processed_per_iteration = 0;
57
58 switch(input->info()->format())
59 {
60 case Format::RGBA8888:
61 {
62 switch(output->info()->format())
63 {
64 case Format::RGB888:
65 _func = colorconvert_rgbx_to_rgb;
66 num_elems_processed_per_iteration = 16;
67 break;
68 default:
69 ARM_COMPUTE_ERROR("Not supported");
70 break;
71 }
72 break;
73 }
74 case Format::UYVY422:
75 {
76 switch(output->info()->format())
77 {
78 case Format::RGB888:
79 _func = colorconvert_yuyv_to_rgb<false, false>;
80 num_elems_processed_per_iteration = 32;
81 break;
82 case Format::RGBA8888:
83 _func = colorconvert_yuyv_to_rgb<false, true>;
84 num_elems_processed_per_iteration = 32;
85 break;
86 default:
87 ARM_COMPUTE_ERROR("Not supported");
88 break;
89 }
90 break;
91 }
92 case Format::YUYV422:
93 {
94 switch(output->info()->format())
95 {
96 case Format::RGB888:
97 _func = colorconvert_yuyv_to_rgb<true, false>;
98 num_elems_processed_per_iteration = 32;
99 break;
100 case Format::RGBA8888:
101 _func = colorconvert_yuyv_to_rgb<true, true>;
102 num_elems_processed_per_iteration = 32;
103 break;
104 default:
105 ARM_COMPUTE_ERROR("Not supported");
106 break;
107 }
108 break;
109 }
110 case Format::RGB888:
111 {
112 switch(output->info()->format())
113 {
114 case Format::RGBA8888:
115 _func = colorconvert_rgb_to_rgbx;
116 num_elems_processed_per_iteration = 16;
117 break;
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100118 case Format::U8:
119 _func = colorconvert_rgb_to_u8;
120 num_elems_processed_per_iteration = 16;
121 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100122 default:
123 ARM_COMPUTE_ERROR("Not supported");
124 break;
125 }
126 break;
127 }
128 default:
129 ARM_COMPUTE_ERROR("Not supported");
130 break;
131 }
132
133 _input = input;
134 _output = output;
135
136 // Configure kernel window
137 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
138 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
139 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
140
141 update_window_and_padding(win, input_access, output_access);
142
143 output_access.set_valid_region(win, input->info()->valid_region());
144
145 INEKernel::configure(win);
146}
147
148void NEColorConvertKernel::configure(const IMultiImage *input, IImage *output)
149{
150 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
151 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
152
153 set_shape_if_empty(*output->info(), input->plane(0)->info()->tensor_shape());
154
155 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output);
156
157 unsigned int num_elems_processed_per_iteration = 0;
158
159 switch(input->info()->format())
160 {
161 case Format::NV12:
162 {
163 switch(output->info()->format())
164 {
165 case Format::RGB888:
166 _func = colorconvert_nv12_to_rgb<true, false>;
167 num_elems_processed_per_iteration = 32;
168 break;
169 case Format::RGBA8888:
170 _func = colorconvert_nv12_to_rgb<true, true>;
171 num_elems_processed_per_iteration = 32;
172 break;
173 default:
174 ARM_COMPUTE_ERROR("Not supported");
175 break;
176 }
177 break;
178 }
179 case Format::NV21:
180 {
181 switch(output->info()->format())
182 {
183 case Format::RGB888:
184 _func = colorconvert_nv12_to_rgb<false, false>;
185 num_elems_processed_per_iteration = 32;
186 break;
187 case Format::RGBA8888:
188 _func = colorconvert_nv12_to_rgb<false, true>;
189 num_elems_processed_per_iteration = 32;
190 break;
191 default:
192 ARM_COMPUTE_ERROR("Not supported");
193 break;
194 }
195 break;
196 }
197 case Format::IYUV:
198 {
199 switch(output->info()->format())
200 {
201 case Format::RGB888:
202 _func = colorconvert_iyuv_to_rgb<false>;
203 num_elems_processed_per_iteration = 32;
204 break;
205 case Format::RGBA8888:
206 _func = colorconvert_iyuv_to_rgb<true>;
207 num_elems_processed_per_iteration = 32;
208 break;
209 default:
210 ARM_COMPUTE_ERROR("Not supported");
211 break;
212 }
213 break;
214 }
215 default:
216 ARM_COMPUTE_ERROR("Not supported");
217 break;
218 }
219
220 _input = input;
221 _output = output;
222
223 // Configure kernel window
224 Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
225 win.set_dimension_step(Window::DimY, 2);
226
227 unsigned int input_plane_count = 3;
228
229 if(input->info()->format() == Format::NV12 || input->info()->format() == Format::NV21)
230 {
231 input_plane_count = 2;
232 }
233
234 AccessWindowHorizontal input0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
235 AccessWindowRectangle input1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, 0.5f, 0.5f);
236 AccessWindowRectangle input2_access(input_plane_count == 2 ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, 0.5f, 0.5f);
237 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
238
239 update_window_and_padding(win,
240 input0_access, input1_access, input2_access,
241 output_access);
242
243 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(),
244 input->plane(1)->info()->valid_region());
245
246 if(input_plane_count == 3)
247 {
248 intersect_region = intersect_valid_regions(intersect_region, input->plane(2)->info()->valid_region());
249 }
250
251 output_access.set_valid_region(win, intersect_region);
252
253 INEKernel::configure(win);
254}
255
256void NEColorConvertKernel::configure(const IImage *input, IMultiImage *output)
257{
258 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
259 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
260
261 set_shape_if_empty(*output->plane(0)->info(), input->info()->tensor_shape());
262
263 switch(output->info()->format())
264 {
265 case Format::NV12:
266 {
267 TensorShape subsampled_shape = input->info()->tensor_shape();
268 subsampled_shape.set(0, subsampled_shape[0] / 2);
269 subsampled_shape.set(1, subsampled_shape[1] / 2);
270
271 set_shape_if_empty(*output->plane(1)->info(), subsampled_shape);
272
273 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape());
274 break;
275 }
276 case Format::IYUV:
277 {
278 TensorShape subsampled_shape = input->info()->tensor_shape();
279 subsampled_shape.set(0, subsampled_shape[0] / 2);
280 subsampled_shape.set(1, subsampled_shape[1] / 2);
281
282 set_shape_if_empty(*output->plane(1)->info(), subsampled_shape);
283 set_shape_if_empty(*output->plane(2)->info(), subsampled_shape);
284
285 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape());
286 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(2)->info()->tensor_shape());
287 break;
288 }
289 case Format::YUV444:
290 set_shape_if_empty(*output->plane(1)->info(), input->info()->tensor_shape());
291 set_shape_if_empty(*output->plane(2)->info(), input->info()->tensor_shape());
292
293 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(1));
294 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(2));
295 break;
296 default:
297 ARM_COMPUTE_ERROR("Not supported");
298 }
299
300 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(0));
301
302 unsigned int num_elems_processed_per_iteration = 0;
303
304 switch(input->info()->format())
305 {
306 case Format::RGB888:
307 {
308 switch(output->info()->format())
309 {
310 case Format::NV12:
311 _func = colorconvert_rgb_to_nv12<false>;
312 num_elems_processed_per_iteration = 16;
313 break;
314 case Format::IYUV:
315 _func = colorconvert_rgb_to_iyuv<false>;
316 num_elems_processed_per_iteration = 16;
317 break;
318 case Format::YUV444:
319 _func = colorconvert_rgb_to_yuv4<false>;
320 num_elems_processed_per_iteration = 16;
321 break;
322 default:
323 ARM_COMPUTE_ERROR("Not supported");
324 break;
325 }
326 break;
327 }
328 case Format::RGBA8888:
329 {
330 switch(output->info()->format())
331 {
332 case Format::NV12:
333 _func = colorconvert_rgb_to_nv12<true>;
334 num_elems_processed_per_iteration = 16;
335 break;
336 case Format::IYUV:
337 _func = colorconvert_rgb_to_iyuv<true>;
338 num_elems_processed_per_iteration = 16;
339 break;
340 case Format::YUV444:
341 _func = colorconvert_rgb_to_yuv4<true>;
342 num_elems_processed_per_iteration = 16;
343 break;
344 default:
345 ARM_COMPUTE_ERROR("Not supported");
346 break;
347 }
348 break;
349 }
350 case Format::UYVY422:
351 {
352 switch(output->info()->format())
353 {
354 case Format::NV12:
355 _func = colorconvert_yuyv_to_nv12<false>;
356 num_elems_processed_per_iteration = 32;
357 break;
358 case Format::IYUV:
359 _func = colorconvert_yuyv_to_iyuv<false>;
360 num_elems_processed_per_iteration = 32;
361 break;
362 default:
363 ARM_COMPUTE_ERROR("Not supported");
364 break;
365 }
366 break;
367 }
368 case Format::YUYV422:
369 {
370 switch(output->info()->format())
371 {
372 case Format::NV12:
373 _func = colorconvert_yuyv_to_nv12<true>;
374 num_elems_processed_per_iteration = 32;
375 break;
376 case Format::IYUV:
377 _func = colorconvert_yuyv_to_iyuv<true>;
378 num_elems_processed_per_iteration = 32;
379 break;
380 default:
381 ARM_COMPUTE_ERROR("Not supported");
382 break;
383 }
384 break;
385 }
386 default:
387 ARM_COMPUTE_ERROR("Not supported");
388 break;
389 }
390
391 _input = input;
392 _output = output;
393
394 // Configure kernel window
395 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
396
397 float sub_sampling = 1.f;
398
399 if((input->info()->format() != Format::RGB888 || output->info()->format() != Format::YUV444) && (input->info()->format() != Format::RGBA8888 || output->info()->format() != Format::YUV444))
400 {
401 win.set_dimension_step(Window::DimY, 2);
402 sub_sampling = 0.5f;
403 }
404
405 unsigned int output_plane_count = 3;
406
407 if(output->info()->format() == Format::NV12 || output->info()->format() == Format::NV21)
408 {
409 output_plane_count = 2;
410 }
411
412 AccessWindowHorizontal output0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
413 AccessWindowRectangle output1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
414 AccessWindowRectangle output2_access(output_plane_count == 2 ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
415
416 update_window_and_padding(win,
417 AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
418 output0_access,
419 output1_access,
420 output2_access);
421
422 output0_access.set_valid_region(win, input->info()->valid_region());
423 output1_access.set_valid_region(win, input->info()->valid_region());
424 output2_access.set_valid_region(win, input->info()->valid_region());
425
426 INEKernel::configure(win);
427}
428
429void NEColorConvertKernel::configure(const IMultiImage *input, IMultiImage *output)
430{
431 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
432 ARM_COMPUTE_ERROR_ON(input == output);
433
434 set_shape_if_empty(*output->plane(0)->info(), input->plane(0)->info()->tensor_shape());
435
436 switch(output->info()->format())
437 {
438 case Format::NV12:
439 {
440 TensorShape subsampled_shape = input->plane(0)->info()->tensor_shape();
441 subsampled_shape.set(0, subsampled_shape[0] / 2);
442 subsampled_shape.set(1, subsampled_shape[1] / 2);
443
444 set_shape_if_empty(*output->plane(1)->info(), subsampled_shape);
445
446 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape());
447 break;
448 }
449 case Format::IYUV:
450 {
451 TensorShape subsampled_shape = input->plane(0)->info()->tensor_shape();
452 subsampled_shape.set(0, subsampled_shape[0] / 2);
453 subsampled_shape.set(1, subsampled_shape[1] / 2);
454
455 set_shape_if_empty(*output->plane(1)->info(), subsampled_shape);
456 set_shape_if_empty(*output->plane(2)->info(), subsampled_shape);
457
458 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape());
459 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(2)->info()->tensor_shape());
460 break;
461 }
462 case Format::YUV444:
463 set_shape_if_empty(*output->plane(1)->info(), input->plane(0)->info()->tensor_shape());
464 set_shape_if_empty(*output->plane(2)->info(), input->plane(0)->info()->tensor_shape());
465
466 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(1));
467 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(2));
468 break;
469 default:
470 ARM_COMPUTE_ERROR("Not supported");
471 }
472
473 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(0));
474
475 switch(input->info()->format())
476 {
477 case Format::NV12:
478 {
479 switch(output->info()->format())
480 {
481 case Format::IYUV:
482 _func = colorconvert_nv12_to_iyuv<true>;
483 break;
484 case Format::YUV444:
485 _func = colorconvert_nv12_to_yuv4<true>;
486 break;
487 default:
488 ARM_COMPUTE_ERROR("Not supported");
489 break;
490 }
491 break;
492 }
493 case Format::NV21:
494 {
495 switch(output->info()->format())
496 {
497 case Format::IYUV:
498 _func = colorconvert_nv12_to_iyuv<false>;
499 break;
500 case Format::YUV444:
501 _func = colorconvert_nv12_to_yuv4<false>;
502 break;
503 default:
504 ARM_COMPUTE_ERROR("Not supported");
505 break;
506 }
507 break;
508 }
509 case Format::IYUV:
510 {
511 switch(output->info()->format())
512 {
513 case Format::NV12:
514 _func = colorconvert_iyuv_to_nv12;
515 break;
516 case Format::YUV444:
517 _func = colorconvert_iyuv_to_yuv4;
518 break;
519 default:
520 ARM_COMPUTE_ERROR("Not supported");
521 break;
522 }
523 break;
524 }
525 default:
526 ARM_COMPUTE_ERROR("Not supported");
527 break;
528 }
529
530 _input = input;
531 _output = output;
532
533 constexpr unsigned int num_elems_processed_per_iteration = 32;
534 constexpr float input_sub_sampling = 0.5f;
535 const float output_sub_sampling = output->info()->format() == Format::YUV444 ? 1.f : 0.5f;
536
537 // Configure kernel window
538 Window win = calculate_max_window(*input->plane(0)->info(), Steps(num_elems_processed_per_iteration));
539 win.set_dimension_step(Window::DimY, 2);
540
541 unsigned int input_plane_count = 3;
542
543 if(input->info()->format() == Format::NV12 || input->info()->format() == Format::NV21)
544 {
545 input_plane_count = 2;
546 }
547
548 unsigned int output_plane_count = 3;
549
550 if(output->info()->format() == Format::NV12 || output->info()->format() == Format::NV21)
551 {
552 output_plane_count = 2;
553 }
554
555 AccessWindowHorizontal output0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
556 AccessWindowRectangle output1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, output_sub_sampling, output_sub_sampling);
557 AccessWindowRectangle output2_access(output_plane_count == 2 ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, output_sub_sampling, output_sub_sampling);
558
559 update_window_and_padding(win,
560 AccessWindowHorizontal(input->plane(0)->info(), 0, num_elems_processed_per_iteration),
561 AccessWindowRectangle(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, input_sub_sampling, input_sub_sampling),
562 AccessWindowRectangle(input_plane_count == 2 ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, input_sub_sampling, input_sub_sampling),
563 output0_access,
564 output1_access,
565 output2_access);
566
567 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(),
568 input->plane(1)->info()->valid_region());
569
570 if(input_plane_count == 3)
571 {
572 intersect_region = intersect_valid_regions(intersect_region, input->plane(2)->info()->valid_region());
573 }
574
575 output0_access.set_valid_region(win, intersect_region);
576 output1_access.set_valid_region(win, intersect_region);
577 output2_access.set_valid_region(win, intersect_region);
578
579 INEKernel::configure(win);
580}
581
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100582void NEColorConvertKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100583{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100584 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100585 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
586 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
587 ARM_COMPUTE_ERROR_ON(_func == nullptr);
588
589 (*_func)(_input, _output, window);
590}