blob: bc8c77543aa124857909bb2a796dd8cd47099872 [file] [log] [blame]
/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IAccessWindow.h"
29#include "arm_compute/core/IMultiImage.h"
30#include "arm_compute/core/ITensor.h"
31#include "arm_compute/core/MultiImageInfo.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010032#include "arm_compute/core/TensorInfo.h"
33#include "arm_compute/core/Types.h"
34#include "arm_compute/core/Validate.h"
35#include "arm_compute/core/Window.h"
36
Georgios Pinitasddb93bb2020-10-02 16:38:59 +010037#include "src/core/NEON/kernels/detail/NEColorConvertHelper.inl"
38
Anthony Barbier6ff3b192017-09-04 18:44:23 +010039using namespace arm_compute;
40
41NEColorConvertKernel::NEColorConvertKernel()
42 : _input(nullptr), _output(nullptr), _func(nullptr)
43{
44}
45
46void NEColorConvertKernel::configure(const ITensor *input, ITensor *output)
47{
48 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
49
50 set_shape_if_empty(*output->info(), input->info()->tensor_shape());
51
52 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
53
54 unsigned int num_elems_processed_per_iteration = 0;
55
56 switch(input->info()->format())
57 {
58 case Format::RGBA8888:
59 {
60 switch(output->info()->format())
61 {
62 case Format::RGB888:
63 _func = colorconvert_rgbx_to_rgb;
64 num_elems_processed_per_iteration = 16;
65 break;
66 default:
67 ARM_COMPUTE_ERROR("Not supported");
68 break;
69 }
70 break;
71 }
72 case Format::UYVY422:
73 {
74 switch(output->info()->format())
75 {
76 case Format::RGB888:
77 _func = colorconvert_yuyv_to_rgb<false, false>;
78 num_elems_processed_per_iteration = 32;
79 break;
80 case Format::RGBA8888:
81 _func = colorconvert_yuyv_to_rgb<false, true>;
82 num_elems_processed_per_iteration = 32;
83 break;
84 default:
85 ARM_COMPUTE_ERROR("Not supported");
86 break;
87 }
88 break;
89 }
90 case Format::YUYV422:
91 {
92 switch(output->info()->format())
93 {
94 case Format::RGB888:
95 _func = colorconvert_yuyv_to_rgb<true, false>;
96 num_elems_processed_per_iteration = 32;
97 break;
98 case Format::RGBA8888:
99 _func = colorconvert_yuyv_to_rgb<true, true>;
100 num_elems_processed_per_iteration = 32;
101 break;
102 default:
103 ARM_COMPUTE_ERROR("Not supported");
104 break;
105 }
106 break;
107 }
108 case Format::RGB888:
109 {
110 switch(output->info()->format())
111 {
112 case Format::RGBA8888:
113 _func = colorconvert_rgb_to_rgbx;
114 num_elems_processed_per_iteration = 16;
115 break;
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100116 case Format::U8:
117 _func = colorconvert_rgb_to_u8;
118 num_elems_processed_per_iteration = 16;
119 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100120 default:
121 ARM_COMPUTE_ERROR("Not supported");
122 break;
123 }
124 break;
125 }
126 default:
127 ARM_COMPUTE_ERROR("Not supported");
128 break;
129 }
130
131 _input = input;
132 _output = output;
133
134 // Configure kernel window
135 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
136 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
137 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
138
139 update_window_and_padding(win, input_access, output_access);
140
141 output_access.set_valid_region(win, input->info()->valid_region());
142
143 INEKernel::configure(win);
144}
145
146void NEColorConvertKernel::configure(const IMultiImage *input, IImage *output)
147{
148 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
149 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
150
151 set_shape_if_empty(*output->info(), input->plane(0)->info()->tensor_shape());
152
153 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output);
154
155 unsigned int num_elems_processed_per_iteration = 0;
156
157 switch(input->info()->format())
158 {
159 case Format::NV12:
160 {
161 switch(output->info()->format())
162 {
163 case Format::RGB888:
164 _func = colorconvert_nv12_to_rgb<true, false>;
165 num_elems_processed_per_iteration = 32;
166 break;
167 case Format::RGBA8888:
168 _func = colorconvert_nv12_to_rgb<true, true>;
169 num_elems_processed_per_iteration = 32;
170 break;
171 default:
172 ARM_COMPUTE_ERROR("Not supported");
173 break;
174 }
175 break;
176 }
177 case Format::NV21:
178 {
179 switch(output->info()->format())
180 {
181 case Format::RGB888:
182 _func = colorconvert_nv12_to_rgb<false, false>;
183 num_elems_processed_per_iteration = 32;
184 break;
185 case Format::RGBA8888:
186 _func = colorconvert_nv12_to_rgb<false, true>;
187 num_elems_processed_per_iteration = 32;
188 break;
189 default:
190 ARM_COMPUTE_ERROR("Not supported");
191 break;
192 }
193 break;
194 }
195 case Format::IYUV:
196 {
197 switch(output->info()->format())
198 {
199 case Format::RGB888:
200 _func = colorconvert_iyuv_to_rgb<false>;
201 num_elems_processed_per_iteration = 32;
202 break;
203 case Format::RGBA8888:
204 _func = colorconvert_iyuv_to_rgb<true>;
205 num_elems_processed_per_iteration = 32;
206 break;
207 default:
208 ARM_COMPUTE_ERROR("Not supported");
209 break;
210 }
211 break;
212 }
213 default:
214 ARM_COMPUTE_ERROR("Not supported");
215 break;
216 }
217
218 _input = input;
219 _output = output;
220
221 // Configure kernel window
222 Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
223 win.set_dimension_step(Window::DimY, 2);
224
225 unsigned int input_plane_count = 3;
226
227 if(input->info()->format() == Format::NV12 || input->info()->format() == Format::NV21)
228 {
229 input_plane_count = 2;
230 }
231
232 AccessWindowHorizontal input0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
233 AccessWindowRectangle input1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, 0.5f, 0.5f);
234 AccessWindowRectangle input2_access(input_plane_count == 2 ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, 0.5f, 0.5f);
235 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
236
237 update_window_and_padding(win,
238 input0_access, input1_access, input2_access,
239 output_access);
240
241 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(),
242 input->plane(1)->info()->valid_region());
243
244 if(input_plane_count == 3)
245 {
246 intersect_region = intersect_valid_regions(intersect_region, input->plane(2)->info()->valid_region());
247 }
248
249 output_access.set_valid_region(win, intersect_region);
250
251 INEKernel::configure(win);
252}
253
254void NEColorConvertKernel::configure(const IImage *input, IMultiImage *output)
255{
256 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
257 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
258
259 set_shape_if_empty(*output->plane(0)->info(), input->info()->tensor_shape());
260
261 switch(output->info()->format())
262 {
263 case Format::NV12:
264 {
265 TensorShape subsampled_shape = input->info()->tensor_shape();
266 subsampled_shape.set(0, subsampled_shape[0] / 2);
267 subsampled_shape.set(1, subsampled_shape[1] / 2);
268
269 set_shape_if_empty(*output->plane(1)->info(), subsampled_shape);
270
271 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape());
272 break;
273 }
274 case Format::IYUV:
275 {
276 TensorShape subsampled_shape = input->info()->tensor_shape();
277 subsampled_shape.set(0, subsampled_shape[0] / 2);
278 subsampled_shape.set(1, subsampled_shape[1] / 2);
279
280 set_shape_if_empty(*output->plane(1)->info(), subsampled_shape);
281 set_shape_if_empty(*output->plane(2)->info(), subsampled_shape);
282
283 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape());
284 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(2)->info()->tensor_shape());
285 break;
286 }
287 case Format::YUV444:
288 set_shape_if_empty(*output->plane(1)->info(), input->info()->tensor_shape());
289 set_shape_if_empty(*output->plane(2)->info(), input->info()->tensor_shape());
290
291 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(1));
292 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(2));
293 break;
294 default:
295 ARM_COMPUTE_ERROR("Not supported");
296 }
297
298 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output->plane(0));
299
300 unsigned int num_elems_processed_per_iteration = 0;
301
302 switch(input->info()->format())
303 {
304 case Format::RGB888:
305 {
306 switch(output->info()->format())
307 {
308 case Format::NV12:
309 _func = colorconvert_rgb_to_nv12<false>;
310 num_elems_processed_per_iteration = 16;
311 break;
312 case Format::IYUV:
313 _func = colorconvert_rgb_to_iyuv<false>;
314 num_elems_processed_per_iteration = 16;
315 break;
316 case Format::YUV444:
317 _func = colorconvert_rgb_to_yuv4<false>;
318 num_elems_processed_per_iteration = 16;
319 break;
320 default:
321 ARM_COMPUTE_ERROR("Not supported");
322 break;
323 }
324 break;
325 }
326 case Format::RGBA8888:
327 {
328 switch(output->info()->format())
329 {
330 case Format::NV12:
331 _func = colorconvert_rgb_to_nv12<true>;
332 num_elems_processed_per_iteration = 16;
333 break;
334 case Format::IYUV:
335 _func = colorconvert_rgb_to_iyuv<true>;
336 num_elems_processed_per_iteration = 16;
337 break;
338 case Format::YUV444:
339 _func = colorconvert_rgb_to_yuv4<true>;
340 num_elems_processed_per_iteration = 16;
341 break;
342 default:
343 ARM_COMPUTE_ERROR("Not supported");
344 break;
345 }
346 break;
347 }
348 case Format::UYVY422:
349 {
350 switch(output->info()->format())
351 {
352 case Format::NV12:
353 _func = colorconvert_yuyv_to_nv12<false>;
354 num_elems_processed_per_iteration = 32;
355 break;
356 case Format::IYUV:
357 _func = colorconvert_yuyv_to_iyuv<false>;
358 num_elems_processed_per_iteration = 32;
359 break;
360 default:
361 ARM_COMPUTE_ERROR("Not supported");
362 break;
363 }
364 break;
365 }
366 case Format::YUYV422:
367 {
368 switch(output->info()->format())
369 {
370 case Format::NV12:
371 _func = colorconvert_yuyv_to_nv12<true>;
372 num_elems_processed_per_iteration = 32;
373 break;
374 case Format::IYUV:
375 _func = colorconvert_yuyv_to_iyuv<true>;
376 num_elems_processed_per_iteration = 32;
377 break;
378 default:
379 ARM_COMPUTE_ERROR("Not supported");
380 break;
381 }
382 break;
383 }
384 default:
385 ARM_COMPUTE_ERROR("Not supported");
386 break;
387 }
388
389 _input = input;
390 _output = output;
391
392 // Configure kernel window
393 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
394
395 float sub_sampling = 1.f;
396
397 if((input->info()->format() != Format::RGB888 || output->info()->format() != Format::YUV444) && (input->info()->format() != Format::RGBA8888 || output->info()->format() != Format::YUV444))
398 {
399 win.set_dimension_step(Window::DimY, 2);
400 sub_sampling = 0.5f;
401 }
402
403 unsigned int output_plane_count = 3;
404
405 if(output->info()->format() == Format::NV12 || output->info()->format() == Format::NV21)
406 {
407 output_plane_count = 2;
408 }
409
410 AccessWindowHorizontal output0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
411 AccessWindowRectangle output1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
412 AccessWindowRectangle output2_access(output_plane_count == 2 ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
413
414 update_window_and_padding(win,
415 AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
416 output0_access,
417 output1_access,
418 output2_access);
419
420 output0_access.set_valid_region(win, input->info()->valid_region());
421 output1_access.set_valid_region(win, input->info()->valid_region());
422 output2_access.set_valid_region(win, input->info()->valid_region());
423
424 INEKernel::configure(win);
425}
426
427void NEColorConvertKernel::configure(const IMultiImage *input, IMultiImage *output)
428{
429 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
430 ARM_COMPUTE_ERROR_ON(input == output);
431
432 set_shape_if_empty(*output->plane(0)->info(), input->plane(0)->info()->tensor_shape());
433
434 switch(output->info()->format())
435 {
436 case Format::NV12:
437 {
438 TensorShape subsampled_shape = input->plane(0)->info()->tensor_shape();
439 subsampled_shape.set(0, subsampled_shape[0] / 2);
440 subsampled_shape.set(1, subsampled_shape[1] / 2);
441
442 set_shape_if_empty(*output->plane(1)->info(), subsampled_shape);
443
444 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape());
445 break;
446 }
447 case Format::IYUV:
448 {
449 TensorShape subsampled_shape = input->plane(0)->info()->tensor_shape();
450 subsampled_shape.set(0, subsampled_shape[0] / 2);
451 subsampled_shape.set(1, subsampled_shape[1] / 2);
452
453 set_shape_if_empty(*output->plane(1)->info(), subsampled_shape);
454 set_shape_if_empty(*output->plane(2)->info(), subsampled_shape);
455
456 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(1)->info()->tensor_shape());
457 ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(subsampled_shape, output->plane(2)->info()->tensor_shape());
458 break;
459 }
460 case Format::YUV444:
461 set_shape_if_empty(*output->plane(1)->info(), input->plane(0)->info()->tensor_shape());
462 set_shape_if_empty(*output->plane(2)->info(), input->plane(0)->info()->tensor_shape());
463
464 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(1));
465 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(2));
466 break;
467 default:
468 ARM_COMPUTE_ERROR("Not supported");
469 }
470
471 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input->plane(0), output->plane(0));
472
473 switch(input->info()->format())
474 {
475 case Format::NV12:
476 {
477 switch(output->info()->format())
478 {
479 case Format::IYUV:
480 _func = colorconvert_nv12_to_iyuv<true>;
481 break;
482 case Format::YUV444:
483 _func = colorconvert_nv12_to_yuv4<true>;
484 break;
485 default:
486 ARM_COMPUTE_ERROR("Not supported");
487 break;
488 }
489 break;
490 }
491 case Format::NV21:
492 {
493 switch(output->info()->format())
494 {
495 case Format::IYUV:
496 _func = colorconvert_nv12_to_iyuv<false>;
497 break;
498 case Format::YUV444:
499 _func = colorconvert_nv12_to_yuv4<false>;
500 break;
501 default:
502 ARM_COMPUTE_ERROR("Not supported");
503 break;
504 }
505 break;
506 }
507 case Format::IYUV:
508 {
509 switch(output->info()->format())
510 {
511 case Format::NV12:
512 _func = colorconvert_iyuv_to_nv12;
513 break;
514 case Format::YUV444:
515 _func = colorconvert_iyuv_to_yuv4;
516 break;
517 default:
518 ARM_COMPUTE_ERROR("Not supported");
519 break;
520 }
521 break;
522 }
523 default:
524 ARM_COMPUTE_ERROR("Not supported");
525 break;
526 }
527
528 _input = input;
529 _output = output;
530
531 constexpr unsigned int num_elems_processed_per_iteration = 32;
532 constexpr float input_sub_sampling = 0.5f;
533 const float output_sub_sampling = output->info()->format() == Format::YUV444 ? 1.f : 0.5f;
534
535 // Configure kernel window
536 Window win = calculate_max_window(*input->plane(0)->info(), Steps(num_elems_processed_per_iteration));
537 win.set_dimension_step(Window::DimY, 2);
538
539 unsigned int input_plane_count = 3;
540
541 if(input->info()->format() == Format::NV12 || input->info()->format() == Format::NV21)
542 {
543 input_plane_count = 2;
544 }
545
546 unsigned int output_plane_count = 3;
547
548 if(output->info()->format() == Format::NV12 || output->info()->format() == Format::NV21)
549 {
550 output_plane_count = 2;
551 }
552
553 AccessWindowHorizontal output0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
554 AccessWindowRectangle output1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, output_sub_sampling, output_sub_sampling);
555 AccessWindowRectangle output2_access(output_plane_count == 2 ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, output_sub_sampling, output_sub_sampling);
556
557 update_window_and_padding(win,
558 AccessWindowHorizontal(input->plane(0)->info(), 0, num_elems_processed_per_iteration),
559 AccessWindowRectangle(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, input_sub_sampling, input_sub_sampling),
560 AccessWindowRectangle(input_plane_count == 2 ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, input_sub_sampling, input_sub_sampling),
561 output0_access,
562 output1_access,
563 output2_access);
564
565 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(),
566 input->plane(1)->info()->valid_region());
567
568 if(input_plane_count == 3)
569 {
570 intersect_region = intersect_valid_regions(intersect_region, input->plane(2)->info()->valid_region());
571 }
572
573 output0_access.set_valid_region(win, intersect_region);
574 output1_access.set_valid_region(win, intersect_region);
575 output2_access.set_valid_region(win, intersect_region);
576
577 INEKernel::configure(win);
578}
579
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100580void NEColorConvertKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100581{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100582 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100583 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
584 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
585 ARM_COMPUTE_ERROR_ON(_func == nullptr);
586
587 (*_func)(_input, _output, window);
588}