blob: 4f178c9d7557ec3674151205c7bcca1f178bea29 [file] [log] [blame]
/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
25
26#include "arm_compute/core/CL/CLKernelLibrary.h"
27#include "arm_compute/core/CL/ICLMultiImage.h"
28#include "arm_compute/core/CL/ICLTensor.h"
29#include "arm_compute/core/CL/OpenCL.h"
30#include "arm_compute/core/Error.h"
31#include "arm_compute/core/Helpers.h"
32#include "arm_compute/core/MultiImageInfo.h"
33#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Types.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/Window.h"
38
39#include <sstream>
40
41using namespace arm_compute;
42
43CLColorConvertKernel::CLColorConvertKernel()
44 : _input(nullptr), _output(nullptr), _multi_input(nullptr), _multi_output(nullptr)
45{
46}
47
48void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
49{
50 ARM_COMPUTE_ERROR_ON(input == nullptr);
51 ARM_COMPUTE_ERROR_ON(output == nullptr);
52
53 unsigned int num_elems_processed_per_iteration = 0;
54 switch(input->info()->format())
55 {
56 case Format::RGBA8888:
57 {
58 switch(output->info()->format())
59 {
60 case Format::RGB888:
61 num_elems_processed_per_iteration = 16;
62 break;
63 default:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +010064 ARM_COMPUTE_ERROR("Not supported");
Anthony Barbier6ff3b192017-09-04 18:44:23 +010065 break;
66 }
67 break;
68 }
69 case Format::UYVY422:
70 case Format::YUYV422:
71 {
72 switch(output->info()->format())
73 {
74 case Format::RGB888:
75 case Format::RGBA8888:
76 num_elems_processed_per_iteration = 8;
77 break;
78 default:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +010079 ARM_COMPUTE_ERROR("Not supported");
Anthony Barbier6ff3b192017-09-04 18:44:23 +010080 break;
81 }
82 break;
83 }
84 case Format::RGB888:
85 {
86 switch(output->info()->format())
87 {
88 case Format::RGBA8888:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +010089 case Format::U8:
Anthony Barbier6ff3b192017-09-04 18:44:23 +010090 num_elems_processed_per_iteration = 16;
91 break;
92 default:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +010093 ARM_COMPUTE_ERROR("Not supported");
Anthony Barbier6ff3b192017-09-04 18:44:23 +010094 break;
95 }
96 break;
97 }
98 default:
99 break;
100 }
101 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
102 string_from_format(input->info()->format()).c_str(),
103 string_from_format(output->info()->format()).c_str());
104
105 std::stringstream kernel_name;
106
107 kernel_name << string_from_format(input->info()->format());
108 kernel_name << "_to_";
109 kernel_name << string_from_format(output->info()->format());
110 kernel_name << "_bt709";
111
112 _input = input;
113 _output = output;
114
115 // Create kernel
116 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
117
118 // Configure kernel window
119 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
120 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
121 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
122
123 update_window_and_padding(win, input_access, output_access);
124
125 output_access.set_valid_region(win, input->info()->valid_region());
126
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100127 ICLKernel::configure_internal(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100128}
129
130void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output)
131{
132 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
133 ARM_COMPUTE_ERROR_ON(output == nullptr);
134
135 unsigned int num_elems_processed_per_iteration = 0;
136
137 switch(input->info()->format())
138 {
139 case Format::NV12:
140 case Format::NV21:
141 case Format::IYUV:
142 {
143 switch(output->info()->format())
144 {
145 case Format::RGB888:
146 case Format::RGBA8888:
147 num_elems_processed_per_iteration = 4;
148 break;
149 default:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +0100150 ARM_COMPUTE_ERROR("Not supported");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100151 break;
152 }
153 break;
154 }
155 default:
156 break;
157 }
158 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
159 string_from_format(input->info()->format()).c_str(),
160 string_from_format(output->info()->format()).c_str());
161
162 std::stringstream kernel_name;
163
164 kernel_name << string_from_format(input->info()->format());
165 kernel_name << "_to_";
166 kernel_name << string_from_format(output->info()->format());
167 kernel_name << "_bt709";
168
169 _multi_input = input;
170 _output = output;
171
172 // Create kernel
173 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
174
175 // Configure kernel window
176 const bool has_two_planes = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
177 const float sub_sampling = (has_two_planes || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
178
179 Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
180 win.set_dimension_step(Window::DimY, 2);
181
182 AccessWindowHorizontal plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
183 AccessWindowRectangle plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1,
184 sub_sampling, sub_sampling);
185 AccessWindowRectangle plane2_access(has_two_planes ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1,
186 sub_sampling, sub_sampling);
187 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
188
189 update_window_and_padding(win,
190 plane0_access, plane1_access, plane2_access,
191 output_access);
192
193 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(),
194 input->plane(2)->info()->valid_region());
195 output_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->info()->tensor_shape()));
196
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100197 ICLKernel::configure_internal(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100198}
199
200void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output)
201{
202 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
203 ARM_COMPUTE_ERROR_ON(output == nullptr);
204
205 unsigned int num_elems_processed_per_iteration = 0;
Pablo Tello96fc1d62018-07-17 17:10:59 +0100206 unsigned int num_elems_read_per_iteration_x = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100207
208 bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
209 float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
210
211 switch(input->info()->format())
212 {
213 case Format::RGB888:
214 case Format::RGBA8888:
215 {
216 switch(output->info()->format())
217 {
218 case Format::NV12:
219 case Format::IYUV:
220 num_elems_processed_per_iteration = 2;
Pablo Tello96fc1d62018-07-17 17:10:59 +0100221 num_elems_read_per_iteration_x = 8;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100222 break;
223 case Format::YUV444:
224 num_elems_processed_per_iteration = 4;
Pablo Tello96fc1d62018-07-17 17:10:59 +0100225 num_elems_read_per_iteration_x = 16;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100226 break;
227 default:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +0100228 ARM_COMPUTE_ERROR("Not supported");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100229 break;
230 }
231 break;
232 }
233 case Format::UYVY422:
234 case Format::YUYV422:
235 {
236 switch(output->info()->format())
237 {
238 case Format::NV12:
239 case Format::IYUV:
240 num_elems_processed_per_iteration = 8;
Pablo Tello96fc1d62018-07-17 17:10:59 +0100241 num_elems_read_per_iteration_x = 8;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100242 break;
243 default:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +0100244 ARM_COMPUTE_ERROR("Not supported");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100245 break;
246 }
247 break;
248 }
249 default:
250 break;
251 }
Pablo Tello96fc1d62018-07-17 17:10:59 +0100252
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100253 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
254 string_from_format(input->info()->format()).c_str(),
255 string_from_format(output->info()->format()).c_str());
256
257 std::stringstream kernel_name;
258
259 kernel_name << string_from_format(input->info()->format());
260 kernel_name << "_to_";
261 kernel_name << string_from_format(output->info()->format());
262 kernel_name << "_bt709";
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100263 _input = input;
264 _multi_output = output;
265
266 // Create kernel
267 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
268
269 // Configure kernel window
270 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
271 if((input->info()->format() != Format::RGB888 || output->info()->format() != Format::YUV444) && (input->info()->format() != Format::RGBA8888 || output->info()->format() != Format::YUV444))
272 {
273 win.set_dimension_step(Window::DimY, 2);
274 }
275
276 AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
277 AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
278 AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0,
279 num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
280
Pablo Tello96fc1d62018-07-17 17:10:59 +0100281 AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration_x);
282
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100283 update_window_and_padding(win,
Pablo Tello96fc1d62018-07-17 17:10:59 +0100284 input_access,
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100285 output_plane0_access,
286 output_plane1_access,
287 output_plane2_access);
288
289 ValidRegion input_region = input->info()->valid_region();
290
291 output_plane0_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(0)->info()->tensor_shape()));
292 output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape()));
293 output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape()));
294
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100295 ICLKernel::configure_internal(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100296}
297
298void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output)
299{
300 unsigned int num_elems_processed_per_iteration = 0;
301 switch(input->info()->format())
302 {
303 case Format::NV12:
304 case Format::NV21:
305 {
306 switch(output->info()->format())
307 {
308 case Format::IYUV:
309 case Format::YUV444:
310 num_elems_processed_per_iteration = 16;
311 break;
312 default:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +0100313 ARM_COMPUTE_ERROR("Not supported");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100314 break;
315 }
316 break;
317 }
318 case Format::IYUV:
319 {
320 switch(output->info()->format())
321 {
322 case Format::YUV444:
323 case Format::NV12:
324 num_elems_processed_per_iteration = 16;
325 break;
326 default:
Manuel Bottiniacaf21d2018-09-26 17:38:19 +0100327 ARM_COMPUTE_ERROR("Not supported");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100328 break;
329 }
330 break;
331 }
332 default:
333 break;
334 }
335 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
336 string_from_format(input->info()->format()).c_str(),
337 string_from_format(output->info()->format()).c_str());
338
339 std::stringstream kernel_name;
340
341 kernel_name << string_from_format(input->info()->format());
342 kernel_name << "_to_";
343 kernel_name << string_from_format(output->info()->format());
344 kernel_name << "_bt709";
345
346 _multi_input = input;
347 _multi_output = output;
348
349 // Create kernel
350 bool has_two_input_planars = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
351 bool has_two_output_planars = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
352
353 float sub_sampling_input = (has_two_input_planars || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
354 float sub_sampling_output = (has_two_output_planars || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
355
356 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
357
358 Window win = calculate_max_window(*input->cl_plane(0)->info(), Steps(num_elems_processed_per_iteration));
359 win.set_dimension_step(Window::DimY, 2);
360
361 AccessWindowHorizontal input_plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
362 AccessWindowRectangle input_plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1,
363 sub_sampling_input, sub_sampling_input);
364 AccessWindowRectangle input_plane2_access(has_two_input_planars ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1,
365 sub_sampling_input, sub_sampling_input);
366 AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
367 AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output);
368 AccessWindowRectangle output_plane2_access(has_two_output_planars ? nullptr : output->plane(2)->info(), 0, 0,
369 num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output);
370
371 update_window_and_padding(win,
372 input_plane0_access, input_plane1_access, input_plane2_access,
373 output_plane0_access, output_plane1_access, output_plane2_access);
374
375 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(),
376 input->plane(2)->info()->valid_region());
377 output_plane0_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(0)->info()->tensor_shape()));
378 output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape()));
379 output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape()));
380
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100381 ICLKernel::configure_internal(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100382}
383
384void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue)
385{
386 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
387 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
388
389 Window slice = window.first_slice_window_2D();
390
391 if(nullptr != _input && nullptr != _output)
392 {
393 do
394 {
395 unsigned int idx = 0;
396 add_2D_tensor_argument(idx, _input, slice);
397 add_2D_tensor_argument(idx, _output, slice);
398 enqueue(queue, *this, slice);
399 }
400 while(window.slide_window_slice_2D(slice));
401 }
402 else if(nullptr != _input && nullptr != _multi_output)
403 {
404 Format format = _multi_output->info()->format();
405 do
406 {
407 Window win_uv(slice);
408
409 if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format))
410 {
411 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
412 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
413 }
414 unsigned int idx = 0;
415 add_2D_tensor_argument(idx, _input, slice);
416 add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice);
417 for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i)
418 {
419 add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_uv);
420 }
421 enqueue(queue, *this, slice);
422 }
423 while(window.slide_window_slice_2D(slice));
424 }
425 else if(nullptr != _multi_input && nullptr != _output)
426 {
427 Format format = _multi_input->info()->format();
428 do
429 {
430 Window win_uv(slice);
431
432 if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format))
433 {
434 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
435 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
436 }
437
438 unsigned int idx = 0;
439 add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice);
440
441 for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i)
442 {
443 add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_uv);
444 }
445 add_2D_tensor_argument(idx, _output, slice);
446 enqueue(queue, *this, slice);
447 }
448 while(window.slide_window_slice_2D(slice));
449 }
450 else if(nullptr != _multi_input && nullptr != _multi_output)
451 {
452 Format in_format = _multi_input->info()->format();
453 Format out_format = _multi_output->info()->format();
454 do
455 {
456 Window win_in_uv(slice);
457 if((Format::NV12 == in_format) || (Format::NV21 == in_format) || (Format::IYUV == in_format))
458 {
459 win_in_uv.set(Window::DimX, Window::Dimension(win_in_uv.x().start() / 2,
460 win_in_uv.x().end() / 2, win_in_uv.x().step() / 2));
461 win_in_uv.set(Window::DimY, Window::Dimension(win_in_uv.y().start() / 2, win_in_uv.y().end() / 2, 1));
462 }
463 unsigned int idx = 0;
464 add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice);
465 for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i)
466 {
467 add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_in_uv);
468 }
469
470 Window win_out_uv(slice);
471 if((Format::NV12 == out_format) || (Format::NV21 == out_format) || (Format::IYUV == out_format))
472 {
473 win_out_uv.set(Window::DimX, Window::Dimension(win_out_uv.x().start() / 2,
474 win_out_uv.x().end() / 2, win_out_uv.x().step() / 2));
475 win_out_uv.set(Window::DimY, Window::Dimension(win_out_uv.y().start() / 2, win_out_uv.y().end() / 2, 1));
476 }
477
478 add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice);
479 for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i)
480 {
481 add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_out_uv);
482 }
483 enqueue(queue, *this, slice);
484 }
485 while(window.slide_window_slice_2D(slice));
486 }
487 else
488 {
489 ARM_COMPUTE_ERROR("Not supported");
490 }
491}