blob: e79019eab9ba9561cc11bbefce53291f8583ebe4 [file] [log] [blame]
/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
25
26#include "arm_compute/core/CL/CLKernelLibrary.h"
27#include "arm_compute/core/CL/ICLMultiImage.h"
28#include "arm_compute/core/CL/ICLTensor.h"
29#include "arm_compute/core/CL/OpenCL.h"
30#include "arm_compute/core/Error.h"
31#include "arm_compute/core/Helpers.h"
32#include "arm_compute/core/MultiImageInfo.h"
33#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Types.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/Window.h"
38
39#include <sstream>
40
41using namespace arm_compute;
42
43CLColorConvertKernel::CLColorConvertKernel()
44 : _input(nullptr), _output(nullptr), _multi_input(nullptr), _multi_output(nullptr)
45{
46}
47
48void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
49{
50 ARM_COMPUTE_ERROR_ON(input == nullptr);
51 ARM_COMPUTE_ERROR_ON(output == nullptr);
52
53 unsigned int num_elems_processed_per_iteration = 0;
54 switch(input->info()->format())
55 {
56 case Format::RGBA8888:
57 {
58 switch(output->info()->format())
59 {
60 case Format::RGB888:
61 num_elems_processed_per_iteration = 16;
62 break;
63 default:
64 break;
65 }
66 break;
67 }
68 case Format::UYVY422:
69 case Format::YUYV422:
70 {
71 switch(output->info()->format())
72 {
73 case Format::RGB888:
74 case Format::RGBA8888:
75 num_elems_processed_per_iteration = 8;
76 break;
77 default:
78 break;
79 }
80 break;
81 }
82 case Format::RGB888:
83 {
84 switch(output->info()->format())
85 {
86 case Format::RGBA8888:
87 num_elems_processed_per_iteration = 16;
88 break;
89 default:
90 break;
91 }
92 break;
93 }
94 default:
95 break;
96 }
97 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
98 string_from_format(input->info()->format()).c_str(),
99 string_from_format(output->info()->format()).c_str());
100
101 std::stringstream kernel_name;
102
103 kernel_name << string_from_format(input->info()->format());
104 kernel_name << "_to_";
105 kernel_name << string_from_format(output->info()->format());
106 kernel_name << "_bt709";
107
108 _input = input;
109 _output = output;
110
111 // Create kernel
112 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
113
114 // Configure kernel window
115 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
116 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
117 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
118
119 update_window_and_padding(win, input_access, output_access);
120
121 output_access.set_valid_region(win, input->info()->valid_region());
122
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100123 ICLKernel::configure_internal(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100124}
125
126void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output)
127{
128 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
129 ARM_COMPUTE_ERROR_ON(output == nullptr);
130
131 unsigned int num_elems_processed_per_iteration = 0;
132
133 switch(input->info()->format())
134 {
135 case Format::NV12:
136 case Format::NV21:
137 case Format::IYUV:
138 {
139 switch(output->info()->format())
140 {
141 case Format::RGB888:
142 case Format::RGBA8888:
143 num_elems_processed_per_iteration = 4;
144 break;
145 default:
146 break;
147 }
148 break;
149 }
150 default:
151 break;
152 }
153 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
154 string_from_format(input->info()->format()).c_str(),
155 string_from_format(output->info()->format()).c_str());
156
157 std::stringstream kernel_name;
158
159 kernel_name << string_from_format(input->info()->format());
160 kernel_name << "_to_";
161 kernel_name << string_from_format(output->info()->format());
162 kernel_name << "_bt709";
163
164 _multi_input = input;
165 _output = output;
166
167 // Create kernel
168 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
169
170 // Configure kernel window
171 const bool has_two_planes = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
172 const float sub_sampling = (has_two_planes || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
173
174 Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
175 win.set_dimension_step(Window::DimY, 2);
176
177 AccessWindowHorizontal plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
178 AccessWindowRectangle plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1,
179 sub_sampling, sub_sampling);
180 AccessWindowRectangle plane2_access(has_two_planes ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1,
181 sub_sampling, sub_sampling);
182 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
183
184 update_window_and_padding(win,
185 plane0_access, plane1_access, plane2_access,
186 output_access);
187
188 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(),
189 input->plane(2)->info()->valid_region());
190 output_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->info()->tensor_shape()));
191
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100192 ICLKernel::configure_internal(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100193}
194
195void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output)
196{
197 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
198 ARM_COMPUTE_ERROR_ON(output == nullptr);
199
200 unsigned int num_elems_processed_per_iteration = 0;
Pablo Tello96fc1d62018-07-17 17:10:59 +0100201 unsigned int num_elems_read_per_iteration_x = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100202
203 bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
204 float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
205
206 switch(input->info()->format())
207 {
208 case Format::RGB888:
209 case Format::RGBA8888:
210 {
211 switch(output->info()->format())
212 {
213 case Format::NV12:
214 case Format::IYUV:
215 num_elems_processed_per_iteration = 2;
Pablo Tello96fc1d62018-07-17 17:10:59 +0100216 num_elems_read_per_iteration_x = 8;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100217 break;
218 case Format::YUV444:
219 num_elems_processed_per_iteration = 4;
Pablo Tello96fc1d62018-07-17 17:10:59 +0100220 num_elems_read_per_iteration_x = 16;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100221 break;
222 default:
223 break;
224 }
225 break;
226 }
227 case Format::UYVY422:
228 case Format::YUYV422:
229 {
230 switch(output->info()->format())
231 {
232 case Format::NV12:
233 case Format::IYUV:
234 num_elems_processed_per_iteration = 8;
Pablo Tello96fc1d62018-07-17 17:10:59 +0100235 num_elems_read_per_iteration_x = 8;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100236 break;
237 default:
238 break;
239 }
240 break;
241 }
242 default:
243 break;
244 }
Pablo Tello96fc1d62018-07-17 17:10:59 +0100245
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100246 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
247 string_from_format(input->info()->format()).c_str(),
248 string_from_format(output->info()->format()).c_str());
249
250 std::stringstream kernel_name;
251
252 kernel_name << string_from_format(input->info()->format());
253 kernel_name << "_to_";
254 kernel_name << string_from_format(output->info()->format());
255 kernel_name << "_bt709";
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100256 _input = input;
257 _multi_output = output;
258
259 // Create kernel
260 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
261
262 // Configure kernel window
263 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
264 if((input->info()->format() != Format::RGB888 || output->info()->format() != Format::YUV444) && (input->info()->format() != Format::RGBA8888 || output->info()->format() != Format::YUV444))
265 {
266 win.set_dimension_step(Window::DimY, 2);
267 }
268
269 AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
270 AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
271 AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0,
272 num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
273
Pablo Tello96fc1d62018-07-17 17:10:59 +0100274 AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration_x);
275
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100276 update_window_and_padding(win,
Pablo Tello96fc1d62018-07-17 17:10:59 +0100277 input_access,
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100278 output_plane0_access,
279 output_plane1_access,
280 output_plane2_access);
281
282 ValidRegion input_region = input->info()->valid_region();
283
284 output_plane0_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(0)->info()->tensor_shape()));
285 output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape()));
286 output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape()));
287
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100288 ICLKernel::configure_internal(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100289}
290
291void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output)
292{
293 unsigned int num_elems_processed_per_iteration = 0;
294 switch(input->info()->format())
295 {
296 case Format::NV12:
297 case Format::NV21:
298 {
299 switch(output->info()->format())
300 {
301 case Format::IYUV:
302 case Format::YUV444:
303 num_elems_processed_per_iteration = 16;
304 break;
305 default:
306 break;
307 }
308 break;
309 }
310 case Format::IYUV:
311 {
312 switch(output->info()->format())
313 {
314 case Format::YUV444:
315 case Format::NV12:
316 num_elems_processed_per_iteration = 16;
317 break;
318 default:
319 break;
320 }
321 break;
322 }
323 default:
324 break;
325 }
326 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
327 string_from_format(input->info()->format()).c_str(),
328 string_from_format(output->info()->format()).c_str());
329
330 std::stringstream kernel_name;
331
332 kernel_name << string_from_format(input->info()->format());
333 kernel_name << "_to_";
334 kernel_name << string_from_format(output->info()->format());
335 kernel_name << "_bt709";
336
337 _multi_input = input;
338 _multi_output = output;
339
340 // Create kernel
341 bool has_two_input_planars = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
342 bool has_two_output_planars = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
343
344 float sub_sampling_input = (has_two_input_planars || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
345 float sub_sampling_output = (has_two_output_planars || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
346
347 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
348
349 Window win = calculate_max_window(*input->cl_plane(0)->info(), Steps(num_elems_processed_per_iteration));
350 win.set_dimension_step(Window::DimY, 2);
351
352 AccessWindowHorizontal input_plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
353 AccessWindowRectangle input_plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1,
354 sub_sampling_input, sub_sampling_input);
355 AccessWindowRectangle input_plane2_access(has_two_input_planars ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1,
356 sub_sampling_input, sub_sampling_input);
357 AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
358 AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output);
359 AccessWindowRectangle output_plane2_access(has_two_output_planars ? nullptr : output->plane(2)->info(), 0, 0,
360 num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output);
361
362 update_window_and_padding(win,
363 input_plane0_access, input_plane1_access, input_plane2_access,
364 output_plane0_access, output_plane1_access, output_plane2_access);
365
366 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(),
367 input->plane(2)->info()->valid_region());
368 output_plane0_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(0)->info()->tensor_shape()));
369 output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape()));
370 output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape()));
371
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100372 ICLKernel::configure_internal(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100373}
374
375void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue)
376{
377 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
378 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
379
380 Window slice = window.first_slice_window_2D();
381
382 if(nullptr != _input && nullptr != _output)
383 {
384 do
385 {
386 unsigned int idx = 0;
387 add_2D_tensor_argument(idx, _input, slice);
388 add_2D_tensor_argument(idx, _output, slice);
389 enqueue(queue, *this, slice);
390 }
391 while(window.slide_window_slice_2D(slice));
392 }
393 else if(nullptr != _input && nullptr != _multi_output)
394 {
395 Format format = _multi_output->info()->format();
396 do
397 {
398 Window win_uv(slice);
399
400 if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format))
401 {
402 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
403 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
404 }
405 unsigned int idx = 0;
406 add_2D_tensor_argument(idx, _input, slice);
407 add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice);
408 for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i)
409 {
410 add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_uv);
411 }
412 enqueue(queue, *this, slice);
413 }
414 while(window.slide_window_slice_2D(slice));
415 }
416 else if(nullptr != _multi_input && nullptr != _output)
417 {
418 Format format = _multi_input->info()->format();
419 do
420 {
421 Window win_uv(slice);
422
423 if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format))
424 {
425 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
426 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
427 }
428
429 unsigned int idx = 0;
430 add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice);
431
432 for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i)
433 {
434 add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_uv);
435 }
436 add_2D_tensor_argument(idx, _output, slice);
437 enqueue(queue, *this, slice);
438 }
439 while(window.slide_window_slice_2D(slice));
440 }
441 else if(nullptr != _multi_input && nullptr != _multi_output)
442 {
443 Format in_format = _multi_input->info()->format();
444 Format out_format = _multi_output->info()->format();
445 do
446 {
447 Window win_in_uv(slice);
448 if((Format::NV12 == in_format) || (Format::NV21 == in_format) || (Format::IYUV == in_format))
449 {
450 win_in_uv.set(Window::DimX, Window::Dimension(win_in_uv.x().start() / 2,
451 win_in_uv.x().end() / 2, win_in_uv.x().step() / 2));
452 win_in_uv.set(Window::DimY, Window::Dimension(win_in_uv.y().start() / 2, win_in_uv.y().end() / 2, 1));
453 }
454 unsigned int idx = 0;
455 add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice);
456 for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i)
457 {
458 add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_in_uv);
459 }
460
461 Window win_out_uv(slice);
462 if((Format::NV12 == out_format) || (Format::NV21 == out_format) || (Format::IYUV == out_format))
463 {
464 win_out_uv.set(Window::DimX, Window::Dimension(win_out_uv.x().start() / 2,
465 win_out_uv.x().end() / 2, win_out_uv.x().step() / 2));
466 win_out_uv.set(Window::DimY, Window::Dimension(win_out_uv.y().start() / 2, win_out_uv.y().end() / 2, 1));
467 }
468
469 add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice);
470 for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i)
471 {
472 add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_out_uv);
473 }
474 enqueue(queue, *this, slice);
475 }
476 while(window.slide_window_slice_2D(slice));
477 }
478 else
479 {
480 ARM_COMPUTE_ERROR("Not supported");
481 }
482}