/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
25
26#include "arm_compute/core/CL/CLKernelLibrary.h"
27#include "arm_compute/core/CL/ICLMultiImage.h"
28#include "arm_compute/core/CL/ICLTensor.h"
29#include "arm_compute/core/CL/OpenCL.h"
30#include "arm_compute/core/Error.h"
31#include "arm_compute/core/Helpers.h"
32#include "arm_compute/core/MultiImageInfo.h"
33#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Types.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/Window.h"
38
39#include <sstream>
40
41using namespace arm_compute;
42
43CLColorConvertKernel::CLColorConvertKernel()
44 : _input(nullptr), _output(nullptr), _multi_input(nullptr), _multi_output(nullptr)
45{
46}
47
48void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
49{
50 ARM_COMPUTE_ERROR_ON(input == nullptr);
51 ARM_COMPUTE_ERROR_ON(output == nullptr);
52
53 unsigned int num_elems_processed_per_iteration = 0;
54 switch(input->info()->format())
55 {
56 case Format::RGBA8888:
57 {
58 switch(output->info()->format())
59 {
60 case Format::RGB888:
61 num_elems_processed_per_iteration = 16;
62 break;
63 default:
64 break;
65 }
66 break;
67 }
68 case Format::UYVY422:
69 case Format::YUYV422:
70 {
71 switch(output->info()->format())
72 {
73 case Format::RGB888:
74 case Format::RGBA8888:
75 num_elems_processed_per_iteration = 8;
76 break;
77 default:
78 break;
79 }
80 break;
81 }
82 case Format::RGB888:
83 {
84 switch(output->info()->format())
85 {
86 case Format::RGBA8888:
87 num_elems_processed_per_iteration = 16;
88 break;
89 default:
90 break;
91 }
92 break;
93 }
94 default:
95 break;
96 }
97 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
98 string_from_format(input->info()->format()).c_str(),
99 string_from_format(output->info()->format()).c_str());
100
101 std::stringstream kernel_name;
102
103 kernel_name << string_from_format(input->info()->format());
104 kernel_name << "_to_";
105 kernel_name << string_from_format(output->info()->format());
106 kernel_name << "_bt709";
107
108 _input = input;
109 _output = output;
110
111 // Create kernel
112 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
113
114 // Configure kernel window
115 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
116 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
117 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
118
119 update_window_and_padding(win, input_access, output_access);
120
121 output_access.set_valid_region(win, input->info()->valid_region());
122
123 ICLKernel::configure(win);
124}
125
126void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output)
127{
128 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
129 ARM_COMPUTE_ERROR_ON(output == nullptr);
130
131 unsigned int num_elems_processed_per_iteration = 0;
132
133 switch(input->info()->format())
134 {
135 case Format::NV12:
136 case Format::NV21:
137 case Format::IYUV:
138 {
139 switch(output->info()->format())
140 {
141 case Format::RGB888:
142 case Format::RGBA8888:
143 num_elems_processed_per_iteration = 4;
144 break;
145 default:
146 break;
147 }
148 break;
149 }
150 default:
151 break;
152 }
153 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
154 string_from_format(input->info()->format()).c_str(),
155 string_from_format(output->info()->format()).c_str());
156
157 std::stringstream kernel_name;
158
159 kernel_name << string_from_format(input->info()->format());
160 kernel_name << "_to_";
161 kernel_name << string_from_format(output->info()->format());
162 kernel_name << "_bt709";
163
164 _multi_input = input;
165 _output = output;
166
167 // Create kernel
168 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
169
170 // Configure kernel window
171 const bool has_two_planes = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
172 const float sub_sampling = (has_two_planes || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
173
174 Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
175 win.set_dimension_step(Window::DimY, 2);
176
177 AccessWindowHorizontal plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
178 AccessWindowRectangle plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1,
179 sub_sampling, sub_sampling);
180 AccessWindowRectangle plane2_access(has_two_planes ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1,
181 sub_sampling, sub_sampling);
182 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
183
184 update_window_and_padding(win,
185 plane0_access, plane1_access, plane2_access,
186 output_access);
187
188 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(),
189 input->plane(2)->info()->valid_region());
190 output_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->info()->tensor_shape()));
191
192 ICLKernel::configure(win);
193}
194
195void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output)
196{
197 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
198 ARM_COMPUTE_ERROR_ON(output == nullptr);
199
200 unsigned int num_elems_processed_per_iteration = 0;
201
202 bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
203 float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
204
205 switch(input->info()->format())
206 {
207 case Format::RGB888:
208 case Format::RGBA8888:
209 {
210 switch(output->info()->format())
211 {
212 case Format::NV12:
213 case Format::IYUV:
214 num_elems_processed_per_iteration = 2;
215 break;
216 case Format::YUV444:
217 num_elems_processed_per_iteration = 4;
218 break;
219 default:
220 break;
221 }
222 break;
223 }
224 case Format::UYVY422:
225 case Format::YUYV422:
226 {
227 switch(output->info()->format())
228 {
229 case Format::NV12:
230 case Format::IYUV:
231 num_elems_processed_per_iteration = 8;
232 break;
233 default:
234 break;
235 }
236 break;
237 }
238 default:
239 break;
240 }
241 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
242 string_from_format(input->info()->format()).c_str(),
243 string_from_format(output->info()->format()).c_str());
244
245 std::stringstream kernel_name;
246
247 kernel_name << string_from_format(input->info()->format());
248 kernel_name << "_to_";
249 kernel_name << string_from_format(output->info()->format());
250 kernel_name << "_bt709";
251
252 _input = input;
253 _multi_output = output;
254
255 // Create kernel
256 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
257
258 // Configure kernel window
259 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
260 if((input->info()->format() != Format::RGB888 || output->info()->format() != Format::YUV444) && (input->info()->format() != Format::RGBA8888 || output->info()->format() != Format::YUV444))
261 {
262 win.set_dimension_step(Window::DimY, 2);
263 }
264
265 AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
266 AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
267 AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0,
268 num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
269
270 update_window_and_padding(win,
271 AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
272 output_plane0_access,
273 output_plane1_access,
274 output_plane2_access);
275
276 ValidRegion input_region = input->info()->valid_region();
277
278 output_plane0_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(0)->info()->tensor_shape()));
279 output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape()));
280 output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape()));
281
282 ICLKernel::configure(win);
283}
284
285void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output)
286{
287 unsigned int num_elems_processed_per_iteration = 0;
288 switch(input->info()->format())
289 {
290 case Format::NV12:
291 case Format::NV21:
292 {
293 switch(output->info()->format())
294 {
295 case Format::IYUV:
296 case Format::YUV444:
297 num_elems_processed_per_iteration = 16;
298 break;
299 default:
300 break;
301 }
302 break;
303 }
304 case Format::IYUV:
305 {
306 switch(output->info()->format())
307 {
308 case Format::YUV444:
309 case Format::NV12:
310 num_elems_processed_per_iteration = 16;
311 break;
312 default:
313 break;
314 }
315 break;
316 }
317 default:
318 break;
319 }
320 ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
321 string_from_format(input->info()->format()).c_str(),
322 string_from_format(output->info()->format()).c_str());
323
324 std::stringstream kernel_name;
325
326 kernel_name << string_from_format(input->info()->format());
327 kernel_name << "_to_";
328 kernel_name << string_from_format(output->info()->format());
329 kernel_name << "_bt709";
330
331 _multi_input = input;
332 _multi_output = output;
333
334 // Create kernel
335 bool has_two_input_planars = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
336 bool has_two_output_planars = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
337
338 float sub_sampling_input = (has_two_input_planars || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
339 float sub_sampling_output = (has_two_output_planars || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
340
341 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
342
343 Window win = calculate_max_window(*input->cl_plane(0)->info(), Steps(num_elems_processed_per_iteration));
344 win.set_dimension_step(Window::DimY, 2);
345
346 AccessWindowHorizontal input_plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration);
347 AccessWindowRectangle input_plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1,
348 sub_sampling_input, sub_sampling_input);
349 AccessWindowRectangle input_plane2_access(has_two_input_planars ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1,
350 sub_sampling_input, sub_sampling_input);
351 AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration);
352 AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output);
353 AccessWindowRectangle output_plane2_access(has_two_output_planars ? nullptr : output->plane(2)->info(), 0, 0,
354 num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output);
355
356 update_window_and_padding(win,
357 input_plane0_access, input_plane1_access, input_plane2_access,
358 output_plane0_access, output_plane1_access, output_plane2_access);
359
360 ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(),
361 input->plane(2)->info()->valid_region());
362 output_plane0_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(0)->info()->tensor_shape()));
363 output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape()));
364 output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape()));
365
366 ICLKernel::configure(win);
367}
368
369void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue)
370{
371 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
372 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
373
374 Window slice = window.first_slice_window_2D();
375
376 if(nullptr != _input && nullptr != _output)
377 {
378 do
379 {
380 unsigned int idx = 0;
381 add_2D_tensor_argument(idx, _input, slice);
382 add_2D_tensor_argument(idx, _output, slice);
383 enqueue(queue, *this, slice);
384 }
385 while(window.slide_window_slice_2D(slice));
386 }
387 else if(nullptr != _input && nullptr != _multi_output)
388 {
389 Format format = _multi_output->info()->format();
390 do
391 {
392 Window win_uv(slice);
393
394 if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format))
395 {
396 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
397 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
398 }
399 unsigned int idx = 0;
400 add_2D_tensor_argument(idx, _input, slice);
401 add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice);
402 for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i)
403 {
404 add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_uv);
405 }
406 enqueue(queue, *this, slice);
407 }
408 while(window.slide_window_slice_2D(slice));
409 }
410 else if(nullptr != _multi_input && nullptr != _output)
411 {
412 Format format = _multi_input->info()->format();
413 do
414 {
415 Window win_uv(slice);
416
417 if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format))
418 {
419 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
420 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
421 }
422
423 unsigned int idx = 0;
424 add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice);
425
426 for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i)
427 {
428 add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_uv);
429 }
430 add_2D_tensor_argument(idx, _output, slice);
431 enqueue(queue, *this, slice);
432 }
433 while(window.slide_window_slice_2D(slice));
434 }
435 else if(nullptr != _multi_input && nullptr != _multi_output)
436 {
437 Format in_format = _multi_input->info()->format();
438 Format out_format = _multi_output->info()->format();
439 do
440 {
441 Window win_in_uv(slice);
442 if((Format::NV12 == in_format) || (Format::NV21 == in_format) || (Format::IYUV == in_format))
443 {
444 win_in_uv.set(Window::DimX, Window::Dimension(win_in_uv.x().start() / 2,
445 win_in_uv.x().end() / 2, win_in_uv.x().step() / 2));
446 win_in_uv.set(Window::DimY, Window::Dimension(win_in_uv.y().start() / 2, win_in_uv.y().end() / 2, 1));
447 }
448 unsigned int idx = 0;
449 add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice);
450 for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i)
451 {
452 add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_in_uv);
453 }
454
455 Window win_out_uv(slice);
456 if((Format::NV12 == out_format) || (Format::NV21 == out_format) || (Format::IYUV == out_format))
457 {
458 win_out_uv.set(Window::DimX, Window::Dimension(win_out_uv.x().start() / 2,
459 win_out_uv.x().end() / 2, win_out_uv.x().step() / 2));
460 win_out_uv.set(Window::DimY, Window::Dimension(win_out_uv.y().start() / 2, win_out_uv.y().end() / 2, 1));
461 }
462
463 add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice);
464 for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i)
465 {
466 add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_out_uv);
467 }
468 enqueue(queue, *this, slice);
469 }
470 while(window.slide_window_slice_2D(slice));
471 }
472 else
473 {
474 ARM_COMPUTE_ERROR("Not supported");
475 }
476}