blob: 0da5affe182e8c8f7ee24c50fd8418e41e584941 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Alex Gildayc357c472018-03-21 13:54:09 +00002 * Copyright (c) 2016-2018 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/Error.h"
25#include "arm_compute/core/Helpers.h"
26#include "arm_compute/core/IMultiImage.h"
27#include "arm_compute/core/Utils.h"
28
29#include <arm_neon.h>
30
31namespace
32{
Alex Gildayc357c472018-03-21 13:54:09 +000033#ifndef DOXYGEN_SKIP_THIS
// BT.709 YUV -> RGB: factors applied to the chroma samples once the 128 offset has been removed
constexpr float red_coef_bt709{ 1.5748f };     // scales V when building red
constexpr float green_coef_bt709{ -0.1873f };  // scales U when building green
constexpr float green_coef2_bt709{ -0.4681f }; // scales V when building green
constexpr float blue_coef_bt709{ 1.8556f };    // scales U when building blue

// BT.709 RGB -> YUV: luma weights of the red and blue channels
constexpr float rgb2yuv_bt709_kr{ 0.2126f };
constexpr float rgb2yuv_bt709_kb{ 0.0722f };
// Luma weight of the green channel: K_g = 1 - K_r - K_b
constexpr float rgb2yuv_bt709_kg{ 0.7152f };
// Scale applied to (B - Y) to obtain U: C_u = 1 / (2 * (1 - K_b))
constexpr float rgb2yuv_bt709_cu{ 0.5389f };
// Scale applied to (R - Y) to obtain V: C_v = 1 / (2 * (1 - K_r))
constexpr float rgb2yuv_bt709_cv{ 0.6350f };
47
48inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out)
49{
50 const auto tmp1 = vmovl_u8(vget_low_u8(in));
51 out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1)));
52 out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1)));
53 const auto tmp2 = vmovl_u8(vget_high_u8(in));
54 out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2)));
55 out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2)));
56}
57
58inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
59{
60 out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])),
61 vqmovn_u32(vcvtq_u32_f32(in2.val[0]))));
62 out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])),
63 vqmovn_u32(vcvtq_u32_f32(in2.val[1]))));
64 out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])),
65 vqmovn_u32(vcvtq_u32_f32(in2.val[2]))));
66}
67
68inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t &out)
69{
70 const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])),
71 vqmovn_u32(vcvtq_u32_f32(in.val[1])));
72 const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])),
73 vqmovn_u32(vcvtq_u32_f32(in.val[3])));
74 out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high));
75}
76
77inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
78 float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
79{
80 /*
81 Y'= 0.2126*R' + 0.7152*G' + 0.0722*B'
82 U'=-0.1146*R' - 0.3854*G' + 0.5000*B'
83 V'= 0.5000*R' - 0.4542*G' - 0.0458*B'
84 */
85 const auto c128 = vdupq_n_f32(128.f);
86
87 // Y = R * K_r + G * (1 - K_r - K_b) * B * K_b
88 yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
89 yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
90 yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);
91
92 // U = (B - Y) / (2 * (1 - K_b))
93 uvec = vsubq_f32(bvec, yvec);
94 uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu);
95
96 // V = (R - Y) / (2 * (1 - K_r))
97 vvec = vsubq_f32(rvec, yvec);
98 vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv);
99}
100
101inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
102 float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
103{
104 float32x4x3_t rgb1, rgb2;
105
106 // Compute: cb - 128 and cr - 128;
107 const auto c128 = vdupq_n_f32(128.f);
108 uvec_val = vsubq_f32(uvec_val, c128);
109 vvec_val = vsubq_f32(vvec_val, c128);
110
111 // Compute:
112 // r = 0.0000f*f_u + 1.5748f*f_v;
113 // g = 0.1873f*f_u - 0.4681f*f_v;
114 // b = 1.8556f*f_u + 0.0000f*f_v;
115 const auto red = vmulq_n_f32(vvec_val, red_coef_bt709);
116 const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
117 const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
118 vmulq_n_f32(vvec_val, green_coef2_bt709));
119
120 // Compute the final r,g,b values using y1 for the first texel and y2 for the second one.
121 // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t
122 // and written back to memory using vst3 instruction
123
124 rgb1.val[0] = vaddq_f32(yvec_val, red);
125 rgb1.val[1] = vaddq_f32(yvec_val, green);
126 rgb1.val[2] = vaddq_f32(yvec_val, blue);
127
128 rgb2.val[0] = vaddq_f32(yyvec_val, red);
129 rgb2.val[1] = vaddq_f32(yyvec_val, green);
130 rgb2.val[2] = vaddq_f32(yyvec_val, blue);
131
132 uint8x8x3_t u8_rgb;
133 convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);
134
135 if(!alpha)
136 {
137 vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
138 vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
139 vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
140 vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
141 vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
142 vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
143 vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
144 vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
145 }
146 else
147 {
148 uint8x8x4_t u8_rgba;
149 u8_rgba.val[0] = u8_rgb.val[0];
150 u8_rgba.val[1] = u8_rgb.val[1];
151 u8_rgba.val[2] = u8_rgb.val[2];
152 u8_rgba.val[3] = vdup_n_u8(255);
153 vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
154 vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
155 vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
156 vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
157 vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
158 vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
159 vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
160 vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
161 }
162}
163
164inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
165{
166 uint8x16x3_t rgb;
167
168 if(alpha)
169 {
170 const auto tmp = vld4q_u8(ptr);
171 rgb.val[0] = tmp.val[0];
172 rgb.val[1] = tmp.val[1];
173 rgb.val[2] = tmp.val[2];
174 }
175 else
176 {
177 rgb = vld3q_u8(ptr);
178 }
179
180 return rgb;
181}
182
183inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
184{
185 // Convert the uint8x16_t to float32x4x4_t
186 float32x4x4_t frvec_top, fgvec_top, fbvec_top;
187 convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top);
188 convert_uint8x16_to_float32x4x4(vec_top.val[1], fgvec_top);
189 convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top);
190
191 float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom;
192 convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom);
193 convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom);
194 convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom);
195
196 float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
197 float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
198
199 for(auto i = 0; i < 4; ++i)
200 {
201 rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i],
202 fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
203 rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i],
204 fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
205 }
206
207 convert_float32x4x4_to_unit8x16(fyvec_top, vec_top.val[0]);
208 convert_float32x4x4_to_unit8x16(fuvec_top, vec_top.val[1]);
209 convert_float32x4x4_to_unit8x16(fvvec_top, vec_top.val[2]);
210 convert_float32x4x4_to_unit8x16(fyvec_bottom, vec_bottom.val[0]);
211 convert_float32x4x4_to_unit8x16(fuvec_bottom, vec_bottom.val[1]);
212 convert_float32x4x4_to_unit8x16(fvvec_bottom, vec_bottom.val[2]);
213}
214
215inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
216 const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
217 unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
218 unsigned char *const __restrict out_uv)
219{
220 uint8x16x3_t vec_top, vec_bottom;
221 vec_top.val[0] = rvec_top;
222 vec_top.val[1] = gvec_top;
223 vec_top.val[2] = bvec_top;
224 vec_bottom.val[0] = rvec_bottom;
225 vec_bottom.val[1] = gvec_bottom;
226 vec_bottom.val[2] = bvec_bottom;
227
228 rgb_to_yuv_conversion(vec_top, vec_bottom);
229
230 vst1q_u8(out_y_top, vec_top.val[0]);
231 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
232
233 const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
234 const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
235 const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
236 const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
237
238 uint8x8x2_t uvvec;
239 uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
240 uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
241
242 vst2_u8(out_uv, uvvec);
243}
244
245inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
246 const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
247 unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
248 unsigned char *const __restrict out_u,
249 unsigned char *const __restrict out_v)
250{
251 uint8x16x3_t vec_top, vec_bottom;
252 vec_top.val[0] = rvec_top;
253 vec_top.val[1] = gvec_top;
254 vec_top.val[2] = bvec_top;
255 vec_bottom.val[0] = rvec_bottom;
256 vec_bottom.val[1] = gvec_bottom;
257 vec_bottom.val[2] = bvec_bottom;
258
259 rgb_to_yuv_conversion(vec_top, vec_bottom);
260
261 vst1q_u8(out_y_top, vec_top.val[0]);
262 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
263
264 const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
265 const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
266 const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
267 vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
268
269 vst1_u8(out_u, vget_low_u8(uvvec));
270 vst1_u8(out_v, vget_high_u8(uvvec));
271}
272
273inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
274 unsigned char *const __restrict out_y,
275 unsigned char *const __restrict out_u,
276 unsigned char *const __restrict out_v)
277{
278 // Convert the uint8x16_t to float32x4x4_t
279 float32x4x4_t frvec, fgvec, fbvec;
280 convert_uint8x16_to_float32x4x4(rvec, frvec);
281 convert_uint8x16_to_float32x4x4(gvec, fgvec);
282 convert_uint8x16_to_float32x4x4(bvec, fbvec);
283
284 float32x4x4_t fyvec, fuvec, fvvec;
285 for(auto i = 0; i < 4; ++i)
286 {
287 rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
288 fyvec.val[i], fuvec.val[i], fvvec.val[i]);
289 }
290
291 uint8x16_t yvec, uvec, vvec;
292 convert_float32x4x4_to_unit8x16(fyvec, yvec);
293 convert_float32x4x4_to_unit8x16(fuvec, uvec);
294 convert_float32x4x4_to_unit8x16(fvvec, vvec);
295
296 vst1q_u8(out_y, yvec);
297 vst1q_u8(out_u, uvec);
298 vst1q_u8(out_v, vvec);
299}
Alex Gildayc357c472018-03-21 13:54:09 +0000300#endif /* DOXYGEN_SKIP_THIS */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100301}
302
303namespace arm_compute
304{
Alex Gildayc357c472018-03-21 13:54:09 +0000305/** Convert RGB to RGBX.
306 *
307 * @param[in] input Input RGB data buffer.
308 * @param[out] output Output RGBX buffer.
309 * @param[in] win Window for iterating the buffers.
310 *
311 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100312void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
313{
314 ARM_COMPUTE_ERROR_ON(nullptr == input);
315 ARM_COMPUTE_ERROR_ON(nullptr == output);
316
317 const auto input_ptr = static_cast<const IImage *__restrict>(input);
318 const auto output_ptr = static_cast<IImage *__restrict>(output);
319
320 Iterator in(input_ptr, win);
321 Iterator out(output_ptr, win);
322
323 execute_window_loop(win, [&](const Coordinates & id)
324 {
325 const auto ta1 = vld3q_u8(in.ptr());
326 uint8x16x4_t ta2;
327 ta2.val[0] = ta1.val[0];
328 ta2.val[1] = ta1.val[1];
329 ta2.val[2] = ta1.val[2];
330 ta2.val[3] = vdupq_n_u8(255);
331 vst4q_u8(out.ptr(), ta2);
332 },
333 in, out);
334}
335
Alex Gildayc357c472018-03-21 13:54:09 +0000336/** Convert RGBX to RGB.
337 *
338 * @param[in] input Input RGBX data buffer.
339 * @param[out] output Output RGB buffer.
340 * @param[in] win Window for iterating the buffers.
341 *
342 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100343void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
344{
345 ARM_COMPUTE_ERROR_ON(nullptr == input);
346 ARM_COMPUTE_ERROR_ON(nullptr == output);
347
348 const auto input_ptr = static_cast<const IImage *__restrict>(input);
349 const auto output_ptr = static_cast<IImage *__restrict>(output);
350
351 Iterator in(input_ptr, win);
352 Iterator out(output_ptr, win);
353
354 execute_window_loop(win, [&](const Coordinates & id)
355 {
356 const auto ta1 = vld4q_u8(in.ptr());
357 uint8x16x3_t ta2;
358 ta2.val[0] = ta1.val[0];
359 ta2.val[1] = ta1.val[1];
360 ta2.val[2] = ta1.val[2];
361 vst3q_u8(out.ptr(), ta2);
362 },
363 in, out);
364}
365
Alex Gildayc357c472018-03-21 13:54:09 +0000366/** Convert YUYV to RGB.
367 *
368 * @param[in] input Input YUYV data buffer.
369 * @param[out] output Output RGB buffer.
370 * @param[in] win Window for iterating the buffers.
371 *
372 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100373template <bool yuyv, bool alpha>
374void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
375{
376 ARM_COMPUTE_ERROR_ON(nullptr == input);
377 ARM_COMPUTE_ERROR_ON(nullptr == output);
378
379 const auto input_ptr = static_cast<const IImage *__restrict>(input);
380 const auto output_ptr = static_cast<IImage *__restrict>(output);
381
382 constexpr auto element_size = alpha ? 32 : 24;
383 constexpr auto shift = yuyv ? 0 : 1;
384
385 Iterator in(input_ptr, win);
386 Iterator out(output_ptr, win);
387
388 execute_window_loop(win, [&](const Coordinates & id)
389 {
390 float32x4x4_t uvec, yvec, vvec, yyvec;
391 const auto ta = vld4q_u8(in.ptr());
392 //ta.val[0] = Y0 Y2 Y4 Y6 ...
393 //ta.val[1] = U0 U2 U4 U6 ...
394 //ta.val[2] = Y1 Y3 Y5 Y7 ...
395 //ta.val[3] = V0 V2 V4 V7 ...
396
397 // Convert the uint8x16x4_t to float32x4x4_t
398 convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec);
399 convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec);
400 convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec);
401 convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec);
402
403 yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
404 yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
405 yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
406 yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
407 },
408 in, out);
409}
410
Alex Gildayc357c472018-03-21 13:54:09 +0000411/** Convert NV12 to RGB.
412 *
413 * @param[in] input Input NV12 data buffer.
414 * @param[out] output Output RGB buffer.
415 * @param[in] win Window for iterating the buffers.
416 *
417 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100418template <bool uv, bool alpha>
419void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
420{
421 ARM_COMPUTE_ERROR_ON(nullptr == input);
422 ARM_COMPUTE_ERROR_ON(nullptr == output);
423 win.validate();
424
425 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
426 const auto output_ptr = static_cast<IImage *__restrict>(output);
427
428 constexpr auto element_size = alpha ? 32 : 24;
429 const auto out_stride = output_ptr->info()->strides_in_bytes().y();
430 constexpr auto shift = uv ? 0 : 1;
431
432 // UV's width and height are subsampled
433 Window win_uv(win);
434 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2));
435 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
436 win_uv.validate();
437
438 Iterator in_y(input_ptr->plane(0), win);
439 Iterator in_uv(input_ptr->plane(1), win_uv);
440 Iterator out(output_ptr, win);
441
442 execute_window_loop(win, [&](const Coordinates & id)
443 {
444 const auto ta_y_top = vld2q_u8(in_y.ptr());
445 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
446 const auto ta_uv = vld2q_u8(in_uv.ptr());
447 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
448 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
449 //ta_uv.val[0] = U0 U2 U4 U6 ...
450 //ta_uv.val[1] = V0 V2 V4 V6 ...
451
452 // Convert the uint8x16x4_t to float32x4x4_t
453 float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
454 convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
455 convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
456 convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
457 convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
458 convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec);
459 convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec);
460
461 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
462 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
463 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
464 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
465
466 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
467 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
468 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
469 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
470 },
471 in_y, in_uv, out);
472}
473
Alex Gildayc357c472018-03-21 13:54:09 +0000474/** Convert IYUV to RGB.
475 *
476 * @param[in] input Input IYUV data buffer.
477 * @param[out] output Output RGB buffer.
478 * @param[in] win Window for iterating the buffers.
479 *
480 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100481template <bool alpha>
482void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
483{
484 ARM_COMPUTE_ERROR_ON(nullptr == input);
485 ARM_COMPUTE_ERROR_ON(nullptr == output);
486 win.validate();
487
488 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
489 const auto output_ptr = static_cast<IImage *__restrict>(output);
490
491 constexpr auto element_size = alpha ? 32 : 24;
492 const auto out_stride = output_ptr->info()->strides_in_bytes().y();
493
494 // UV's width and height are subsampled
495 Window win_uv(win);
496 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
497 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
498 win_uv.validate();
499
500 Iterator in_y(input_ptr->plane(0), win);
501 Iterator in_u(input_ptr->plane(1), win_uv);
502 Iterator in_v(input_ptr->plane(2), win_uv);
503 Iterator out(output_ptr, win);
504
505 execute_window_loop(win, [&](const Coordinates & id)
506 {
507 const auto ta_y_top = vld2q_u8(in_y.ptr());
508 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
509 const auto ta_u = vld1q_u8(in_u.ptr());
510 const auto ta_v = vld1q_u8(in_v.ptr());
511 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
512 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
513 //ta_u.val[0] = U0 U2 U4 U6 ...
514 //ta_v.val[0] = V0 V2 V4 V6 ...
515
516 // Convert the uint8x16x4_t to float32x4x4_t
517 float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
518 convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
519 convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
520 convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
521 convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
522 convert_uint8x16_to_float32x4x4(ta_u, uvec);
523 convert_uint8x16_to_float32x4x4(ta_v, vvec);
524
525 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
526 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
527 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
528 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
529
530 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
531 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
532 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
533 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
534 },
535 in_y, in_u, in_v, out);
536}
537
Alex Gildayc357c472018-03-21 13:54:09 +0000538/** Convert YUYV to NV12.
539 *
540 * @param[in] input Input YUYV data buffer.
541 * @param[out] output Output NV12 buffer.
542 * @param[in] win Window for iterating the buffers.
543 *
544 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100545template <bool yuyv>
546void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
547{
548 ARM_COMPUTE_ERROR_ON(nullptr == input);
549 ARM_COMPUTE_ERROR_ON(nullptr == output);
550 win.validate();
551
552 const auto input_ptr = static_cast<const IImage *__restrict>(input);
553 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
554
555 constexpr auto shift = yuyv ? 0 : 1;
556
557 // NV12's UV's width and height are subsampled
558 Window win_uv(win);
559 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
560 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
561 win_uv.validate();
562
563 Iterator in(input_ptr, win);
564 Iterator out_y(output_ptr->plane(0), win);
565 Iterator out_uv(output_ptr->plane(1), win_uv);
566
567 execute_window_loop(win, [&](const Coordinates & id)
568 {
569 const auto ta_top = vld4q_u8(in.ptr());
570 const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
571 //ta.val[0] = Y0 Y2 Y4 Y6 ...
572 //ta.val[1] = U0 U2 U4 U6 ...
573 //ta.val[2] = Y1 Y3 Y5 Y7 ...
574 //ta.val[3] = V0 V2 V4 V7 ...
575
576 uint8x16x2_t yvec;
577 yvec.val[0] = ta_top.val[0 + shift];
578 yvec.val[1] = ta_top.val[2 + shift];
579 vst2q_u8(out_y.ptr(), yvec);
580
581 uint8x16x2_t yyvec;
582 yyvec.val[0] = ta_bottom.val[0 + shift];
583 yyvec.val[1] = ta_bottom.val[2 + shift];
584 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
585
586 uint8x16x2_t uvvec;
587 uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
588 uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
589 vst2q_u8(out_uv.ptr(), uvvec);
590 },
591 in, out_y, out_uv);
592}
593
Alex Gildayc357c472018-03-21 13:54:09 +0000594/** Convert IYUV to NV12.
595 *
596 * @param[in] input Input IYUV data buffer.
597 * @param[out] output Output NV12 buffer.
598 * @param[in] win Window for iterating the buffers.
599 *
600 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100601void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
602{
603 ARM_COMPUTE_ERROR_ON(nullptr == input);
604 ARM_COMPUTE_ERROR_ON(nullptr == output);
605 win.validate();
606
607 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
608 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
609
610 // UV's width and height are subsampled
611 Window win_uv(win);
612 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
613 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
614 win_uv.validate();
615
616 Iterator in_y(input_ptr->plane(0), win);
617 Iterator in_u(input_ptr->plane(1), win_uv);
618 Iterator in_v(input_ptr->plane(2), win_uv);
619 Iterator out_y(output_ptr->plane(0), win);
620 Iterator out_uv(output_ptr->plane(1), win_uv);
621
622 execute_window_loop(win, [&](const Coordinates & id)
623 {
624 const auto ta_y_top = vld2q_u8(in_y.ptr());
625 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
626 uint8x16x2_t ta_uv;
627 ta_uv.val[0] = vld1q_u8(in_u.ptr());
628 ta_uv.val[1] = vld1q_u8(in_v.ptr());
629 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
630 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
631 //ta_uv.val[0] = U0 U2 U4 U6 ...
632 //ta_uv.val[1] = V0 V2 V4 V6 ...
633
634 vst2q_u8(out_y.ptr(), ta_y_top);
635 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
636 vst2q_u8(out_uv.ptr(), ta_uv);
637 },
638 in_y, in_u, in_v, out_y, out_uv);
639}
640
Alex Gildayc357c472018-03-21 13:54:09 +0000641/** Convert NV12 to IYUV.
642 *
643 * @param[in] input Input NV12 data buffer.
644 * @param[out] output Output IYUV buffer.
645 * @param[in] win Window for iterating the buffers.
646 *
647 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100648template <bool uv>
649void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
650{
651 ARM_COMPUTE_ERROR_ON(nullptr == input);
652 ARM_COMPUTE_ERROR_ON(nullptr == output);
653 win.validate();
654
655 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
656 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
657
658 constexpr auto shift = uv ? 0 : 1;
659
660 // UV's width and height are subsampled
661 Window win_uv(win);
662 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
663 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
664 win_uv.validate();
665
666 Iterator in_y(input_ptr->plane(0), win);
667 Iterator in_uv(input_ptr->plane(1), win_uv);
668 Iterator out_y(output_ptr->plane(0), win);
669 Iterator out_u(output_ptr->plane(1), win_uv);
670 Iterator out_v(output_ptr->plane(2), win_uv);
671
672 execute_window_loop(win, [&](const Coordinates & id)
673 {
674 const auto ta_y_top = vld2q_u8(in_y.ptr());
675 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
676 const auto ta_uv = vld2q_u8(in_uv.ptr());
677 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
678 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
679 //ta_uv.val[0] = U0 U2 U4 U6 ...
680 //ta_uv.val[1] = V0 V2 V4 V6 ...
681
682 vst2q_u8(out_y.ptr(), ta_y_top);
683 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
684 vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
685 vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
686 },
687 in_y, in_uv, out_y, out_u, out_v);
688}
689
Alex Gildayc357c472018-03-21 13:54:09 +0000690/** Convert YUYV to IYUV.
691 *
692 * @param[in] input Input YUYV data buffer.
693 * @param[out] output Output IYUV buffer.
694 * @param[in] win Window for iterating the buffers.
695 *
696 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100697template <bool yuyv>
698void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
699{
700 ARM_COMPUTE_ERROR_ON(nullptr == input);
701 ARM_COMPUTE_ERROR_ON(nullptr == output);
702 win.validate();
703
704 const auto input_ptr = static_cast<const IImage *__restrict>(input);
705 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
706
707 constexpr auto shift = yuyv ? 0 : 1;
708
709 // Destination's UV's width and height are subsampled
710 Window win_uv(win);
711 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
712 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
713 win_uv.validate();
714
715 Iterator in(input_ptr, win);
716 Iterator out_y(output_ptr->plane(0), win);
717 Iterator out_u(output_ptr->plane(1), win_uv);
718 Iterator out_v(output_ptr->plane(2), win_uv);
719
720 execute_window_loop(win, [&](const Coordinates & id)
721 {
722 const auto ta_top = vld4q_u8(in.ptr());
723 const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
724 //ta.val[0] = Y0 Y2 Y4 Y6 ...
725 //ta.val[1] = U0 U2 U4 U6 ...
726 //ta.val[2] = Y1 Y3 Y5 Y7 ...
727 //ta.val[3] = V0 V2 V4 V7 ...
728
729 uint8x16x2_t yvec;
730 yvec.val[0] = ta_top.val[0 + shift];
731 yvec.val[1] = ta_top.val[2 + shift];
732 vst2q_u8(out_y.ptr(), yvec);
733
734 uint8x16x2_t yyvec;
735 yyvec.val[0] = ta_bottom.val[0 + shift];
736 yyvec.val[1] = ta_bottom.val[2 + shift];
737 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
738
739 uint8x16_t uvec;
740 uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
741 vst1q_u8(out_u.ptr(), uvec);
742
743 uint8x16_t vvec;
744 vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
745 vst1q_u8(out_v.ptr(), vvec);
746 },
747 in, out_y, out_u, out_v);
748}
749
Alex Gildayc357c472018-03-21 13:54:09 +0000750/** Convert NV12 to YUV4.
751 *
752 * @param[in] input Input NV12 data buffer.
753 * @param[out] output Output YUV4 buffer.
754 * @param[in] win Window for iterating the buffers.
755 *
756 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100757template <bool uv>
758void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
759{
760 ARM_COMPUTE_ERROR_ON(nullptr == input);
761 ARM_COMPUTE_ERROR_ON(nullptr == output);
762 win.validate();
763
764 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
765 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
766
767 constexpr auto shift = uv ? 0 : 1;
768
769 // UV's width and height are subsampled
770 Window win_uv(win);
771 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
772 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
773 win_uv.validate();
774
775 Iterator in_y(input_ptr->plane(0), win);
776 Iterator in_uv(input_ptr->plane(1), win_uv);
777 Iterator out_y(output_ptr->plane(0), win);
778 Iterator out_u(output_ptr->plane(1), win);
779 Iterator out_v(output_ptr->plane(2), win);
780
781 execute_window_loop(win, [&](const Coordinates & id)
782 {
783 const auto ta_y_top = vld2q_u8(in_y.ptr());
784 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
785 const auto ta_uv = vld2q_u8(in_uv.ptr());
786 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
787 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
788 //ta_uv.val[0] = U0 U2 U4 U6 ...
789 //ta_uv.val[1] = V0 V2 V4 V6 ...
790
791 vst2q_u8(out_y.ptr(), ta_y_top);
792 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
793
794 uint8x16x2_t uvec;
795 uvec.val[0] = ta_uv.val[0 + shift];
796 uvec.val[1] = ta_uv.val[0 + shift];
797 vst2q_u8(out_u.ptr(), uvec);
798 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
799
800 uint8x16x2_t vvec;
801 vvec.val[0] = ta_uv.val[1 - shift];
802 vvec.val[1] = ta_uv.val[1 - shift];
803 vst2q_u8(out_v.ptr(), vvec);
804 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
805 },
806 in_y, in_uv, out_y, out_u, out_v);
807}
808
Alex Gildayc357c472018-03-21 13:54:09 +0000809/** Convert IYUV to YUV4.
810 *
811 * @param[in] input Input IYUV data buffer.
812 * @param[out] output Output YUV4 buffer.
813 * @param[in] win Window for iterating the buffers.
814 *
815 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100816void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
817{
818 ARM_COMPUTE_ERROR_ON(nullptr == input);
819 ARM_COMPUTE_ERROR_ON(nullptr == output);
820 win.validate();
821
822 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
823 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
824
825 // UV's width and height are subsampled
826 Window win_uv(win);
827 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
828 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
829 win_uv.validate();
830
831 Iterator in_y(input_ptr->plane(0), win);
832 Iterator in_u(input_ptr->plane(1), win_uv);
833 Iterator in_v(input_ptr->plane(2), win_uv);
834 Iterator out_y(output_ptr->plane(0), win);
835 Iterator out_u(output_ptr->plane(1), win);
836 Iterator out_v(output_ptr->plane(2), win);
837
838 execute_window_loop(win, [&](const Coordinates & id)
839 {
840 const auto ta_y_top = vld2q_u8(in_y.ptr());
841 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
842 const auto ta_u = vld1q_u8(in_u.ptr());
843 const auto ta_v = vld1q_u8(in_v.ptr());
844 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
845 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
846 //ta_u = U0 U2 U4 U6 ...
847 //ta_v = V0 V2 V4 V6 ...
848
849 vst2q_u8(out_y.ptr(), ta_y_top);
850 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
851
852 uint8x16x2_t uvec;
853 uvec.val[0] = ta_u;
854 uvec.val[1] = ta_u;
855 vst2q_u8(out_u.ptr(), uvec);
856 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
857
858 uint8x16x2_t vvec;
859 vvec.val[0] = ta_v;
860 vvec.val[1] = ta_v;
861 vst2q_u8(out_v.ptr(), vvec);
862 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
863 },
864 in_y, in_u, in_v, out_y, out_u, out_v);
865}
866
Alex Gildayc357c472018-03-21 13:54:09 +0000867/** Convert RGB to NV12.
868 *
869 * @param[in] input Input RGB data buffer.
870 * @param[out] output Output NV12 buffer.
871 * @param[in] win Window for iterating the buffers.
872 *
873 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100874template <bool alpha>
875void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
876{
877 ARM_COMPUTE_ERROR_ON(nullptr == input);
878 ARM_COMPUTE_ERROR_ON(nullptr == output);
879 win.validate();
880
881 const auto input_ptr = static_cast<const IImage *__restrict>(input);
882 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
883
884 // UV's width and height are subsampled
885 Window win_uv(win);
886 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
887 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
888 win_uv.validate();
889
890 Iterator in(input_ptr, win);
891 Iterator out_y(output_ptr->plane(0), win);
892 Iterator out_uv(output_ptr->plane(1), win_uv);
893
894 execute_window_loop(win, [&](const Coordinates & id)
895 {
896 const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
897 const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
898 //ta_rgb.val[0] = R0 R1 R2 R3 ...
899 //ta_rgb.val[1] = G0 G1 G2 G3 ...
900 //ta_rgb.val[2] = B0 B1 B2 B3 ...
901
902 store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
903 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
904 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
905 out_uv.ptr());
906 },
907 in, out_y, out_uv);
908}
909
Alex Gildayc357c472018-03-21 13:54:09 +0000910/** Convert RGB to IYUV.
911 *
912 * @param[in] input Input RGB data buffer.
913 * @param[out] output Output IYUV buffer.
914 * @param[in] win Window for iterating the buffers.
915 *
916 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100917template <bool alpha>
918void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
919{
920 ARM_COMPUTE_ERROR_ON(nullptr == input);
921 ARM_COMPUTE_ERROR_ON(nullptr == output);
922 win.validate();
923
924 const auto input_ptr = static_cast<const IImage *__restrict>(input);
925 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
926
927 // UV's width and height are subsampled
928 Window win_uv(win);
929 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
930 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
931 win_uv.validate();
932
933 Iterator in(input_ptr, win);
934 Iterator out_y(output_ptr->plane(0), win);
935 Iterator out_u(output_ptr->plane(1), win_uv);
936 Iterator out_v(output_ptr->plane(2), win_uv);
937
938 execute_window_loop(win, [&](const Coordinates & id)
939 {
940 const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
941 const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
942 //ta_rgb.val[0] = R0 R1 R2 R3 ...
943 //ta_rgb.val[1] = G0 G1 G2 G3 ...
944 //ta_rgb.val[2] = B0 B1 B2 B3 ...
945
946 store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
947 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
948 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
949 out_u.ptr(), out_v.ptr());
950 },
951 in, out_y, out_u, out_v);
952}
953
Alex Gildayc357c472018-03-21 13:54:09 +0000954/** Convert RGB to YUV4.
955 *
956 * @param[in] input Input RGB data buffer.
957 * @param[out] output Output YUV4 buffer.
958 * @param[in] win Window for iterating the buffers.
959 *
960 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100961template <bool alpha>
962void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
963{
964 ARM_COMPUTE_ERROR_ON(nullptr == input);
965 ARM_COMPUTE_ERROR_ON(nullptr == output);
966 win.validate();
967
968 const auto input_ptr = static_cast<const IImage *__restrict>(input);
969 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
970
971 Iterator in(input_ptr, win);
972 Iterator out_y(output_ptr->plane(0), win);
973 Iterator out_u(output_ptr->plane(1), win);
974 Iterator out_v(output_ptr->plane(2), win);
975
976 execute_window_loop(win, [&](const Coordinates & id)
977 {
978 const auto ta_rgb = load_rgb(in.ptr(), alpha);
979 //ta_rgb.val[0] = R0 R1 R2 R3 ...
980 //ta_rgb.val[1] = G0 G1 G2 G3 ...
981 //ta_rgb.val[2] = B0 B1 B2 B3 ...
982
983 store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
984 out_y.ptr(), out_u.ptr(), out_v.ptr());
985 },
986 in, out_y, out_u, out_v);
987}
Gian Marco Iodice356f6432017-09-22 11:32:21 +0100988} // namespace arm_compute