blob: 9a9caefaab2a8410ff39e9d70a8ddf5d3f1763c4 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/Error.h"
25#include "arm_compute/core/Helpers.h"
26#include "arm_compute/core/IMultiImage.h"
27#include "arm_compute/core/Utils.h"
28
29#include <arm_neon.h>
30
31namespace
32{
// BT.709 YUV -> RGB weights. They are applied to chroma samples that have
// already been re-centred around zero (i.e. after subtracting 128).
constexpr float red_coef_bt709{ 1.5748f };     // weight of V in the red channel
constexpr float green_coef_bt709{ -0.1873f };  // weight of U in the green channel
constexpr float green_coef2_bt709{ -0.4681f }; // weight of V in the green channel
constexpr float blue_coef_bt709{ 1.8556f };    // weight of U in the blue channel

// BT.709 RGB -> YUV luma weights
constexpr float rgb2yuv_bt709_kr{ 0.2126f };
constexpr float rgb2yuv_bt709_kb{ 0.0722f };
// K_g = 1 - K_r - K_b
constexpr float rgb2yuv_bt709_kg{ 0.7152f };
// Chroma scale: C_u = 1 / (2 * (1 - K_b))
constexpr float rgb2yuv_bt709_cu{ 0.5389f };
// Chroma scale: C_v = 1 / (2 * (1 - K_r))
constexpr float rgb2yuv_bt709_cv{ 0.6350f };
46
/** Widen sixteen unsigned 8-bit lanes into four vectors of four 32-bit floats.
 *
 * @param[in]  in  Sixteen u8 values.
 * @param[out] out out.val[k] receives lanes [4k, 4k + 3] of @p in converted to float.
 */
inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out)
{
    // First widening step: u8 -> u16, one 8-lane half at a time
    const uint16x8_t lanes_0_7  = vmovl_u8(vget_low_u8(in));
    const uint16x8_t lanes_8_15 = vmovl_u8(vget_high_u8(in));

    // Second widening step (u16 -> u32) followed by the integer -> float conversion
    out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(lanes_0_7)));
    out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(lanes_0_7)));
    out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(lanes_8_15)));
    out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(lanes_8_15)));
}
56
/** Narrow two 3-channel float vectors into one 3-channel vector of eight u8 lanes.
 *
 * For every channel c, lanes 0..3 of out.val[c] come from in1.val[c] and
 * lanes 4..7 come from in2.val[c]. Both narrowing steps saturate.
 *
 * @param[in]  in1 Source of the low four lanes of each channel.
 * @param[in]  in2 Source of the high four lanes of each channel.
 * @param[out] out Packed u8 result.
 */
inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
{
    for(int channel = 0; channel < 3; ++channel)
    {
        // float -> u32 -> u16 for each half
        const uint16x4_t low_half  = vqmovn_u32(vcvtq_u32_f32(in1.val[channel]));
        const uint16x4_t high_half = vqmovn_u32(vcvtq_u32_f32(in2.val[channel]));

        // Join the halves and narrow u16 -> u8
        out.val[channel] = vqmovn_u16(vcombine_u16(low_half, high_half));
    }
}
66
/** Narrow four vectors of four floats into a single vector of sixteen u8 lanes.
 *
 * in.val[k] ends up in lanes [4k, 4k + 3] of @p out. Both narrowing steps saturate.
 *
 * @param[in]  in  Sixteen float values split over four vectors.
 * @param[out] out Packed u8 result.
 */
inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t &out)
{
    // float -> u32 -> u16 for each group of four lanes
    const uint16x4_t group0 = vqmovn_u32(vcvtq_u32_f32(in.val[0]));
    const uint16x4_t group1 = vqmovn_u32(vcvtq_u32_f32(in.val[1]));
    const uint16x4_t group2 = vqmovn_u32(vcvtq_u32_f32(in.val[2]));
    const uint16x4_t group3 = vqmovn_u32(vcvtq_u32_f32(in.val[3]));

    // u16 -> u8 and concatenate the two 8-lane halves
    const uint8x8_t lanes_0_7  = vqmovn_u16(vcombine_u16(group0, group1));
    const uint8x8_t lanes_8_15 = vqmovn_u16(vcombine_u16(group2, group3));
    out                        = vcombine_u8(lanes_0_7, lanes_8_15);
}
75
/** Convert four RGB samples (as floats) to YUV using the BT.709 constants of this file.
 *
 * Expanded, the computation is equivalent to:
 *   Y' =  0.2126*R' + 0.7152*G' + 0.0722*B'
 *   U' = -0.1146*R' - 0.3854*G' + 0.5000*B'   (+128)
 *   V' =  0.5000*R' - 0.4542*G' - 0.0458*B'   (+128)
 *
 * @param[in]  rvec Red samples.
 * @param[in]  gvec Green samples.
 * @param[in]  bvec Blue samples.
 * @param[out] yvec Luma result.
 * @param[out] uvec U result, offset by 128.
 * @param[out] vvec V result, offset by 128.
 */
inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
                                   float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
{
    // Offset that moves the signed chroma values into the unsigned range
    const float32x4_t chroma_offset = vdupq_n_f32(128.f);

    // Y = R * K_r + G * K_g + B * K_b, with K_g = 1 - K_r - K_b
    yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
    yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
    yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);

    // U = 128 + (B - Y) * C_u, with C_u = 1 / (2 * (1 - K_b))
    uvec = vmlaq_n_f32(chroma_offset, vsubq_f32(bvec, yvec), rgb2yuv_bt709_cu);

    // V = 128 + (R - Y) * C_v, with C_v = 1 / (2 * (1 - K_r))
    vvec = vmlaq_n_f32(chroma_offset, vsubq_f32(rvec, yvec), rgb2yuv_bt709_cv);
}
99
/** Convert eight pixels that share four chroma pairs to RGB or RGBA and store them.
 *
 * Each chroma pair (U, V) is used for two horizontally adjacent pixels: the one
 * whose luma is in @p yvec_val and the one whose luma is in @p yyvec_val.
 *
 * @param[in]  yvec_val   Luma of the first pixel of each pair.
 * @param[in]  uvec_val   U samples (still offset by 128).
 * @param[in]  yyvec_val  Luma of the second pixel of each pair.
 * @param[in]  vvec_val   V samples (still offset by 128).
 * @param[out] output_ptr Destination; 24 bytes are written without alpha, 32 bytes with alpha.
 * @param[in]  alpha      If true, a fourth channel with constant value 255 is written per pixel.
 */
inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
                                    float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
{
    // Re-centre the chroma samples around zero: cb - 128 and cr - 128
    const float32x4_t chroma_bias = vdupq_n_f32(128.f);
    const float32x4_t cb          = vsubq_f32(uvec_val, chroma_bias);
    const float32x4_t cr          = vsubq_f32(vvec_val, chroma_bias);

    // Chroma contribution to each colour channel:
    //   r =  0.0000f*cb + 1.5748f*cr
    //   g = -0.1873f*cb - 0.4681f*cr
    //   b =  1.8556f*cb + 0.0000f*cr
    const float32x4_t r_delta = vmulq_n_f32(cr, red_coef_bt709);
    const float32x4_t b_delta = vmulq_n_f32(cb, blue_coef_bt709);
    const float32x4_t g_delta = vaddq_f32(vmulq_n_f32(cb, green_coef_bt709),
                                          vmulq_n_f32(cr, green_coef2_bt709));

    // Add the luma of the first and of the second pixel of every pair
    float32x4x3_t first_px;
    first_px.val[0] = vaddq_f32(yvec_val, r_delta);
    first_px.val[1] = vaddq_f32(yvec_val, g_delta);
    first_px.val[2] = vaddq_f32(yvec_val, b_delta);

    float32x4x3_t second_px;
    second_px.val[0] = vaddq_f32(yyvec_val, r_delta);
    second_px.val[1] = vaddq_f32(yyvec_val, g_delta);
    second_px.val[2] = vaddq_f32(yyvec_val, b_delta);

    // Pack to u8: lanes 0..3 hold the first pixels, lanes 4..7 the second pixels
    uint8x8x3_t packed_rgb;
    convert_float32x4x3_to_uint8x8x3(first_px, second_px, packed_rgb);

    if(alpha)
    {
        uint8x8x4_t packed_rgba;
        packed_rgba.val[0] = packed_rgb.val[0];
        packed_rgba.val[1] = packed_rgb.val[1];
        packed_rgba.val[2] = packed_rgb.val[2];
        packed_rgba.val[3] = vdup_n_u8(255);

        // Alternate between the two lane groups so pixels land in memory order
        vst4_lane_u8(&output_ptr[0], packed_rgba, 0);
        vst4_lane_u8(&output_ptr[4], packed_rgba, 4);
        vst4_lane_u8(&output_ptr[8], packed_rgba, 1);
        vst4_lane_u8(&output_ptr[12], packed_rgba, 5);
        vst4_lane_u8(&output_ptr[16], packed_rgba, 2);
        vst4_lane_u8(&output_ptr[20], packed_rgba, 6);
        vst4_lane_u8(&output_ptr[24], packed_rgba, 3);
        vst4_lane_u8(&output_ptr[28], packed_rgba, 7);
        return;
    }

    // Alternate between the two lane groups so pixels land in memory order
    vst3_lane_u8(&output_ptr[0], packed_rgb, 0);
    vst3_lane_u8(&output_ptr[3], packed_rgb, 4);
    vst3_lane_u8(&output_ptr[6], packed_rgb, 1);
    vst3_lane_u8(&output_ptr[9], packed_rgb, 5);
    vst3_lane_u8(&output_ptr[12], packed_rgb, 2);
    vst3_lane_u8(&output_ptr[15], packed_rgb, 6);
    vst3_lane_u8(&output_ptr[18], packed_rgb, 3);
    vst3_lane_u8(&output_ptr[21], packed_rgb, 7);
}
162
/** Load sixteen pixels and return their R, G and B channels de-interleaved.
 *
 * @param[in] ptr   Address of the first pixel.
 * @param[in] alpha If true the pixels are four bytes wide and the fourth channel is
 *                  dropped; otherwise the pixels are three bytes wide.
 *
 * @return val[0] = R, val[1] = G, val[2] = B.
 */
inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
{
    if(!alpha)
    {
        // Three-channel source: the de-interleaving load already has the right shape
        return vld3q_u8(ptr);
    }

    // Four-channel source: keep the first three channels only
    const uint8x16x4_t rgbx = vld4q_u8(ptr);

    uint8x16x3_t rgb;
    rgb.val[0] = rgbx.val[0];
    rgb.val[1] = rgbx.val[1];
    rgb.val[2] = rgbx.val[2];
    return rgb;
}
181
/** Convert two rows of sixteen RGB pixels to YUV in place.
 *
 * On entry val[0..2] of each argument hold R, G, B; on return they hold Y, U, V.
 *
 * @param[in,out] vec_top    Upper row.
 * @param[in,out] vec_bottom Lower row.
 */
inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
{
    // Converts a single row; the result is returned rather than written in place
    const auto convert_row = [](const uint8x16x3_t &rgb) -> uint8x16x3_t
    {
        // u8 -> float, one float32x4x4_t per colour channel
        float32x4x4_t red_f, green_f, blue_f;
        convert_uint8x16_to_float32x4x4(rgb.val[0], red_f);
        convert_uint8x16_to_float32x4x4(rgb.val[1], green_f);
        convert_uint8x16_to_float32x4x4(rgb.val[2], blue_f);

        // Colour-space conversion, four pixels at a time
        float32x4x4_t luma_f, u_f, v_f;
        for(int quad = 0; quad < 4; ++quad)
        {
            rgb_to_yuv_calculation(red_f.val[quad], green_f.val[quad], blue_f.val[quad],
                                   luma_f.val[quad], u_f.val[quad], v_f.val[quad]);
        }

        // float -> u8
        uint8x16x3_t yuv;
        convert_float32x4x4_to_unit8x16(luma_f, yuv.val[0]);
        convert_float32x4x4_to_unit8x16(u_f, yuv.val[1]);
        convert_float32x4x4_to_unit8x16(v_f, yuv.val[2]);
        return yuv;
    };

    // Both rows are converted from their original contents before either is overwritten
    const uint8x16x3_t yuv_top    = convert_row(vec_top);
    const uint8x16x3_t yuv_bottom = convert_row(vec_bottom);

    vec_top    = yuv_top;
    vec_bottom = yuv_bottom;
}
213
/** Convert a 16x2 block of RGB pixels to YUV and store it with interleaved, 2x2 subsampled chroma.
 *
 * @param[in]  rvec_top, gvec_top, bvec_top          R, G, B of the sixteen pixels of the upper row.
 * @param[in]  rvec_bottom, gvec_bottom, bvec_bottom R, G, B of the sixteen pixels of the lower row.
 * @param[out] out_y_top    Receives sixteen luma bytes of the upper row.
 * @param[out] out_y_bottom Receives sixteen luma bytes of the lower row.
 * @param[out] out_uv       Receives eight interleaved (U, V) pairs, i.e. sixteen bytes.
 */
inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
                              const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
                              unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
                              unsigned char *const __restrict out_uv)
{
    uint8x16x3_t row_top;
    row_top.val[0] = rvec_top;
    row_top.val[1] = gvec_top;
    row_top.val[2] = bvec_top;

    uint8x16x3_t row_bottom;
    row_bottom.val[0] = rvec_bottom;
    row_bottom.val[1] = gvec_bottom;
    row_bottom.val[2] = bvec_bottom;

    // After this call the rows hold Y, U, V instead of R, G, B
    rgb_to_yuv_conversion(row_top, row_bottom);

    // Luma is stored at full resolution
    vst1q_u8(out_y_top, row_top.val[0]);
    vst1q_u8(out_y_bottom, row_bottom.val[0]);

    // Reduces a 16x2 block of one chroma channel to eight samples
    const auto average_2x2 = [](const uint8x16_t &top, const uint8x16_t &bottom) -> uint8x8_t
    {
        // Split even and odd columns; both results keep top in the low half, bottom in the high half
        const uint8x16x2_t columns = vuzpq_u8(top, bottom);
        // Horizontal step: rounding average of neighbouring columns
        const uint8x16_t horizontal = vrhaddq_u8(columns.val[0], columns.val[1]);
        // Vertical step: truncating average of the top and bottom halves
        return vhadd_u8(vget_low_u8(horizontal), vget_high_u8(horizontal));
    };

    uint8x8x2_t chroma;
    chroma.val[0] = average_2x2(row_top.val[1], row_bottom.val[1]);
    chroma.val[1] = average_2x2(row_top.val[2], row_bottom.val[2]);

    // Interleaving store: U0 V0 U1 V1 ...
    vst2_u8(out_uv, chroma);
}
243
/** Convert a 16x2 block of RGB pixels to YUV and store it with planar, 2x2 subsampled chroma.
 *
 * @param[in]  rvec_top, gvec_top, bvec_top          R, G, B of the sixteen pixels of the upper row.
 * @param[in]  rvec_bottom, gvec_bottom, bvec_bottom R, G, B of the sixteen pixels of the lower row.
 * @param[out] out_y_top    Receives sixteen luma bytes of the upper row.
 * @param[out] out_y_bottom Receives sixteen luma bytes of the lower row.
 * @param[out] out_u        Receives eight U bytes.
 * @param[out] out_v        Receives eight V bytes.
 */
inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
                              const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
                              unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
                              unsigned char *const __restrict out_u,
                              unsigned char *const __restrict out_v)
{
    uint8x16x3_t row_top;
    row_top.val[0] = rvec_top;
    row_top.val[1] = gvec_top;
    row_top.val[2] = bvec_top;

    uint8x16x3_t row_bottom;
    row_bottom.val[0] = rvec_bottom;
    row_bottom.val[1] = gvec_bottom;
    row_bottom.val[2] = bvec_bottom;

    // After this call the rows hold Y, U, V instead of R, G, B
    rgb_to_yuv_conversion(row_top, row_bottom);

    // Luma is stored at full resolution
    vst1q_u8(out_y_top, row_top.val[0]);
    vst1q_u8(out_y_bottom, row_bottom.val[0]);

    // Split even and odd columns of each row; U stays in the low half, V in the high half
    const uint8x16x2_t columns_top    = vuzpq_u8(row_top.val[1], row_top.val[2]);
    const uint8x16x2_t columns_bottom = vuzpq_u8(row_bottom.val[1], row_bottom.val[2]);

    // Horizontal step: rounding average of neighbouring columns, per row
    const uint8x16_t horizontal_top    = vrhaddq_u8(columns_top.val[0], columns_top.val[1]);
    const uint8x16_t horizontal_bottom = vrhaddq_u8(columns_bottom.val[0], columns_bottom.val[1]);

    // Vertical step: truncating average of the two rows
    const uint8x16_t chroma = vhaddq_u8(horizontal_top, horizontal_bottom);

    vst1_u8(out_u, vget_low_u8(chroma));
    vst1_u8(out_v, vget_high_u8(chroma));
}
271
/** Convert sixteen RGB pixels to YUV and store each channel at full resolution.
 *
 * @param[in]  rvec  Red values.
 * @param[in]  gvec  Green values.
 * @param[in]  bvec  Blue values.
 * @param[out] out_y Receives sixteen Y bytes.
 * @param[out] out_u Receives sixteen U bytes.
 * @param[out] out_v Receives sixteen V bytes.
 */
inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
                              unsigned char *const __restrict out_y,
                              unsigned char *const __restrict out_u,
                              unsigned char *const __restrict out_v)
{
    // u8 -> float, one float32x4x4_t per colour channel
    float32x4x4_t red_f, green_f, blue_f;
    convert_uint8x16_to_float32x4x4(rvec, red_f);
    convert_uint8x16_to_float32x4x4(gvec, green_f);
    convert_uint8x16_to_float32x4x4(bvec, blue_f);

    // Colour-space conversion, four pixels at a time
    float32x4x4_t luma_f, u_f, v_f;
    for(int quad = 0; quad < 4; ++quad)
    {
        rgb_to_yuv_calculation(red_f.val[quad], green_f.val[quad], blue_f.val[quad],
                               luma_f.val[quad], u_f.val[quad], v_f.val[quad]);
    }

    // float -> u8
    uint8x16_t luma_u8, u_u8, v_u8;
    convert_float32x4x4_to_unit8x16(luma_f, luma_u8);
    convert_float32x4x4_to_unit8x16(u_f, u_u8);
    convert_float32x4x4_to_unit8x16(v_f, v_u8);

    vst1q_u8(out_y, luma_u8);
    vst1q_u8(out_u, u_u8);
    vst1q_u8(out_v, v_u8);
}
299}
300
301namespace arm_compute
302{
/** Convert 3-channel pixels to 4-channel pixels by appending a constant channel of 255.
 *
 * Every loop iteration reads sixteen pixels and writes sixteen pixels.
 *
 * @param[in]  input  Source image, passed as an IImage.
 * @param[out] output Destination image, passed as an IImage.
 * @param[in]  win    Window the conversion is executed on.
 */
void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto src_image = static_cast<const IImage *__restrict>(input);
    const auto dst_image = static_cast<IImage *__restrict>(output);

    Iterator src(src_image, win);
    Iterator dst(dst_image, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // De-interleaving load: one vector per colour channel
        const uint8x16x3_t rgb = vld3q_u8(src.ptr());

        // Same three channels plus a constant fourth one
        uint8x16x4_t rgbx;
        rgbx.val[0] = rgb.val[0];
        rgbx.val[1] = rgb.val[1];
        rgbx.val[2] = rgb.val[2];
        rgbx.val[3] = vdupq_n_u8(255);

        // Interleaving store
        vst4q_u8(dst.ptr(), rgbx);
    },
    src, dst);
}
326
/** Convert 4-channel pixels to 3-channel pixels by dropping the fourth channel.
 *
 * Every loop iteration reads sixteen pixels and writes sixteen pixels.
 *
 * @param[in]  input  Source image, passed as an IImage.
 * @param[out] output Destination image, passed as an IImage.
 * @param[in]  win    Window the conversion is executed on.
 */
void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto src_image = static_cast<const IImage *__restrict>(input);
    const auto dst_image = static_cast<IImage *__restrict>(output);

    Iterator src(src_image, win);
    Iterator dst(dst_image, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // De-interleaving load: one vector per channel
        const uint8x16x4_t rgbx = vld4q_u8(src.ptr());

        // Keep the first three channels only
        uint8x16x3_t rgb;
        rgb.val[0] = rgbx.val[0];
        rgb.val[1] = rgbx.val[1];
        rgb.val[2] = rgbx.val[2];

        // Interleaving store
        vst3q_u8(dst.ptr(), rgb);
    },
    src, dst);
}
349
/** Convert packed 4:2:2 YUV pixels to RGB or RGBA.
 *
 * Every loop iteration reads 64 input bytes (32 pixels) and writes 32 output pixels.
 *
 * @tparam yuyv  true if the source byte order is Y U Y V, false if it is U Y V Y.
 * @tparam alpha true to write four channels per pixel (fourth channel = 255), false for three.
 *
 * @param[in]  input  Source image, passed as an IImage.
 * @param[out] output Destination image, passed as an IImage.
 * @param[in]  win    Window the conversion is executed on.
 */
template <bool yuyv, bool alpha>
void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto src_image = static_cast<const IImage *__restrict>(input);
    const auto dst_image = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation() call (eight pixels)
    constexpr auto element_size = alpha ? 32 : 24;
    // Selects which of the four de-interleaved vectors hold luma and which hold chroma
    constexpr auto shift = yuyv ? 0 : 1;

    Iterator src(src_image, win);
    Iterator dst(dst_image, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // De-interleave the packed stream. For Y U Y V input:
        //   packed.val[0] = Y0 Y2 Y4 Y6 ...
        //   packed.val[1] = U0 U2 U4 U6 ...
        //   packed.val[2] = Y1 Y3 Y5 Y7 ...
        //   packed.val[3] = V0 V2 V4 V6 ...
        const uint8x16x4_t packed = vld4q_u8(src.ptr());

        // u8 -> float for every component
        float32x4x4_t luma_even, chroma_u, luma_odd, chroma_v;
        convert_uint8x16_to_float32x4x4(packed.val[0 + shift], luma_even);
        convert_uint8x16_to_float32x4x4(packed.val[1 - shift], chroma_u);
        convert_uint8x16_to_float32x4x4(packed.val[2 + shift], luma_odd);
        convert_uint8x16_to_float32x4x4(packed.val[3 - shift], chroma_v);

        // Four groups of eight pixels each
        for(int group = 0; group < 4; ++group)
        {
            yuyv_to_rgb_calculation(luma_even.val[group], chroma_u.val[group], luma_odd.val[group], chroma_v.val[group],
                                    dst.ptr() + group * element_size, alpha);
        }
    },
    src, dst);
}
387
/** Convert a semi-planar 4:2:0 YUV image (luma plane + interleaved chroma plane) to RGB or RGBA.
 *
 * Every loop iteration handles a block of 32x2 pixels: two luma rows are read and
 * two output rows are written, sharing sixteen chroma pairs.
 *
 * @tparam uv    true if the chroma plane stores U before V in each pair, false if V comes first.
 * @tparam alpha true to write four channels per pixel (fourth channel = 255), false for three.
 *
 * @param[in]  input  Source image, passed as an IMultiImage (plane 0 = luma, plane 1 = chroma).
 * @param[out] output Destination image, passed as an IImage.
 * @param[in]  win    Window the conversion is executed on.
 */
template <bool uv, bool alpha>
void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation() call (eight pixels)
    constexpr auto element_size = alpha ? 32 : 24;
    // Selects which de-interleaved chroma vector is U and which is V
    constexpr auto shift = uv ? 0 : 1;

    // Row strides do not change while the window is processed: look them up once
    // instead of going through the tensor-info accessors on every iteration.
    const auto in_y_stride = input_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_stride  = output_ptr->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_uv(input_ptr->plane(1), win_uv);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_uv       = vld2q_u8(in_uv.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_uv.val[0] = first byte of every chroma pair
        //ta_uv.val[1] = second byte of every chroma pair

        // Convert every uint8x16_t to a float32x4x4_t
        float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
        convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
        convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
        convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec);
        convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec);

        // Four groups of eight pixels per row; both rows share the same chroma
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_top.val[i], uvec.val[i], yyvec_top.val[i], vvec.val[i],
                                    out.ptr() + i * element_size, alpha);
        }
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_bottom.val[i], uvec.val[i], yyvec_bottom.val[i], vvec.val[i],
                                    out.ptr() + out_stride + i * element_size, alpha);
        }
    },
    in_y, in_uv, out);
}
443
/** Convert a planar 4:2:0 YUV image (separate Y, U and V planes) to RGB or RGBA.
 *
 * Every loop iteration handles a block of 32x2 pixels: two luma rows are read and
 * two output rows are written, sharing sixteen U and sixteen V samples.
 *
 * @tparam alpha true to write four channels per pixel (fourth channel = 255), false for three.
 *
 * @param[in]  input  Source image, passed as an IMultiImage (plane 0 = Y, 1 = U, 2 = V).
 * @param[out] output Destination image, passed as an IImage.
 * @param[in]  win    Window the conversion is executed on.
 */
template <bool alpha>
void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation() call (eight pixels)
    constexpr auto element_size = alpha ? 32 : 24;

    // Row strides do not change while the window is processed: look them up once
    // instead of going through the tensor-info accessors on every iteration.
    const auto in_y_stride = input_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_stride  = output_ptr->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_u(input_ptr->plane(1), win_uv);
    Iterator in_v(input_ptr->plane(2), win_uv);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_u        = vld1q_u8(in_u.ptr());
        const auto ta_v        = vld1q_u8(in_v.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_u = one U sample per pair of luma columns
        //ta_v = one V sample per pair of luma columns

        // Convert every uint8x16_t to a float32x4x4_t
        float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
        convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
        convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
        convert_uint8x16_to_float32x4x4(ta_u, uvec);
        convert_uint8x16_to_float32x4x4(ta_v, vvec);

        // Four groups of eight pixels per row; both rows share the same chroma
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_top.val[i], uvec.val[i], yyvec_top.val[i], vvec.val[i],
                                    out.ptr() + i * element_size, alpha);
        }
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_bottom.val[i], uvec.val[i], yyvec_bottom.val[i], vvec.val[i],
                                    out.ptr() + out_stride + i * element_size, alpha);
        }
    },
    in_y, in_u, in_v, out);
}
500
/** Convert packed 4:2:2 YUV pixels to a semi-planar 4:2:0 image (luma plane + interleaved chroma plane).
 *
 * Every loop iteration reads 32 pixels from each of two input rows, copies their luma
 * and stores the truncating average of the two rows' chroma as sixteen (U, V) pairs.
 *
 * @tparam yuyv true if the source byte order is Y U Y V, false if it is U Y V Y.
 *
 * @param[in]  input  Source image, passed as an IImage.
 * @param[out] output Destination image, passed as an IMultiImage (plane 0 = luma, plane 1 = chroma).
 * @param[in]  win    Window the conversion is executed on.
 */
template <bool yuyv>
void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    // Selects which of the four de-interleaved vectors hold luma and which hold chroma
    constexpr auto shift = yuyv ? 0 : 1;

    // Row strides do not change while the window is processed: look them up once
    // instead of going through the tensor-info accessors on every iteration.
    const auto in_stride    = input_ptr->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();

    // NV12's UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in(input_ptr, win);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_uv(output_ptr->plane(1), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_top    = vld4q_u8(in.ptr());
        const auto ta_bottom = vld4q_u8(in.ptr() + in_stride);
        // For Y U Y V input:
        //ta.val[0] = Y0 Y2 Y4 Y6 ...
        //ta.val[1] = U0 U2 U4 U6 ...
        //ta.val[2] = Y1 Y3 Y5 Y7 ...
        //ta.val[3] = V0 V2 V4 V6 ...

        // Luma of the upper row, re-interleaved into pixel order
        uint8x16x2_t yvec;
        yvec.val[0] = ta_top.val[0 + shift];
        yvec.val[1] = ta_top.val[2 + shift];
        vst2q_u8(out_y.ptr(), yvec);

        // Luma of the lower row
        uint8x16x2_t yyvec;
        yyvec.val[0] = ta_bottom.val[0 + shift];
        yyvec.val[1] = ta_bottom.val[2 + shift];
        vst2q_u8(out_y.ptr() + out_y_stride, yyvec);

        // Vertical chroma subsampling: truncating average of the two rows
        uint8x16x2_t uvvec;
        uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
        uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
        vst2q_u8(out_uv.ptr(), uvvec);
    },
    in, out_y, out_uv);
}
549
/** Convert a planar 4:2:0 YUV image (separate Y, U, V planes) to a semi-planar one (Y plane + interleaved UV plane).
 *
 * Every loop iteration copies 32 luma bytes on each of two rows and interleaves
 * sixteen U with sixteen V samples.
 *
 * @param[in]  input  Source image, passed as an IMultiImage (plane 0 = Y, 1 = U, 2 = V).
 * @param[out] output Destination image, passed as an IMultiImage (plane 0 = Y, plane 1 = UV).
 * @param[in]  win    Window the conversion is executed on.
 */
void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    // Row strides do not change while the window is processed: look them up once
    // instead of going through the tensor-info accessors on every iteration.
    const auto in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_u(input_ptr->plane(1), win_uv);
    Iterator in_v(input_ptr->plane(2), win_uv);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_uv(output_ptr->plane(1), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        uint8x16x2_t ta_uv;
        ta_uv.val[0] = vld1q_u8(in_u.ptr());
        ta_uv.val[1] = vld1q_u8(in_v.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_uv.val[0] = sixteen consecutive U samples
        //ta_uv.val[1] = sixteen consecutive V samples

        // Luma is copied unchanged; the interleaving store pairs every U with its V
        vst2q_u8(out_y.ptr(), ta_y_top);
        vst2q_u8(out_y.ptr() + out_y_stride, ta_y_bottom);
        vst2q_u8(out_uv.ptr(), ta_uv);
    },
    in_y, in_u, in_v, out_y, out_uv);
}
589
/** Convert a semi-planar 4:2:0 YUV image (Y plane + interleaved chroma plane) to a planar one (separate Y, U, V planes).
 *
 * Every loop iteration copies 32 luma bytes on each of two rows and splits
 * sixteen chroma pairs into sixteen U and sixteen V samples.
 *
 * @tparam uv true if the source chroma plane stores U before V in each pair, false if V comes first.
 *
 * @param[in]  input  Source image, passed as an IMultiImage (plane 0 = Y, plane 1 = chroma).
 * @param[out] output Destination image, passed as an IMultiImage (plane 0 = Y, 1 = U, 2 = V).
 * @param[in]  win    Window the conversion is executed on.
 */
template <bool uv>
void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    // Selects which de-interleaved chroma vector is U and which is V
    constexpr auto shift = uv ? 0 : 1;

    // Row strides do not change while the window is processed: look them up once
    // instead of going through the tensor-info accessors on every iteration.
    const auto in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_uv(input_ptr->plane(1), win_uv);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_u(output_ptr->plane(1), win_uv);
    Iterator out_v(output_ptr->plane(2), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_uv       = vld2q_u8(in_uv.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_uv.val[0] = first byte of every chroma pair
        //ta_uv.val[1] = second byte of every chroma pair

        // Luma is copied unchanged; each chroma channel goes to its own plane
        vst2q_u8(out_y.ptr(), ta_y_top);
        vst2q_u8(out_y.ptr() + out_y_stride, ta_y_bottom);
        vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
        vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
    },
    in_y, in_uv, out_y, out_u, out_v);
}
631
/** Convert packed 4:2:2 YUV pixels to a planar 4:2:0 image (separate Y, U, V planes).
 *
 * Every loop iteration reads 32 pixels from each of two input rows, copies their luma
 * and stores the truncating average of the two rows' chroma as sixteen U and sixteen V samples.
 *
 * @tparam yuyv true if the source byte order is Y U Y V, false if it is U Y V Y.
 *
 * @param[in]  input  Source image, passed as an IImage.
 * @param[out] output Destination image, passed as an IMultiImage (plane 0 = Y, 1 = U, 2 = V).
 * @param[in]  win    Window the conversion is executed on.
 */
template <bool yuyv>
void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    // Selects which of the four de-interleaved vectors hold luma and which hold chroma
    constexpr auto shift = yuyv ? 0 : 1;

    // Row strides do not change while the window is processed: look them up once
    // instead of going through the tensor-info accessors on every iteration.
    const auto in_stride    = input_ptr->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();

    // Destination's UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in(input_ptr, win);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_u(output_ptr->plane(1), win_uv);
    Iterator out_v(output_ptr->plane(2), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_top    = vld4q_u8(in.ptr());
        const auto ta_bottom = vld4q_u8(in.ptr() + in_stride);
        // For Y U Y V input:
        //ta.val[0] = Y0 Y2 Y4 Y6 ...
        //ta.val[1] = U0 U2 U4 U6 ...
        //ta.val[2] = Y1 Y3 Y5 Y7 ...
        //ta.val[3] = V0 V2 V4 V6 ...

        // Luma of the upper row, re-interleaved into pixel order
        uint8x16x2_t yvec;
        yvec.val[0] = ta_top.val[0 + shift];
        yvec.val[1] = ta_top.val[2 + shift];
        vst2q_u8(out_y.ptr(), yvec);

        // Luma of the lower row
        uint8x16x2_t yyvec;
        yyvec.val[0] = ta_bottom.val[0 + shift];
        yyvec.val[1] = ta_bottom.val[2 + shift];
        vst2q_u8(out_y.ptr() + out_y_stride, yyvec);

        // Vertical chroma subsampling: truncating average of the two rows
        const uint8x16_t uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
        vst1q_u8(out_u.ptr(), uvec);

        const uint8x16_t vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
        vst1q_u8(out_v.ptr(), vvec);
    },
    in, out_y, out_u, out_v);
}
684
/** Convert a semi-planar 4:2:0 YUV image (Y plane + interleaved chroma plane) to three full-resolution planes.
 *
 * Every loop iteration copies 32 luma bytes on each of two rows and replicates
 * every chroma sample into a 2x2 block of the corresponding output plane.
 *
 * @tparam uv true if the source chroma plane stores U before V in each pair, false if V comes first.
 *
 * @param[in]  input  Source image, passed as an IMultiImage (plane 0 = Y, plane 1 = chroma).
 * @param[out] output Destination image, passed as an IMultiImage (plane 0 = Y, 1 = U, 2 = V).
 * @param[in]  win    Window the conversion is executed on.
 */
template <bool uv>
void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    // Selects which de-interleaved chroma vector is U and which is V
    constexpr auto shift = uv ? 0 : 1;

    // Row strides do not change while the window is processed: look them up once
    // instead of going through the tensor-info accessors on every iteration.
    const auto in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_u_stride = output_ptr->plane(1)->info()->strides_in_bytes().y();
    const auto out_v_stride = output_ptr->plane(2)->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_uv(input_ptr->plane(1), win_uv);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_u(output_ptr->plane(1), win);
    Iterator out_v(output_ptr->plane(2), win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_uv       = vld2q_u8(in_uv.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_uv.val[0] = first byte of every chroma pair
        //ta_uv.val[1] = second byte of every chroma pair

        // Luma is copied unchanged
        vst2q_u8(out_y.ptr(), ta_y_top);
        vst2q_u8(out_y.ptr() + out_y_stride, ta_y_bottom);

        // Interleaving a vector with itself doubles every sample horizontally;
        // storing it on two rows doubles it vertically.
        uint8x16x2_t uvec;
        uvec.val[0] = ta_uv.val[0 + shift];
        uvec.val[1] = ta_uv.val[0 + shift];
        vst2q_u8(out_u.ptr(), uvec);
        vst2q_u8(out_u.ptr() + out_u_stride, uvec);

        uint8x16x2_t vvec;
        vvec.val[0] = ta_uv.val[1 - shift];
        vvec.val[1] = ta_uv.val[1 - shift];
        vst2q_u8(out_v.ptr(), vvec);
        vst2q_u8(out_v.ptr() + out_v_stride, vvec);
    },
    in_y, in_uv, out_y, out_u, out_v);
}
736
/** Convert a planar 4:2:0 YUV image (separate Y, U, V planes) to three full-resolution planes.
 *
 * Every loop iteration copies 32 luma bytes on each of two rows and replicates
 * every chroma sample into a 2x2 block of the corresponding output plane.
 *
 * @param[in]  input  Source image, passed as an IMultiImage (plane 0 = Y, 1 = U, 2 = V).
 * @param[out] output Destination image, passed as an IMultiImage (plane 0 = Y, 1 = U, 2 = V).
 * @param[in]  win    Window the conversion is executed on.
 */
void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    // Row strides do not change while the window is processed: look them up once
    // instead of going through the tensor-info accessors on every iteration.
    const auto in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_u_stride = output_ptr->plane(1)->info()->strides_in_bytes().y();
    const auto out_v_stride = output_ptr->plane(2)->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_u(input_ptr->plane(1), win_uv);
    Iterator in_v(input_ptr->plane(2), win_uv);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_u(output_ptr->plane(1), win);
    Iterator out_v(output_ptr->plane(2), win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_u        = vld1q_u8(in_u.ptr());
        const auto ta_v        = vld1q_u8(in_v.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_u = sixteen consecutive U samples
        //ta_v = sixteen consecutive V samples

        // Luma is copied unchanged
        vst2q_u8(out_y.ptr(), ta_y_top);
        vst2q_u8(out_y.ptr() + out_y_stride, ta_y_bottom);

        // Interleaving a vector with itself doubles every sample horizontally;
        // storing it on two rows doubles it vertically.
        uint8x16x2_t uvec;
        uvec.val[0] = ta_u;
        uvec.val[1] = ta_u;
        vst2q_u8(out_u.ptr(), uvec);
        vst2q_u8(out_u.ptr() + out_u_stride, uvec);

        uint8x16x2_t vvec;
        vvec.val[0] = ta_v;
        vvec.val[1] = ta_v;
        vst2q_u8(out_v.ptr(), vvec);
        vst2q_u8(out_v.ptr() + out_v_stride, vvec);
    },
    in_y, in_u, in_v, out_y, out_u, out_v);
}
787
788template <bool alpha>
789void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
790{
791 ARM_COMPUTE_ERROR_ON(nullptr == input);
792 ARM_COMPUTE_ERROR_ON(nullptr == output);
793 win.validate();
794
795 const auto input_ptr = static_cast<const IImage *__restrict>(input);
796 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
797
798 // UV's width and height are subsampled
799 Window win_uv(win);
800 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
801 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
802 win_uv.validate();
803
804 Iterator in(input_ptr, win);
805 Iterator out_y(output_ptr->plane(0), win);
806 Iterator out_uv(output_ptr->plane(1), win_uv);
807
808 execute_window_loop(win, [&](const Coordinates & id)
809 {
810 const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
811 const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
812 //ta_rgb.val[0] = R0 R1 R2 R3 ...
813 //ta_rgb.val[1] = G0 G1 G2 G3 ...
814 //ta_rgb.val[2] = B0 B1 B2 B3 ...
815
816 store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
817 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
818 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
819 out_uv.ptr());
820 },
821 in, out_y, out_uv);
822}
823
824template <bool alpha>
825void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
826{
827 ARM_COMPUTE_ERROR_ON(nullptr == input);
828 ARM_COMPUTE_ERROR_ON(nullptr == output);
829 win.validate();
830
831 const auto input_ptr = static_cast<const IImage *__restrict>(input);
832 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
833
834 // UV's width and height are subsampled
835 Window win_uv(win);
836 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
837 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
838 win_uv.validate();
839
840 Iterator in(input_ptr, win);
841 Iterator out_y(output_ptr->plane(0), win);
842 Iterator out_u(output_ptr->plane(1), win_uv);
843 Iterator out_v(output_ptr->plane(2), win_uv);
844
845 execute_window_loop(win, [&](const Coordinates & id)
846 {
847 const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
848 const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
849 //ta_rgb.val[0] = R0 R1 R2 R3 ...
850 //ta_rgb.val[1] = G0 G1 G2 G3 ...
851 //ta_rgb.val[2] = B0 B1 B2 B3 ...
852
853 store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
854 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
855 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
856 out_u.ptr(), out_v.ptr());
857 },
858 in, out_y, out_u, out_v);
859}
860
861template <bool alpha>
862void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
863{
864 ARM_COMPUTE_ERROR_ON(nullptr == input);
865 ARM_COMPUTE_ERROR_ON(nullptr == output);
866 win.validate();
867
868 const auto input_ptr = static_cast<const IImage *__restrict>(input);
869 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
870
871 Iterator in(input_ptr, win);
872 Iterator out_y(output_ptr->plane(0), win);
873 Iterator out_u(output_ptr->plane(1), win);
874 Iterator out_v(output_ptr->plane(2), win);
875
876 execute_window_loop(win, [&](const Coordinates & id)
877 {
878 const auto ta_rgb = load_rgb(in.ptr(), alpha);
879 //ta_rgb.val[0] = R0 R1 R2 R3 ...
880 //ta_rgb.val[1] = G0 G1 G2 G3 ...
881 //ta_rgb.val[2] = B0 B1 B2 B3 ...
882
883 store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
884 out_y.ptr(), out_u.ptr(), out_v.ptr());
885 },
886 in, out_y, out_u, out_v);
887}
Gian Marco Iodice356f6432017-09-22 11:32:21 +0100888} // namespace arm_compute