blob: 7540d338307d22fe64cee94a2b43531341a086ff [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Alex Gildayc357c472018-03-21 13:54:09 +00002 * Copyright (c) 2016-2018 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/Error.h"
25#include "arm_compute/core/Helpers.h"
26#include "arm_compute/core/IMultiImage.h"
27#include "arm_compute/core/Utils.h"
28
29#include <arm_neon.h>
30
31namespace
32{
Alex Gildayc357c472018-03-21 13:54:09 +000033#ifndef DOXYGEN_SKIP_THIS
// BT.709 YUV -> RGB factors. They multiply the chroma samples after 128 has been
// subtracted from them; the products are then added to the luma value.
constexpr float red_coef_bt709    = 1.5748f;  // red   += V * red_coef_bt709
constexpr float green_coef_bt709  = -0.1873f; // green += U * green_coef_bt709
constexpr float green_coef2_bt709 = -0.4681f; // green += V * green_coef2_bt709
constexpr float blue_coef_bt709   = 1.8556f;  // blue  += U * blue_coef_bt709

// BT.709 RGB -> YUV luma weights for red and blue
constexpr float rgb2yuv_bt709_kr = 0.2126f;
constexpr float rgb2yuv_bt709_kb = 0.0722f;
// Luma weight for green: K_g = 1 - K_r - K_b
constexpr float rgb2yuv_bt709_kg = 0.7152f;
// Scale applied to (B - Y): C_u = 1 / (2 * (1 - K_b))
constexpr float rgb2yuv_bt709_cu = 0.5389f;
// Scale applied to (R - Y): C_v = 1 / (2 * (1 - K_r))
constexpr float rgb2yuv_bt709_cv = 0.6350f;

// Per-channel weights used when collapsing RGB to a single U8 greyscale value
constexpr float rgb2u8_red_coef   = 0.2126f;
constexpr float rgb2u8_green_coef = 0.7152f;
constexpr float rgb2u8_blue_coef  = 0.0722f;
51
52inline float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010053{
Manuel Bottini4284bfa2018-09-26 15:33:15 +010054 float32x4x4_t out;
55 const auto tmp1 = vmovl_u8(vget_low_u8(in));
56 out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1)));
57 out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1)));
58 const auto tmp2 = vmovl_u8(vget_high_u8(in));
59 out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2)));
60 out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2)));
61 return out;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010062}
63
64inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
65{
66 out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])),
67 vqmovn_u32(vcvtq_u32_f32(in2.val[0]))));
68 out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])),
69 vqmovn_u32(vcvtq_u32_f32(in2.val[1]))));
70 out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])),
71 vqmovn_u32(vcvtq_u32_f32(in2.val[2]))));
72}
73
74inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t &out)
75{
76 const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])),
77 vqmovn_u32(vcvtq_u32_f32(in.val[1])));
78 const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])),
79 vqmovn_u32(vcvtq_u32_f32(in.val[3])));
80 out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high));
81}
82
Manuel Bottini4284bfa2018-09-26 15:33:15 +010083inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor,const float32x4_t &gcolor, const float32x4_t &bcolor,
84 const float rcoef, const float gcoef, const float bcoef)
85{
86 float32x4_t greyscale = vmulq_n_f32(rcolor, rcoef);
87 greyscale = vmlaq_n_f32(greyscale, gcolor, gcoef);
88 greyscale = vmlaq_n_f32(greyscale, bcolor, bcoef);
89 return greyscale;
90}
91
92inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out)
93{
94 float32x4x4_t out_float32;
95
96 //Conversion from 3(RGB) 4 uint8s to 3(RGB) 4 floats
97 const float32x4x4_t r_float32 = convert_uint8x16_to_float32x4x4(in.val[0]);
98 const float32x4x4_t g_float32 = convert_uint8x16_to_float32x4x4(in.val[1]);
99 const float32x4x4_t b_float32 = convert_uint8x16_to_float32x4x4(in.val[2]);
100
101 //New grayscale image = ( (RED_COEFF * R) + (GREEN_COEFF * G) + (BLUE_COEFF * B) )
102 //Computation of 1(Greyscale) 4 uint8 using 3(RGB) 4 uint8s float
103 out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0],
104 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
105
106 out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1],
107 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
108
109 out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2],
110 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
111
112 out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3], g_float32.val[3], b_float32.val[3],
113 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
114
115 //Conversion from 1(Greyscale) 4 floats to 1(Greyscale) 4 uint8s
116 convert_float32x4x4_to_unit8x16(out_float32, out);
117}
118
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100119inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
120 float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
121{
122 /*
123 Y'= 0.2126*R' + 0.7152*G' + 0.0722*B'
124 U'=-0.1146*R' - 0.3854*G' + 0.5000*B'
125 V'= 0.5000*R' - 0.4542*G' - 0.0458*B'
126 */
127 const auto c128 = vdupq_n_f32(128.f);
128
129 // Y = R * K_r + G * (1 - K_r - K_b) * B * K_b
130 yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
131 yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
132 yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);
133
134 // U = (B - Y) / (2 * (1 - K_b))
135 uvec = vsubq_f32(bvec, yvec);
136 uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu);
137
138 // V = (R - Y) / (2 * (1 - K_r))
139 vvec = vsubq_f32(rvec, yvec);
140 vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv);
141}
142
143inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
144 float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
145{
146 float32x4x3_t rgb1, rgb2;
147
148 // Compute: cb - 128 and cr - 128;
149 const auto c128 = vdupq_n_f32(128.f);
150 uvec_val = vsubq_f32(uvec_val, c128);
151 vvec_val = vsubq_f32(vvec_val, c128);
152
153 // Compute:
154 // r = 0.0000f*f_u + 1.5748f*f_v;
155 // g = 0.1873f*f_u - 0.4681f*f_v;
156 // b = 1.8556f*f_u + 0.0000f*f_v;
157 const auto red = vmulq_n_f32(vvec_val, red_coef_bt709);
158 const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
159 const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
160 vmulq_n_f32(vvec_val, green_coef2_bt709));
161
162 // Compute the final r,g,b values using y1 for the first texel and y2 for the second one.
163 // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t
164 // and written back to memory using vst3 instruction
165
166 rgb1.val[0] = vaddq_f32(yvec_val, red);
167 rgb1.val[1] = vaddq_f32(yvec_val, green);
168 rgb1.val[2] = vaddq_f32(yvec_val, blue);
169
170 rgb2.val[0] = vaddq_f32(yyvec_val, red);
171 rgb2.val[1] = vaddq_f32(yyvec_val, green);
172 rgb2.val[2] = vaddq_f32(yyvec_val, blue);
173
174 uint8x8x3_t u8_rgb;
175 convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);
176
177 if(!alpha)
178 {
179 vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
180 vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
181 vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
182 vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
183 vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
184 vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
185 vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
186 vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
187 }
188 else
189 {
190 uint8x8x4_t u8_rgba;
191 u8_rgba.val[0] = u8_rgb.val[0];
192 u8_rgba.val[1] = u8_rgb.val[1];
193 u8_rgba.val[2] = u8_rgb.val[2];
194 u8_rgba.val[3] = vdup_n_u8(255);
195 vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
196 vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
197 vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
198 vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
199 vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
200 vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
201 vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
202 vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
203 }
204}
205
206inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
207{
208 uint8x16x3_t rgb;
209
210 if(alpha)
211 {
212 const auto tmp = vld4q_u8(ptr);
213 rgb.val[0] = tmp.val[0];
214 rgb.val[1] = tmp.val[1];
215 rgb.val[2] = tmp.val[2];
216 }
217 else
218 {
219 rgb = vld3q_u8(ptr);
220 }
221
222 return rgb;
223}
224
225inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
226{
227 // Convert the uint8x16_t to float32x4x4_t
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100228 const float32x4x4_t frvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[0]);
229 const float32x4x4_t fgvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[1]);
230 const float32x4x4_t fbvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[2]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100231
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100232 const float32x4x4_t frvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[0]);
233 const float32x4x4_t fgvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[1]);
234 const float32x4x4_t fbvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[2]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100235
236 float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
237 float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
238
239 for(auto i = 0; i < 4; ++i)
240 {
241 rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i],
242 fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
243 rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i],
244 fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
245 }
246
247 convert_float32x4x4_to_unit8x16(fyvec_top, vec_top.val[0]);
248 convert_float32x4x4_to_unit8x16(fuvec_top, vec_top.val[1]);
249 convert_float32x4x4_to_unit8x16(fvvec_top, vec_top.val[2]);
250 convert_float32x4x4_to_unit8x16(fyvec_bottom, vec_bottom.val[0]);
251 convert_float32x4x4_to_unit8x16(fuvec_bottom, vec_bottom.val[1]);
252 convert_float32x4x4_to_unit8x16(fvvec_bottom, vec_bottom.val[2]);
253}
254
255inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
256 const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
257 unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
258 unsigned char *const __restrict out_uv)
259{
260 uint8x16x3_t vec_top, vec_bottom;
261 vec_top.val[0] = rvec_top;
262 vec_top.val[1] = gvec_top;
263 vec_top.val[2] = bvec_top;
264 vec_bottom.val[0] = rvec_bottom;
265 vec_bottom.val[1] = gvec_bottom;
266 vec_bottom.val[2] = bvec_bottom;
267
268 rgb_to_yuv_conversion(vec_top, vec_bottom);
269
270 vst1q_u8(out_y_top, vec_top.val[0]);
271 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
272
273 const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
274 const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
275 const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
276 const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
277
278 uint8x8x2_t uvvec;
279 uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
280 uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
281
282 vst2_u8(out_uv, uvvec);
283}
284
285inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
286 const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
287 unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
288 unsigned char *const __restrict out_u,
289 unsigned char *const __restrict out_v)
290{
291 uint8x16x3_t vec_top, vec_bottom;
292 vec_top.val[0] = rvec_top;
293 vec_top.val[1] = gvec_top;
294 vec_top.val[2] = bvec_top;
295 vec_bottom.val[0] = rvec_bottom;
296 vec_bottom.val[1] = gvec_bottom;
297 vec_bottom.val[2] = bvec_bottom;
298
299 rgb_to_yuv_conversion(vec_top, vec_bottom);
300
301 vst1q_u8(out_y_top, vec_top.val[0]);
302 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
303
304 const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
305 const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
306 const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
307 vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
308
309 vst1_u8(out_u, vget_low_u8(uvvec));
310 vst1_u8(out_v, vget_high_u8(uvvec));
311}
312
313inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
314 unsigned char *const __restrict out_y,
315 unsigned char *const __restrict out_u,
316 unsigned char *const __restrict out_v)
317{
318 // Convert the uint8x16_t to float32x4x4_t
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100319 const float32x4x4_t frvec = convert_uint8x16_to_float32x4x4(rvec);
320 const float32x4x4_t fgvec = convert_uint8x16_to_float32x4x4(gvec);
321 const float32x4x4_t fbvec = convert_uint8x16_to_float32x4x4(bvec);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100322
323 float32x4x4_t fyvec, fuvec, fvvec;
324 for(auto i = 0; i < 4; ++i)
325 {
326 rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
327 fyvec.val[i], fuvec.val[i], fvvec.val[i]);
328 }
329
330 uint8x16_t yvec, uvec, vvec;
331 convert_float32x4x4_to_unit8x16(fyvec, yvec);
332 convert_float32x4x4_to_unit8x16(fuvec, uvec);
333 convert_float32x4x4_to_unit8x16(fvvec, vvec);
334
335 vst1q_u8(out_y, yvec);
336 vst1q_u8(out_u, uvec);
337 vst1q_u8(out_v, vvec);
338}
Alex Gildayc357c472018-03-21 13:54:09 +0000339#endif /* DOXYGEN_SKIP_THIS */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100340}
341
342namespace arm_compute
343{
Alex Gildayc357c472018-03-21 13:54:09 +0000344/** Convert RGB to RGBX.
345 *
346 * @param[in] input Input RGB data buffer.
347 * @param[out] output Output RGBX buffer.
348 * @param[in] win Window for iterating the buffers.
349 *
350 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100351void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
352{
353 ARM_COMPUTE_ERROR_ON(nullptr == input);
354 ARM_COMPUTE_ERROR_ON(nullptr == output);
355
356 const auto input_ptr = static_cast<const IImage *__restrict>(input);
357 const auto output_ptr = static_cast<IImage *__restrict>(output);
358
359 Iterator in(input_ptr, win);
360 Iterator out(output_ptr, win);
361
362 execute_window_loop(win, [&](const Coordinates & id)
363 {
364 const auto ta1 = vld3q_u8(in.ptr());
365 uint8x16x4_t ta2;
366 ta2.val[0] = ta1.val[0];
367 ta2.val[1] = ta1.val[1];
368 ta2.val[2] = ta1.val[2];
369 ta2.val[3] = vdupq_n_u8(255);
370 vst4q_u8(out.ptr(), ta2);
371 },
372 in, out);
373}
374
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100375/** Convert RGB to U8.
376 *
377 * @param[in] input Input RGB data buffer.
378 * @param[out] output Output U8 buffer.
379 * @param[in] win Window for iterating the buffers.
380 *
381 */
382void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win)
383{
384 ARM_COMPUTE_ERROR_ON(nullptr == input);
385 ARM_COMPUTE_ERROR_ON(nullptr == output);
386
387 const auto input_ptr = static_cast<const IImage *__restrict>(input);
388 const auto output_ptr = static_cast<IImage *__restrict>(output);
389
390 Iterator in(input_ptr, win);
391 Iterator out(output_ptr, win);
392
393 execute_window_loop(win, [&](const Coordinates & id)
394 {
395 const auto ta1 = vld3q_u8(in.ptr());
396 uint8x16_t ta2;
397 rgb_to_u8_conversion(ta1, ta2);
398 vst1q_u8(out.ptr(), ta2);
399 },
400 in, out);
401}
402
Alex Gildayc357c472018-03-21 13:54:09 +0000403/** Convert RGBX to RGB.
404 *
405 * @param[in] input Input RGBX data buffer.
406 * @param[out] output Output RGB buffer.
407 * @param[in] win Window for iterating the buffers.
408 *
409 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100410void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
411{
412 ARM_COMPUTE_ERROR_ON(nullptr == input);
413 ARM_COMPUTE_ERROR_ON(nullptr == output);
414
415 const auto input_ptr = static_cast<const IImage *__restrict>(input);
416 const auto output_ptr = static_cast<IImage *__restrict>(output);
417
418 Iterator in(input_ptr, win);
419 Iterator out(output_ptr, win);
420
421 execute_window_loop(win, [&](const Coordinates & id)
422 {
423 const auto ta1 = vld4q_u8(in.ptr());
424 uint8x16x3_t ta2;
425 ta2.val[0] = ta1.val[0];
426 ta2.val[1] = ta1.val[1];
427 ta2.val[2] = ta1.val[2];
428 vst3q_u8(out.ptr(), ta2);
429 },
430 in, out);
431}
432
Alex Gildayc357c472018-03-21 13:54:09 +0000433/** Convert YUYV to RGB.
434 *
435 * @param[in] input Input YUYV data buffer.
436 * @param[out] output Output RGB buffer.
437 * @param[in] win Window for iterating the buffers.
438 *
439 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100440template <bool yuyv, bool alpha>
441void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
442{
443 ARM_COMPUTE_ERROR_ON(nullptr == input);
444 ARM_COMPUTE_ERROR_ON(nullptr == output);
445
446 const auto input_ptr = static_cast<const IImage *__restrict>(input);
447 const auto output_ptr = static_cast<IImage *__restrict>(output);
448
449 constexpr auto element_size = alpha ? 32 : 24;
450 constexpr auto shift = yuyv ? 0 : 1;
451
452 Iterator in(input_ptr, win);
453 Iterator out(output_ptr, win);
454
455 execute_window_loop(win, [&](const Coordinates & id)
456 {
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100457 const auto ta = vld4q_u8(in.ptr());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100458 //ta.val[0] = Y0 Y2 Y4 Y6 ...
459 //ta.val[1] = U0 U2 U4 U6 ...
460 //ta.val[2] = Y1 Y3 Y5 Y7 ...
461 //ta.val[3] = V0 V2 V4 V7 ...
462
463 // Convert the uint8x16x4_t to float32x4x4_t
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100464 const float32x4x4_t yvec = convert_uint8x16_to_float32x4x4(ta.val[0 + shift]);
465 const float32x4x4_t uvec = convert_uint8x16_to_float32x4x4(ta.val[1 - shift]);
466 const float32x4x4_t yyvec = convert_uint8x16_to_float32x4x4(ta.val[2 + shift]);
467 const float32x4x4_t vvec = convert_uint8x16_to_float32x4x4(ta.val[3 - shift]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100468
469 yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
470 yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
471 yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
472 yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
473 },
474 in, out);
475}
476
Alex Gildayc357c472018-03-21 13:54:09 +0000477/** Convert NV12 to RGB.
478 *
479 * @param[in] input Input NV12 data buffer.
480 * @param[out] output Output RGB buffer.
481 * @param[in] win Window for iterating the buffers.
482 *
483 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100484template <bool uv, bool alpha>
485void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
486{
487 ARM_COMPUTE_ERROR_ON(nullptr == input);
488 ARM_COMPUTE_ERROR_ON(nullptr == output);
489 win.validate();
490
491 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
492 const auto output_ptr = static_cast<IImage *__restrict>(output);
493
494 constexpr auto element_size = alpha ? 32 : 24;
495 const auto out_stride = output_ptr->info()->strides_in_bytes().y();
496 constexpr auto shift = uv ? 0 : 1;
497
498 // UV's width and height are subsampled
499 Window win_uv(win);
500 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2));
501 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
502 win_uv.validate();
503
504 Iterator in_y(input_ptr->plane(0), win);
505 Iterator in_uv(input_ptr->plane(1), win_uv);
506 Iterator out(output_ptr, win);
507
508 execute_window_loop(win, [&](const Coordinates & id)
509 {
510 const auto ta_y_top = vld2q_u8(in_y.ptr());
511 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
512 const auto ta_uv = vld2q_u8(in_uv.ptr());
513 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
514 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
515 //ta_uv.val[0] = U0 U2 U4 U6 ...
516 //ta_uv.val[1] = V0 V2 V4 V6 ...
517
518 // Convert the uint8x16x4_t to float32x4x4_t
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100519 float32x4x4_t yvec_top = convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
520 float32x4x4_t yyvec_top = convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
521 float32x4x4_t yvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
522 float32x4x4_t yyvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
523 float32x4x4_t uvec = convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift]);
524 float32x4x4_t vvec = convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100525
526 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
527 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
528 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
529 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
530
531 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
532 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
533 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
534 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
535 },
536 in_y, in_uv, out);
537}
538
Alex Gildayc357c472018-03-21 13:54:09 +0000539/** Convert IYUV to RGB.
540 *
541 * @param[in] input Input IYUV data buffer.
542 * @param[out] output Output RGB buffer.
543 * @param[in] win Window for iterating the buffers.
544 *
545 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100546template <bool alpha>
547void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
548{
549 ARM_COMPUTE_ERROR_ON(nullptr == input);
550 ARM_COMPUTE_ERROR_ON(nullptr == output);
551 win.validate();
552
553 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
554 const auto output_ptr = static_cast<IImage *__restrict>(output);
555
556 constexpr auto element_size = alpha ? 32 : 24;
557 const auto out_stride = output_ptr->info()->strides_in_bytes().y();
558
559 // UV's width and height are subsampled
560 Window win_uv(win);
561 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
562 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
563 win_uv.validate();
564
565 Iterator in_y(input_ptr->plane(0), win);
566 Iterator in_u(input_ptr->plane(1), win_uv);
567 Iterator in_v(input_ptr->plane(2), win_uv);
568 Iterator out(output_ptr, win);
569
570 execute_window_loop(win, [&](const Coordinates & id)
571 {
572 const auto ta_y_top = vld2q_u8(in_y.ptr());
573 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
574 const auto ta_u = vld1q_u8(in_u.ptr());
575 const auto ta_v = vld1q_u8(in_v.ptr());
576 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
577 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
578 //ta_u.val[0] = U0 U2 U4 U6 ...
579 //ta_v.val[0] = V0 V2 V4 V6 ...
580
581 // Convert the uint8x16x4_t to float32x4x4_t
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100582 float32x4x4_t yvec_top = convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
583 float32x4x4_t yyvec_top = convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
584 float32x4x4_t yvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
585 float32x4x4_t yyvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
586 float32x4x4_t uvec = convert_uint8x16_to_float32x4x4(ta_u);
587 float32x4x4_t vvec = convert_uint8x16_to_float32x4x4(ta_v);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100588
589 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
590 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
591 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
592 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
593
594 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
595 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
596 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
597 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
598 },
599 in_y, in_u, in_v, out);
600}
601
Alex Gildayc357c472018-03-21 13:54:09 +0000602/** Convert YUYV to NV12.
603 *
604 * @param[in] input Input YUYV data buffer.
605 * @param[out] output Output NV12 buffer.
606 * @param[in] win Window for iterating the buffers.
607 *
608 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100609template <bool yuyv>
610void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
611{
612 ARM_COMPUTE_ERROR_ON(nullptr == input);
613 ARM_COMPUTE_ERROR_ON(nullptr == output);
614 win.validate();
615
616 const auto input_ptr = static_cast<const IImage *__restrict>(input);
617 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
618
619 constexpr auto shift = yuyv ? 0 : 1;
620
621 // NV12's UV's width and height are subsampled
622 Window win_uv(win);
623 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
624 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
625 win_uv.validate();
626
627 Iterator in(input_ptr, win);
628 Iterator out_y(output_ptr->plane(0), win);
629 Iterator out_uv(output_ptr->plane(1), win_uv);
630
631 execute_window_loop(win, [&](const Coordinates & id)
632 {
633 const auto ta_top = vld4q_u8(in.ptr());
634 const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
635 //ta.val[0] = Y0 Y2 Y4 Y6 ...
636 //ta.val[1] = U0 U2 U4 U6 ...
637 //ta.val[2] = Y1 Y3 Y5 Y7 ...
638 //ta.val[3] = V0 V2 V4 V7 ...
639
640 uint8x16x2_t yvec;
641 yvec.val[0] = ta_top.val[0 + shift];
642 yvec.val[1] = ta_top.val[2 + shift];
643 vst2q_u8(out_y.ptr(), yvec);
644
645 uint8x16x2_t yyvec;
646 yyvec.val[0] = ta_bottom.val[0 + shift];
647 yyvec.val[1] = ta_bottom.val[2 + shift];
648 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
649
650 uint8x16x2_t uvvec;
651 uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
652 uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
653 vst2q_u8(out_uv.ptr(), uvvec);
654 },
655 in, out_y, out_uv);
656}
657
Alex Gildayc357c472018-03-21 13:54:09 +0000658/** Convert IYUV to NV12.
659 *
660 * @param[in] input Input IYUV data buffer.
661 * @param[out] output Output NV12 buffer.
662 * @param[in] win Window for iterating the buffers.
663 *
664 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100665void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
666{
667 ARM_COMPUTE_ERROR_ON(nullptr == input);
668 ARM_COMPUTE_ERROR_ON(nullptr == output);
669 win.validate();
670
671 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
672 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
673
674 // UV's width and height are subsampled
675 Window win_uv(win);
676 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
677 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
678 win_uv.validate();
679
680 Iterator in_y(input_ptr->plane(0), win);
681 Iterator in_u(input_ptr->plane(1), win_uv);
682 Iterator in_v(input_ptr->plane(2), win_uv);
683 Iterator out_y(output_ptr->plane(0), win);
684 Iterator out_uv(output_ptr->plane(1), win_uv);
685
686 execute_window_loop(win, [&](const Coordinates & id)
687 {
688 const auto ta_y_top = vld2q_u8(in_y.ptr());
689 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
690 uint8x16x2_t ta_uv;
691 ta_uv.val[0] = vld1q_u8(in_u.ptr());
692 ta_uv.val[1] = vld1q_u8(in_v.ptr());
693 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
694 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
695 //ta_uv.val[0] = U0 U2 U4 U6 ...
696 //ta_uv.val[1] = V0 V2 V4 V6 ...
697
698 vst2q_u8(out_y.ptr(), ta_y_top);
699 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
700 vst2q_u8(out_uv.ptr(), ta_uv);
701 },
702 in_y, in_u, in_v, out_y, out_uv);
703}
704
Alex Gildayc357c472018-03-21 13:54:09 +0000705/** Convert NV12 to IYUV.
706 *
707 * @param[in] input Input NV12 data buffer.
708 * @param[out] output Output IYUV buffer.
709 * @param[in] win Window for iterating the buffers.
710 *
711 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100712template <bool uv>
713void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
714{
715 ARM_COMPUTE_ERROR_ON(nullptr == input);
716 ARM_COMPUTE_ERROR_ON(nullptr == output);
717 win.validate();
718
719 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
720 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
721
722 constexpr auto shift = uv ? 0 : 1;
723
724 // UV's width and height are subsampled
725 Window win_uv(win);
726 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
727 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
728 win_uv.validate();
729
730 Iterator in_y(input_ptr->plane(0), win);
731 Iterator in_uv(input_ptr->plane(1), win_uv);
732 Iterator out_y(output_ptr->plane(0), win);
733 Iterator out_u(output_ptr->plane(1), win_uv);
734 Iterator out_v(output_ptr->plane(2), win_uv);
735
736 execute_window_loop(win, [&](const Coordinates & id)
737 {
738 const auto ta_y_top = vld2q_u8(in_y.ptr());
739 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
740 const auto ta_uv = vld2q_u8(in_uv.ptr());
741 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
742 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
743 //ta_uv.val[0] = U0 U2 U4 U6 ...
744 //ta_uv.val[1] = V0 V2 V4 V6 ...
745
746 vst2q_u8(out_y.ptr(), ta_y_top);
747 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
748 vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
749 vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
750 },
751 in_y, in_uv, out_y, out_u, out_v);
752}
753
Alex Gildayc357c472018-03-21 13:54:09 +0000754/** Convert YUYV to IYUV.
755 *
756 * @param[in] input Input YUYV data buffer.
757 * @param[out] output Output IYUV buffer.
758 * @param[in] win Window for iterating the buffers.
759 *
760 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100761template <bool yuyv>
762void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
763{
764 ARM_COMPUTE_ERROR_ON(nullptr == input);
765 ARM_COMPUTE_ERROR_ON(nullptr == output);
766 win.validate();
767
768 const auto input_ptr = static_cast<const IImage *__restrict>(input);
769 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
770
771 constexpr auto shift = yuyv ? 0 : 1;
772
773 // Destination's UV's width and height are subsampled
774 Window win_uv(win);
775 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
776 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
777 win_uv.validate();
778
779 Iterator in(input_ptr, win);
780 Iterator out_y(output_ptr->plane(0), win);
781 Iterator out_u(output_ptr->plane(1), win_uv);
782 Iterator out_v(output_ptr->plane(2), win_uv);
783
784 execute_window_loop(win, [&](const Coordinates & id)
785 {
786 const auto ta_top = vld4q_u8(in.ptr());
787 const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
788 //ta.val[0] = Y0 Y2 Y4 Y6 ...
789 //ta.val[1] = U0 U2 U4 U6 ...
790 //ta.val[2] = Y1 Y3 Y5 Y7 ...
791 //ta.val[3] = V0 V2 V4 V7 ...
792
793 uint8x16x2_t yvec;
794 yvec.val[0] = ta_top.val[0 + shift];
795 yvec.val[1] = ta_top.val[2 + shift];
796 vst2q_u8(out_y.ptr(), yvec);
797
798 uint8x16x2_t yyvec;
799 yyvec.val[0] = ta_bottom.val[0 + shift];
800 yyvec.val[1] = ta_bottom.val[2 + shift];
801 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
802
803 uint8x16_t uvec;
804 uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
805 vst1q_u8(out_u.ptr(), uvec);
806
807 uint8x16_t vvec;
808 vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
809 vst1q_u8(out_v.ptr(), vvec);
810 },
811 in, out_y, out_u, out_v);
812}
813
Alex Gildayc357c472018-03-21 13:54:09 +0000814/** Convert NV12 to YUV4.
815 *
816 * @param[in] input Input NV12 data buffer.
817 * @param[out] output Output YUV4 buffer.
818 * @param[in] win Window for iterating the buffers.
819 *
820 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100821template <bool uv>
822void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
823{
824 ARM_COMPUTE_ERROR_ON(nullptr == input);
825 ARM_COMPUTE_ERROR_ON(nullptr == output);
826 win.validate();
827
828 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
829 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
830
831 constexpr auto shift = uv ? 0 : 1;
832
833 // UV's width and height are subsampled
834 Window win_uv(win);
835 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
836 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
837 win_uv.validate();
838
839 Iterator in_y(input_ptr->plane(0), win);
840 Iterator in_uv(input_ptr->plane(1), win_uv);
841 Iterator out_y(output_ptr->plane(0), win);
842 Iterator out_u(output_ptr->plane(1), win);
843 Iterator out_v(output_ptr->plane(2), win);
844
845 execute_window_loop(win, [&](const Coordinates & id)
846 {
847 const auto ta_y_top = vld2q_u8(in_y.ptr());
848 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
849 const auto ta_uv = vld2q_u8(in_uv.ptr());
850 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
851 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
852 //ta_uv.val[0] = U0 U2 U4 U6 ...
853 //ta_uv.val[1] = V0 V2 V4 V6 ...
854
855 vst2q_u8(out_y.ptr(), ta_y_top);
856 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
857
858 uint8x16x2_t uvec;
859 uvec.val[0] = ta_uv.val[0 + shift];
860 uvec.val[1] = ta_uv.val[0 + shift];
861 vst2q_u8(out_u.ptr(), uvec);
862 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
863
864 uint8x16x2_t vvec;
865 vvec.val[0] = ta_uv.val[1 - shift];
866 vvec.val[1] = ta_uv.val[1 - shift];
867 vst2q_u8(out_v.ptr(), vvec);
868 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
869 },
870 in_y, in_uv, out_y, out_u, out_v);
871}
872
Alex Gildayc357c472018-03-21 13:54:09 +0000873/** Convert IYUV to YUV4.
874 *
875 * @param[in] input Input IYUV data buffer.
876 * @param[out] output Output YUV4 buffer.
877 * @param[in] win Window for iterating the buffers.
878 *
879 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100880void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
881{
882 ARM_COMPUTE_ERROR_ON(nullptr == input);
883 ARM_COMPUTE_ERROR_ON(nullptr == output);
884 win.validate();
885
886 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
887 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
888
889 // UV's width and height are subsampled
890 Window win_uv(win);
891 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
892 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
893 win_uv.validate();
894
895 Iterator in_y(input_ptr->plane(0), win);
896 Iterator in_u(input_ptr->plane(1), win_uv);
897 Iterator in_v(input_ptr->plane(2), win_uv);
898 Iterator out_y(output_ptr->plane(0), win);
899 Iterator out_u(output_ptr->plane(1), win);
900 Iterator out_v(output_ptr->plane(2), win);
901
902 execute_window_loop(win, [&](const Coordinates & id)
903 {
904 const auto ta_y_top = vld2q_u8(in_y.ptr());
905 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
906 const auto ta_u = vld1q_u8(in_u.ptr());
907 const auto ta_v = vld1q_u8(in_v.ptr());
908 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
909 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
910 //ta_u = U0 U2 U4 U6 ...
911 //ta_v = V0 V2 V4 V6 ...
912
913 vst2q_u8(out_y.ptr(), ta_y_top);
914 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
915
916 uint8x16x2_t uvec;
917 uvec.val[0] = ta_u;
918 uvec.val[1] = ta_u;
919 vst2q_u8(out_u.ptr(), uvec);
920 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
921
922 uint8x16x2_t vvec;
923 vvec.val[0] = ta_v;
924 vvec.val[1] = ta_v;
925 vst2q_u8(out_v.ptr(), vvec);
926 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
927 },
928 in_y, in_u, in_v, out_y, out_u, out_v);
929}
930
Alex Gildayc357c472018-03-21 13:54:09 +0000931/** Convert RGB to NV12.
932 *
933 * @param[in] input Input RGB data buffer.
934 * @param[out] output Output NV12 buffer.
935 * @param[in] win Window for iterating the buffers.
936 *
937 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100938template <bool alpha>
939void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
940{
941 ARM_COMPUTE_ERROR_ON(nullptr == input);
942 ARM_COMPUTE_ERROR_ON(nullptr == output);
943 win.validate();
944
945 const auto input_ptr = static_cast<const IImage *__restrict>(input);
946 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
947
948 // UV's width and height are subsampled
949 Window win_uv(win);
950 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
951 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
952 win_uv.validate();
953
954 Iterator in(input_ptr, win);
955 Iterator out_y(output_ptr->plane(0), win);
956 Iterator out_uv(output_ptr->plane(1), win_uv);
957
958 execute_window_loop(win, [&](const Coordinates & id)
959 {
960 const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
961 const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
962 //ta_rgb.val[0] = R0 R1 R2 R3 ...
963 //ta_rgb.val[1] = G0 G1 G2 G3 ...
964 //ta_rgb.val[2] = B0 B1 B2 B3 ...
965
966 store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
967 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
968 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
969 out_uv.ptr());
970 },
971 in, out_y, out_uv);
972}
973
Alex Gildayc357c472018-03-21 13:54:09 +0000974/** Convert RGB to IYUV.
975 *
976 * @param[in] input Input RGB data buffer.
977 * @param[out] output Output IYUV buffer.
978 * @param[in] win Window for iterating the buffers.
979 *
980 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100981template <bool alpha>
982void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
983{
984 ARM_COMPUTE_ERROR_ON(nullptr == input);
985 ARM_COMPUTE_ERROR_ON(nullptr == output);
986 win.validate();
987
988 const auto input_ptr = static_cast<const IImage *__restrict>(input);
989 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
990
991 // UV's width and height are subsampled
992 Window win_uv(win);
993 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
994 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
995 win_uv.validate();
996
997 Iterator in(input_ptr, win);
998 Iterator out_y(output_ptr->plane(0), win);
999 Iterator out_u(output_ptr->plane(1), win_uv);
1000 Iterator out_v(output_ptr->plane(2), win_uv);
1001
1002 execute_window_loop(win, [&](const Coordinates & id)
1003 {
1004 const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
1005 const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
1006 //ta_rgb.val[0] = R0 R1 R2 R3 ...
1007 //ta_rgb.val[1] = G0 G1 G2 G3 ...
1008 //ta_rgb.val[2] = B0 B1 B2 B3 ...
1009
1010 store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
1011 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
1012 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
1013 out_u.ptr(), out_v.ptr());
1014 },
1015 in, out_y, out_u, out_v);
1016}
1017
Alex Gildayc357c472018-03-21 13:54:09 +00001018/** Convert RGB to YUV4.
1019 *
1020 * @param[in] input Input RGB data buffer.
1021 * @param[out] output Output YUV4 buffer.
1022 * @param[in] win Window for iterating the buffers.
1023 *
1024 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001025template <bool alpha>
1026void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
1027{
1028 ARM_COMPUTE_ERROR_ON(nullptr == input);
1029 ARM_COMPUTE_ERROR_ON(nullptr == output);
1030 win.validate();
1031
1032 const auto input_ptr = static_cast<const IImage *__restrict>(input);
1033 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
1034
1035 Iterator in(input_ptr, win);
1036 Iterator out_y(output_ptr->plane(0), win);
1037 Iterator out_u(output_ptr->plane(1), win);
1038 Iterator out_v(output_ptr->plane(2), win);
1039
1040 execute_window_loop(win, [&](const Coordinates & id)
1041 {
1042 const auto ta_rgb = load_rgb(in.ptr(), alpha);
1043 //ta_rgb.val[0] = R0 R1 R2 R3 ...
1044 //ta_rgb.val[1] = G0 G1 G2 G3 ...
1045 //ta_rgb.val[2] = B0 B1 B2 B3 ...
1046
1047 store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
1048 out_y.ptr(), out_u.ptr(), out_v.ptr());
1049 },
1050 in, out_y, out_u, out_v);
1051}
Gian Marco Iodice356f6432017-09-22 11:32:21 +01001052} // namespace arm_compute