blob: 7145d6f2060e50c7cb5114e1bf8223bf9eb2679b [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Michalis Spyrou6bff1952019-10-02 17:22:11 +01002 * Copyright (c) 2016-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/Error.h"
25#include "arm_compute/core/Helpers.h"
26#include "arm_compute/core/IMultiImage.h"
Manuel Bottini21079dd2019-10-29 17:20:09 +000027#include "arm_compute/core/NEON/NEMath.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#include "arm_compute/core/Utils.h"
29
30#include <arm_neon.h>
31
32namespace
33{
Alex Gildayc357c472018-03-21 13:54:09 +000034#ifndef DOXYGEN_SKIP_THIS
// BT.709 YUV -> RGB coefficients. They are applied to chroma values that have
// already had 128 subtracted:
//   R = Y + red_coef * V
//   G = Y + green_coef * U + green_coef2 * V
//   B = Y + blue_coef * U
constexpr float red_coef_bt709{ 1.5748f };
constexpr float green_coef_bt709{ -0.1873f };
constexpr float green_coef2_bt709{ -0.4681f };
constexpr float blue_coef_bt709{ 1.8556f };

// BT.709 RGB -> YUV luma weights and chroma scale factors.
constexpr float rgb2yuv_bt709_kr{ 0.2126f };
constexpr float rgb2yuv_bt709_kb{ 0.0722f };
// K_g = 1 - K_r - K_b
constexpr float rgb2yuv_bt709_kg{ 0.7152f };
// C_u = 1 / (2 * (1 - K_b))
constexpr float rgb2yuv_bt709_cu{ 0.5389f };
// C_v = 1 / (2 * (1 - K_r))
constexpr float rgb2yuv_bt709_cv{ 0.6350f };

// Channel weights used for the RGB -> U8 (greyscale) conversion.
constexpr float rgb2u8_red_coef{ 0.2126f };
constexpr float rgb2u8_green_coef{ 0.7152f };
constexpr float rgb2u8_blue_coef{ 0.0722f };
52
Michalis Spyrou6bff1952019-10-02 17:22:11 +010053inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor, const float32x4_t &gcolor, const float32x4_t &bcolor,
54 const float rcoef, const float gcoef, const float bcoef)
Manuel Bottini4284bfa2018-09-26 15:33:15 +010055{
56 float32x4_t greyscale = vmulq_n_f32(rcolor, rcoef);
Michalis Spyrou6bff1952019-10-02 17:22:11 +010057 greyscale = vmlaq_n_f32(greyscale, gcolor, gcoef);
58 greyscale = vmlaq_n_f32(greyscale, bcolor, bcoef);
Manuel Bottini4284bfa2018-09-26 15:33:15 +010059 return greyscale;
60}
61
62inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out)
63{
64 float32x4x4_t out_float32;
65
66 //Conversion from 3(RGB) 4 uint8s to 3(RGB) 4 floats
Manuel Bottini21079dd2019-10-29 17:20:09 +000067 const float32x4x4_t r_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[0]);
68 const float32x4x4_t g_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[1]);
69 const float32x4x4_t b_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[2]);
Manuel Bottini4284bfa2018-09-26 15:33:15 +010070
71 //New grayscale image = ( (RED_COEFF * R) + (GREEN_COEFF * G) + (BLUE_COEFF * B) )
72 //Computation of 1(Greyscale) 4 uint8 using 3(RGB) 4 uint8s float
73 out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0],
Michalis Spyrou6bff1952019-10-02 17:22:11 +010074 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
Manuel Bottini4284bfa2018-09-26 15:33:15 +010075
76 out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1],
Michalis Spyrou6bff1952019-10-02 17:22:11 +010077 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
Manuel Bottini4284bfa2018-09-26 15:33:15 +010078
79 out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2],
Michalis Spyrou6bff1952019-10-02 17:22:11 +010080 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
Manuel Bottini4284bfa2018-09-26 15:33:15 +010081
82 out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3], g_float32.val[3], b_float32.val[3],
Michalis Spyrou6bff1952019-10-02 17:22:11 +010083 rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
Manuel Bottini4284bfa2018-09-26 15:33:15 +010084
85 //Conversion from 1(Greyscale) 4 floats to 1(Greyscale) 4 uint8s
Sang-Hoon Parkc3a74202019-11-22 16:05:46 +000086 arm_compute::convert_float32x4x4_to_uint8x16(out_float32, out);
Manuel Bottini4284bfa2018-09-26 15:33:15 +010087}
88
Anthony Barbier6ff3b192017-09-04 18:44:23 +010089inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
90 float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
91{
92 /*
93 Y'= 0.2126*R' + 0.7152*G' + 0.0722*B'
94 U'=-0.1146*R' - 0.3854*G' + 0.5000*B'
95 V'= 0.5000*R' - 0.4542*G' - 0.0458*B'
96 */
97 const auto c128 = vdupq_n_f32(128.f);
98
99 // Y = R * K_r + G * (1 - K_r - K_b) * B * K_b
100 yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
101 yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
102 yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);
103
104 // U = (B - Y) / (2 * (1 - K_b))
105 uvec = vsubq_f32(bvec, yvec);
106 uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu);
107
108 // V = (R - Y) / (2 * (1 - K_r))
109 vvec = vsubq_f32(rvec, yvec);
110 vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv);
111}
112
113inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
114 float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
115{
116 float32x4x3_t rgb1, rgb2;
117
118 // Compute: cb - 128 and cr - 128;
119 const auto c128 = vdupq_n_f32(128.f);
120 uvec_val = vsubq_f32(uvec_val, c128);
121 vvec_val = vsubq_f32(vvec_val, c128);
122
123 // Compute:
124 // r = 0.0000f*f_u + 1.5748f*f_v;
125 // g = 0.1873f*f_u - 0.4681f*f_v;
126 // b = 1.8556f*f_u + 0.0000f*f_v;
127 const auto red = vmulq_n_f32(vvec_val, red_coef_bt709);
128 const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
129 const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
130 vmulq_n_f32(vvec_val, green_coef2_bt709));
131
132 // Compute the final r,g,b values using y1 for the first texel and y2 for the second one.
133 // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t
134 // and written back to memory using vst3 instruction
135
136 rgb1.val[0] = vaddq_f32(yvec_val, red);
137 rgb1.val[1] = vaddq_f32(yvec_val, green);
138 rgb1.val[2] = vaddq_f32(yvec_val, blue);
139
140 rgb2.val[0] = vaddq_f32(yyvec_val, red);
141 rgb2.val[1] = vaddq_f32(yyvec_val, green);
142 rgb2.val[2] = vaddq_f32(yyvec_val, blue);
143
144 uint8x8x3_t u8_rgb;
Manuel Bottini21079dd2019-10-29 17:20:09 +0000145 arm_compute::convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100146
147 if(!alpha)
148 {
149 vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
150 vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
151 vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
152 vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
153 vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
154 vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
155 vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
156 vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
157 }
158 else
159 {
160 uint8x8x4_t u8_rgba;
161 u8_rgba.val[0] = u8_rgb.val[0];
162 u8_rgba.val[1] = u8_rgb.val[1];
163 u8_rgba.val[2] = u8_rgb.val[2];
164 u8_rgba.val[3] = vdup_n_u8(255);
165 vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
166 vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
167 vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
168 vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
169 vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
170 vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
171 vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
172 vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
173 }
174}
175
176inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
177{
178 uint8x16x3_t rgb;
179
180 if(alpha)
181 {
182 const auto tmp = vld4q_u8(ptr);
183 rgb.val[0] = tmp.val[0];
184 rgb.val[1] = tmp.val[1];
185 rgb.val[2] = tmp.val[2];
186 }
187 else
188 {
189 rgb = vld3q_u8(ptr);
190 }
191
192 return rgb;
193}
194
195inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
196{
197 // Convert the uint8x16_t to float32x4x4_t
Manuel Bottini21079dd2019-10-29 17:20:09 +0000198 const float32x4x4_t frvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[0]);
199 const float32x4x4_t fgvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[1]);
200 const float32x4x4_t fbvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[2]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100201
Manuel Bottini21079dd2019-10-29 17:20:09 +0000202 const float32x4x4_t frvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[0]);
203 const float32x4x4_t fgvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[1]);
204 const float32x4x4_t fbvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[2]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100205
206 float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
207 float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
208
209 for(auto i = 0; i < 4; ++i)
210 {
211 rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i],
212 fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
213 rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i],
214 fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
215 }
216
Sang-Hoon Parkc3a74202019-11-22 16:05:46 +0000217 arm_compute::convert_float32x4x4_to_uint8x16(fyvec_top, vec_top.val[0]);
218 arm_compute::convert_float32x4x4_to_uint8x16(fuvec_top, vec_top.val[1]);
219 arm_compute::convert_float32x4x4_to_uint8x16(fvvec_top, vec_top.val[2]);
220 arm_compute::convert_float32x4x4_to_uint8x16(fyvec_bottom, vec_bottom.val[0]);
221 arm_compute::convert_float32x4x4_to_uint8x16(fuvec_bottom, vec_bottom.val[1]);
222 arm_compute::convert_float32x4x4_to_uint8x16(fvvec_bottom, vec_bottom.val[2]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100223}
224
225inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
226 const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
227 unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
228 unsigned char *const __restrict out_uv)
229{
230 uint8x16x3_t vec_top, vec_bottom;
231 vec_top.val[0] = rvec_top;
232 vec_top.val[1] = gvec_top;
233 vec_top.val[2] = bvec_top;
234 vec_bottom.val[0] = rvec_bottom;
235 vec_bottom.val[1] = gvec_bottom;
236 vec_bottom.val[2] = bvec_bottom;
237
238 rgb_to_yuv_conversion(vec_top, vec_bottom);
239
240 vst1q_u8(out_y_top, vec_top.val[0]);
241 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
242
243 const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
244 const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
245 const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
246 const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
247
248 uint8x8x2_t uvvec;
249 uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
250 uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
251
252 vst2_u8(out_uv, uvvec);
253}
254
255inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
256 const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
257 unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom,
258 unsigned char *const __restrict out_u,
259 unsigned char *const __restrict out_v)
260{
261 uint8x16x3_t vec_top, vec_bottom;
262 vec_top.val[0] = rvec_top;
263 vec_top.val[1] = gvec_top;
264 vec_top.val[2] = bvec_top;
265 vec_bottom.val[0] = rvec_bottom;
266 vec_bottom.val[1] = gvec_bottom;
267 vec_bottom.val[2] = bvec_bottom;
268
269 rgb_to_yuv_conversion(vec_top, vec_bottom);
270
271 vst1q_u8(out_y_top, vec_top.val[0]);
272 vst1q_u8(out_y_bottom, vec_bottom.val[0]);
273
274 const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
275 const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
276 const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
277 vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
278
279 vst1_u8(out_u, vget_low_u8(uvvec));
280 vst1_u8(out_v, vget_high_u8(uvvec));
281}
282
283inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
284 unsigned char *const __restrict out_y,
285 unsigned char *const __restrict out_u,
286 unsigned char *const __restrict out_v)
287{
288 // Convert the uint8x16_t to float32x4x4_t
Manuel Bottini21079dd2019-10-29 17:20:09 +0000289 const float32x4x4_t frvec = arm_compute::convert_uint8x16_to_float32x4x4(rvec);
290 const float32x4x4_t fgvec = arm_compute::convert_uint8x16_to_float32x4x4(gvec);
291 const float32x4x4_t fbvec = arm_compute::convert_uint8x16_to_float32x4x4(bvec);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100292
293 float32x4x4_t fyvec, fuvec, fvvec;
294 for(auto i = 0; i < 4; ++i)
295 {
296 rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
297 fyvec.val[i], fuvec.val[i], fvvec.val[i]);
298 }
299
300 uint8x16_t yvec, uvec, vvec;
Sang-Hoon Parkc3a74202019-11-22 16:05:46 +0000301 arm_compute::convert_float32x4x4_to_uint8x16(fyvec, yvec);
302 arm_compute::convert_float32x4x4_to_uint8x16(fuvec, uvec);
303 arm_compute::convert_float32x4x4_to_uint8x16(fvvec, vvec);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100304
305 vst1q_u8(out_y, yvec);
306 vst1q_u8(out_u, uvec);
307 vst1q_u8(out_v, vvec);
308}
Alex Gildayc357c472018-03-21 13:54:09 +0000309#endif /* DOXYGEN_SKIP_THIS */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100310}
311
312namespace arm_compute
313{
Alex Gildayc357c472018-03-21 13:54:09 +0000314/** Convert RGB to RGBX.
315 *
316 * @param[in] input Input RGB data buffer.
317 * @param[out] output Output RGBX buffer.
318 * @param[in] win Window for iterating the buffers.
319 *
320 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100321void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
322{
323 ARM_COMPUTE_ERROR_ON(nullptr == input);
324 ARM_COMPUTE_ERROR_ON(nullptr == output);
325
326 const auto input_ptr = static_cast<const IImage *__restrict>(input);
327 const auto output_ptr = static_cast<IImage *__restrict>(output);
328
329 Iterator in(input_ptr, win);
330 Iterator out(output_ptr, win);
331
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100332 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100333 {
334 const auto ta1 = vld3q_u8(in.ptr());
335 uint8x16x4_t ta2;
336 ta2.val[0] = ta1.val[0];
337 ta2.val[1] = ta1.val[1];
338 ta2.val[2] = ta1.val[2];
339 ta2.val[3] = vdupq_n_u8(255);
340 vst4q_u8(out.ptr(), ta2);
341 },
342 in, out);
343}
344
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100345/** Convert RGB to U8.
346 *
347 * @param[in] input Input RGB data buffer.
348 * @param[out] output Output U8 buffer.
349 * @param[in] win Window for iterating the buffers.
350 *
351 */
352void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win)
353{
354 ARM_COMPUTE_ERROR_ON(nullptr == input);
355 ARM_COMPUTE_ERROR_ON(nullptr == output);
356
357 const auto input_ptr = static_cast<const IImage *__restrict>(input);
358 const auto output_ptr = static_cast<IImage *__restrict>(output);
359
360 Iterator in(input_ptr, win);
361 Iterator out(output_ptr, win);
362
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100363 execute_window_loop(win, [&](const Coordinates &)
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100364 {
365 const auto ta1 = vld3q_u8(in.ptr());
366 uint8x16_t ta2;
367 rgb_to_u8_conversion(ta1, ta2);
368 vst1q_u8(out.ptr(), ta2);
369 },
370 in, out);
371}
372
Alex Gildayc357c472018-03-21 13:54:09 +0000373/** Convert RGBX to RGB.
374 *
375 * @param[in] input Input RGBX data buffer.
376 * @param[out] output Output RGB buffer.
377 * @param[in] win Window for iterating the buffers.
378 *
379 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100380void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
381{
382 ARM_COMPUTE_ERROR_ON(nullptr == input);
383 ARM_COMPUTE_ERROR_ON(nullptr == output);
384
385 const auto input_ptr = static_cast<const IImage *__restrict>(input);
386 const auto output_ptr = static_cast<IImage *__restrict>(output);
387
388 Iterator in(input_ptr, win);
389 Iterator out(output_ptr, win);
390
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100391 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100392 {
393 const auto ta1 = vld4q_u8(in.ptr());
394 uint8x16x3_t ta2;
395 ta2.val[0] = ta1.val[0];
396 ta2.val[1] = ta1.val[1];
397 ta2.val[2] = ta1.val[2];
398 vst3q_u8(out.ptr(), ta2);
399 },
400 in, out);
401}
402
Alex Gildayc357c472018-03-21 13:54:09 +0000403/** Convert YUYV to RGB.
404 *
405 * @param[in] input Input YUYV data buffer.
406 * @param[out] output Output RGB buffer.
407 * @param[in] win Window for iterating the buffers.
408 *
409 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100410template <bool yuyv, bool alpha>
411void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
412{
413 ARM_COMPUTE_ERROR_ON(nullptr == input);
414 ARM_COMPUTE_ERROR_ON(nullptr == output);
415
416 const auto input_ptr = static_cast<const IImage *__restrict>(input);
417 const auto output_ptr = static_cast<IImage *__restrict>(output);
418
419 constexpr auto element_size = alpha ? 32 : 24;
420 constexpr auto shift = yuyv ? 0 : 1;
421
422 Iterator in(input_ptr, win);
423 Iterator out(output_ptr, win);
424
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100425 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100426 {
Manuel Bottini4284bfa2018-09-26 15:33:15 +0100427 const auto ta = vld4q_u8(in.ptr());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100428 //ta.val[0] = Y0 Y2 Y4 Y6 ...
429 //ta.val[1] = U0 U2 U4 U6 ...
430 //ta.val[2] = Y1 Y3 Y5 Y7 ...
431 //ta.val[3] = V0 V2 V4 V7 ...
432
433 // Convert the uint8x16x4_t to float32x4x4_t
Manuel Bottini21079dd2019-10-29 17:20:09 +0000434 const float32x4x4_t yvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[0 + shift]);
435 const float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[1 - shift]);
436 const float32x4x4_t yyvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[2 + shift]);
437 const float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[3 - shift]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100438
439 yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
440 yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
441 yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
442 yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
443 },
444 in, out);
445}
446
Alex Gildayc357c472018-03-21 13:54:09 +0000447/** Convert NV12 to RGB.
448 *
449 * @param[in] input Input NV12 data buffer.
450 * @param[out] output Output RGB buffer.
451 * @param[in] win Window for iterating the buffers.
452 *
453 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100454template <bool uv, bool alpha>
455void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
456{
457 ARM_COMPUTE_ERROR_ON(nullptr == input);
458 ARM_COMPUTE_ERROR_ON(nullptr == output);
459 win.validate();
460
461 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
462 const auto output_ptr = static_cast<IImage *__restrict>(output);
463
464 constexpr auto element_size = alpha ? 32 : 24;
465 const auto out_stride = output_ptr->info()->strides_in_bytes().y();
466 constexpr auto shift = uv ? 0 : 1;
467
468 // UV's width and height are subsampled
469 Window win_uv(win);
470 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2));
471 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
472 win_uv.validate();
473
474 Iterator in_y(input_ptr->plane(0), win);
475 Iterator in_uv(input_ptr->plane(1), win_uv);
476 Iterator out(output_ptr, win);
477
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100478 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100479 {
480 const auto ta_y_top = vld2q_u8(in_y.ptr());
481 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
482 const auto ta_uv = vld2q_u8(in_uv.ptr());
483 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
484 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
485 //ta_uv.val[0] = U0 U2 U4 U6 ...
486 //ta_uv.val[1] = V0 V2 V4 V6 ...
487
488 // Convert the uint8x16x4_t to float32x4x4_t
Manuel Bottini21079dd2019-10-29 17:20:09 +0000489 float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
490 float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
491 float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
492 float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
493 float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift]);
494 float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift]);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100495
496 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
497 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
498 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
499 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
500
501 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
502 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
503 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
504 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
505 },
506 in_y, in_uv, out);
507}
508
Alex Gildayc357c472018-03-21 13:54:09 +0000509/** Convert IYUV to RGB.
510 *
511 * @param[in] input Input IYUV data buffer.
512 * @param[out] output Output RGB buffer.
513 * @param[in] win Window for iterating the buffers.
514 *
515 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100516template <bool alpha>
517void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
518{
519 ARM_COMPUTE_ERROR_ON(nullptr == input);
520 ARM_COMPUTE_ERROR_ON(nullptr == output);
521 win.validate();
522
523 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
524 const auto output_ptr = static_cast<IImage *__restrict>(output);
525
526 constexpr auto element_size = alpha ? 32 : 24;
527 const auto out_stride = output_ptr->info()->strides_in_bytes().y();
528
529 // UV's width and height are subsampled
530 Window win_uv(win);
531 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
532 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
533 win_uv.validate();
534
535 Iterator in_y(input_ptr->plane(0), win);
536 Iterator in_u(input_ptr->plane(1), win_uv);
537 Iterator in_v(input_ptr->plane(2), win_uv);
538 Iterator out(output_ptr, win);
539
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100540 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100541 {
542 const auto ta_y_top = vld2q_u8(in_y.ptr());
543 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
544 const auto ta_u = vld1q_u8(in_u.ptr());
545 const auto ta_v = vld1q_u8(in_v.ptr());
546 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
547 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
548 //ta_u.val[0] = U0 U2 U4 U6 ...
549 //ta_v.val[0] = V0 V2 V4 V6 ...
550
551 // Convert the uint8x16x4_t to float32x4x4_t
Manuel Bottini21079dd2019-10-29 17:20:09 +0000552 float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
553 float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
554 float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
555 float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
556 float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_u);
557 float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_v);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100558
559 yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
560 yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
561 yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha);
562 yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha);
563
564 yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha);
565 yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha);
566 yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha);
567 yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha);
568 },
569 in_y, in_u, in_v, out);
570}
571
Alex Gildayc357c472018-03-21 13:54:09 +0000572/** Convert YUYV to NV12.
573 *
574 * @param[in] input Input YUYV data buffer.
575 * @param[out] output Output NV12 buffer.
576 * @param[in] win Window for iterating the buffers.
577 *
578 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100579template <bool yuyv>
580void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
581{
582 ARM_COMPUTE_ERROR_ON(nullptr == input);
583 ARM_COMPUTE_ERROR_ON(nullptr == output);
584 win.validate();
585
586 const auto input_ptr = static_cast<const IImage *__restrict>(input);
587 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
588
589 constexpr auto shift = yuyv ? 0 : 1;
590
591 // NV12's UV's width and height are subsampled
592 Window win_uv(win);
593 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
594 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
595 win_uv.validate();
596
597 Iterator in(input_ptr, win);
598 Iterator out_y(output_ptr->plane(0), win);
599 Iterator out_uv(output_ptr->plane(1), win_uv);
600
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100601 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100602 {
603 const auto ta_top = vld4q_u8(in.ptr());
604 const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
605 //ta.val[0] = Y0 Y2 Y4 Y6 ...
606 //ta.val[1] = U0 U2 U4 U6 ...
607 //ta.val[2] = Y1 Y3 Y5 Y7 ...
608 //ta.val[3] = V0 V2 V4 V7 ...
609
610 uint8x16x2_t yvec;
611 yvec.val[0] = ta_top.val[0 + shift];
612 yvec.val[1] = ta_top.val[2 + shift];
613 vst2q_u8(out_y.ptr(), yvec);
614
615 uint8x16x2_t yyvec;
616 yyvec.val[0] = ta_bottom.val[0 + shift];
617 yyvec.val[1] = ta_bottom.val[2 + shift];
618 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
619
620 uint8x16x2_t uvvec;
621 uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
622 uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
623 vst2q_u8(out_uv.ptr(), uvvec);
624 },
625 in, out_y, out_uv);
626}
627
Alex Gildayc357c472018-03-21 13:54:09 +0000628/** Convert IYUV to NV12.
629 *
630 * @param[in] input Input IYUV data buffer.
631 * @param[out] output Output NV12 buffer.
632 * @param[in] win Window for iterating the buffers.
633 *
634 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100635void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
636{
637 ARM_COMPUTE_ERROR_ON(nullptr == input);
638 ARM_COMPUTE_ERROR_ON(nullptr == output);
639 win.validate();
640
641 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
642 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
643
644 // UV's width and height are subsampled
645 Window win_uv(win);
646 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
647 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
648 win_uv.validate();
649
650 Iterator in_y(input_ptr->plane(0), win);
651 Iterator in_u(input_ptr->plane(1), win_uv);
652 Iterator in_v(input_ptr->plane(2), win_uv);
653 Iterator out_y(output_ptr->plane(0), win);
654 Iterator out_uv(output_ptr->plane(1), win_uv);
655
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100656 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100657 {
658 const auto ta_y_top = vld2q_u8(in_y.ptr());
659 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
660 uint8x16x2_t ta_uv;
661 ta_uv.val[0] = vld1q_u8(in_u.ptr());
662 ta_uv.val[1] = vld1q_u8(in_v.ptr());
663 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
664 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
665 //ta_uv.val[0] = U0 U2 U4 U6 ...
666 //ta_uv.val[1] = V0 V2 V4 V6 ...
667
668 vst2q_u8(out_y.ptr(), ta_y_top);
669 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
670 vst2q_u8(out_uv.ptr(), ta_uv);
671 },
672 in_y, in_u, in_v, out_y, out_uv);
673}
674
Alex Gildayc357c472018-03-21 13:54:09 +0000675/** Convert NV12 to IYUV.
676 *
677 * @param[in] input Input NV12 data buffer.
678 * @param[out] output Output IYUV buffer.
679 * @param[in] win Window for iterating the buffers.
680 *
681 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100682template <bool uv>
683void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
684{
685 ARM_COMPUTE_ERROR_ON(nullptr == input);
686 ARM_COMPUTE_ERROR_ON(nullptr == output);
687 win.validate();
688
689 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
690 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
691
692 constexpr auto shift = uv ? 0 : 1;
693
694 // UV's width and height are subsampled
695 Window win_uv(win);
696 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
697 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
698 win_uv.validate();
699
700 Iterator in_y(input_ptr->plane(0), win);
701 Iterator in_uv(input_ptr->plane(1), win_uv);
702 Iterator out_y(output_ptr->plane(0), win);
703 Iterator out_u(output_ptr->plane(1), win_uv);
704 Iterator out_v(output_ptr->plane(2), win_uv);
705
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100706 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100707 {
708 const auto ta_y_top = vld2q_u8(in_y.ptr());
709 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
710 const auto ta_uv = vld2q_u8(in_uv.ptr());
711 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
712 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
713 //ta_uv.val[0] = U0 U2 U4 U6 ...
714 //ta_uv.val[1] = V0 V2 V4 V6 ...
715
716 vst2q_u8(out_y.ptr(), ta_y_top);
717 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
718 vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
719 vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
720 },
721 in_y, in_uv, out_y, out_u, out_v);
722}
723
Alex Gildayc357c472018-03-21 13:54:09 +0000724/** Convert YUYV to IYUV.
725 *
726 * @param[in] input Input YUYV data buffer.
727 * @param[out] output Output IYUV buffer.
728 * @param[in] win Window for iterating the buffers.
729 *
730 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100731template <bool yuyv>
732void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
733{
734 ARM_COMPUTE_ERROR_ON(nullptr == input);
735 ARM_COMPUTE_ERROR_ON(nullptr == output);
736 win.validate();
737
738 const auto input_ptr = static_cast<const IImage *__restrict>(input);
739 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
740
741 constexpr auto shift = yuyv ? 0 : 1;
742
743 // Destination's UV's width and height are subsampled
744 Window win_uv(win);
745 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
746 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
747 win_uv.validate();
748
749 Iterator in(input_ptr, win);
750 Iterator out_y(output_ptr->plane(0), win);
751 Iterator out_u(output_ptr->plane(1), win_uv);
752 Iterator out_v(output_ptr->plane(2), win_uv);
753
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100754 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100755 {
756 const auto ta_top = vld4q_u8(in.ptr());
757 const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
758 //ta.val[0] = Y0 Y2 Y4 Y6 ...
759 //ta.val[1] = U0 U2 U4 U6 ...
760 //ta.val[2] = Y1 Y3 Y5 Y7 ...
761 //ta.val[3] = V0 V2 V4 V7 ...
762
763 uint8x16x2_t yvec;
764 yvec.val[0] = ta_top.val[0 + shift];
765 yvec.val[1] = ta_top.val[2 + shift];
766 vst2q_u8(out_y.ptr(), yvec);
767
768 uint8x16x2_t yyvec;
769 yyvec.val[0] = ta_bottom.val[0 + shift];
770 yyvec.val[1] = ta_bottom.val[2 + shift];
771 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
772
773 uint8x16_t uvec;
774 uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
775 vst1q_u8(out_u.ptr(), uvec);
776
777 uint8x16_t vvec;
778 vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
779 vst1q_u8(out_v.ptr(), vvec);
780 },
781 in, out_y, out_u, out_v);
782}
783
Alex Gildayc357c472018-03-21 13:54:09 +0000784/** Convert NV12 to YUV4.
785 *
786 * @param[in] input Input NV12 data buffer.
787 * @param[out] output Output YUV4 buffer.
788 * @param[in] win Window for iterating the buffers.
789 *
790 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100791template <bool uv>
792void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
793{
794 ARM_COMPUTE_ERROR_ON(nullptr == input);
795 ARM_COMPUTE_ERROR_ON(nullptr == output);
796 win.validate();
797
798 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
799 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
800
801 constexpr auto shift = uv ? 0 : 1;
802
803 // UV's width and height are subsampled
804 Window win_uv(win);
805 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
806 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
807 win_uv.validate();
808
809 Iterator in_y(input_ptr->plane(0), win);
810 Iterator in_uv(input_ptr->plane(1), win_uv);
811 Iterator out_y(output_ptr->plane(0), win);
812 Iterator out_u(output_ptr->plane(1), win);
813 Iterator out_v(output_ptr->plane(2), win);
814
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100815 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100816 {
817 const auto ta_y_top = vld2q_u8(in_y.ptr());
818 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
819 const auto ta_uv = vld2q_u8(in_uv.ptr());
820 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
821 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
822 //ta_uv.val[0] = U0 U2 U4 U6 ...
823 //ta_uv.val[1] = V0 V2 V4 V6 ...
824
825 vst2q_u8(out_y.ptr(), ta_y_top);
826 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
827
828 uint8x16x2_t uvec;
829 uvec.val[0] = ta_uv.val[0 + shift];
830 uvec.val[1] = ta_uv.val[0 + shift];
831 vst2q_u8(out_u.ptr(), uvec);
832 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
833
834 uint8x16x2_t vvec;
835 vvec.val[0] = ta_uv.val[1 - shift];
836 vvec.val[1] = ta_uv.val[1 - shift];
837 vst2q_u8(out_v.ptr(), vvec);
838 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
839 },
840 in_y, in_uv, out_y, out_u, out_v);
841}
842
Alex Gildayc357c472018-03-21 13:54:09 +0000843/** Convert IYUV to YUV4.
844 *
845 * @param[in] input Input IYUV data buffer.
846 * @param[out] output Output YUV4 buffer.
847 * @param[in] win Window for iterating the buffers.
848 *
849 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100850void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
851{
852 ARM_COMPUTE_ERROR_ON(nullptr == input);
853 ARM_COMPUTE_ERROR_ON(nullptr == output);
854 win.validate();
855
856 const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
857 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
858
859 // UV's width and height are subsampled
860 Window win_uv(win);
861 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
862 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
863 win_uv.validate();
864
865 Iterator in_y(input_ptr->plane(0), win);
866 Iterator in_u(input_ptr->plane(1), win_uv);
867 Iterator in_v(input_ptr->plane(2), win_uv);
868 Iterator out_y(output_ptr->plane(0), win);
869 Iterator out_u(output_ptr->plane(1), win);
870 Iterator out_v(output_ptr->plane(2), win);
871
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100872 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100873 {
874 const auto ta_y_top = vld2q_u8(in_y.ptr());
875 const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
876 const auto ta_u = vld1q_u8(in_u.ptr());
877 const auto ta_v = vld1q_u8(in_v.ptr());
878 //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
879 //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
880 //ta_u = U0 U2 U4 U6 ...
881 //ta_v = V0 V2 V4 V6 ...
882
883 vst2q_u8(out_y.ptr(), ta_y_top);
884 vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
885
886 uint8x16x2_t uvec;
887 uvec.val[0] = ta_u;
888 uvec.val[1] = ta_u;
889 vst2q_u8(out_u.ptr(), uvec);
890 vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
891
892 uint8x16x2_t vvec;
893 vvec.val[0] = ta_v;
894 vvec.val[1] = ta_v;
895 vst2q_u8(out_v.ptr(), vvec);
896 vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
897 },
898 in_y, in_u, in_v, out_y, out_u, out_v);
899}
900
Alex Gildayc357c472018-03-21 13:54:09 +0000901/** Convert RGB to NV12.
902 *
903 * @param[in] input Input RGB data buffer.
904 * @param[out] output Output NV12 buffer.
905 * @param[in] win Window for iterating the buffers.
906 *
907 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100908template <bool alpha>
909void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
910{
911 ARM_COMPUTE_ERROR_ON(nullptr == input);
912 ARM_COMPUTE_ERROR_ON(nullptr == output);
913 win.validate();
914
915 const auto input_ptr = static_cast<const IImage *__restrict>(input);
916 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
917
918 // UV's width and height are subsampled
919 Window win_uv(win);
920 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
921 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
922 win_uv.validate();
923
924 Iterator in(input_ptr, win);
925 Iterator out_y(output_ptr->plane(0), win);
926 Iterator out_uv(output_ptr->plane(1), win_uv);
927
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100928 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100929 {
930 const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
931 const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
932 //ta_rgb.val[0] = R0 R1 R2 R3 ...
933 //ta_rgb.val[1] = G0 G1 G2 G3 ...
934 //ta_rgb.val[2] = B0 B1 B2 B3 ...
935
936 store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
937 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
938 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
939 out_uv.ptr());
940 },
941 in, out_y, out_uv);
942}
943
Alex Gildayc357c472018-03-21 13:54:09 +0000944/** Convert RGB to IYUV.
945 *
946 * @param[in] input Input RGB data buffer.
947 * @param[out] output Output IYUV buffer.
948 * @param[in] win Window for iterating the buffers.
949 *
950 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100951template <bool alpha>
952void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
953{
954 ARM_COMPUTE_ERROR_ON(nullptr == input);
955 ARM_COMPUTE_ERROR_ON(nullptr == output);
956 win.validate();
957
958 const auto input_ptr = static_cast<const IImage *__restrict>(input);
959 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
960
961 // UV's width and height are subsampled
962 Window win_uv(win);
963 win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
964 win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
965 win_uv.validate();
966
967 Iterator in(input_ptr, win);
968 Iterator out_y(output_ptr->plane(0), win);
969 Iterator out_u(output_ptr->plane(1), win_uv);
970 Iterator out_v(output_ptr->plane(2), win_uv);
971
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100972 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100973 {
974 const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
975 const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
976 //ta_rgb.val[0] = R0 R1 R2 R3 ...
977 //ta_rgb.val[1] = G0 G1 G2 G3 ...
978 //ta_rgb.val[2] = B0 B1 B2 B3 ...
979
980 store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
981 ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
982 out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
983 out_u.ptr(), out_v.ptr());
984 },
985 in, out_y, out_u, out_v);
986}
987
Alex Gildayc357c472018-03-21 13:54:09 +0000988/** Convert RGB to YUV4.
989 *
990 * @param[in] input Input RGB data buffer.
991 * @param[out] output Output YUV4 buffer.
992 * @param[in] win Window for iterating the buffers.
993 *
994 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100995template <bool alpha>
996void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win)
997{
998 ARM_COMPUTE_ERROR_ON(nullptr == input);
999 ARM_COMPUTE_ERROR_ON(nullptr == output);
1000 win.validate();
1001
1002 const auto input_ptr = static_cast<const IImage *__restrict>(input);
1003 const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
1004
1005 Iterator in(input_ptr, win);
1006 Iterator out_y(output_ptr->plane(0), win);
1007 Iterator out_u(output_ptr->plane(1), win);
1008 Iterator out_v(output_ptr->plane(2), win);
1009
Michalis Spyrou6bff1952019-10-02 17:22:11 +01001010 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001011 {
1012 const auto ta_rgb = load_rgb(in.ptr(), alpha);
1013 //ta_rgb.val[0] = R0 R1 R2 R3 ...
1014 //ta_rgb.val[1] = G0 G1 G2 G3 ...
1015 //ta_rgb.val[2] = B0 B1 B2 B3 ...
1016
1017 store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
1018 out_y.ptr(), out_u.ptr(), out_v.ptr());
1019 },
1020 in, out_y, out_u, out_v);
1021}
Gian Marco Iodice356f6432017-09-22 11:32:21 +01001022} // namespace arm_compute