| /* |
| * Copyright (c) 2016, 2017 ARM Limited. |
| * |
| * SPDX-License-Identifier: MIT |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to |
| * deal in the Software without restriction, including without limitation the |
| * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| * sell copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| #include "arm_compute/core/Error.h" |
| #include "arm_compute/core/Helpers.h" |
| #include "arm_compute/core/IMultiImage.h" |
| #include "arm_compute/core/Utils.h" |
| |
| #include <arm_neon.h> |
| |
| namespace |
| { |
// BT.709 YUV -> RGB: weights applied to the zero-centred chroma samples (U - 128, V - 128).
//   R = Y + red_coef * V
//   G = Y + green_coef * U + green_coef2 * V
//   B = Y + blue_coef * U
constexpr float red_coef_bt709    = 1.5748f;
constexpr float green_coef_bt709  = -0.1873f;
constexpr float green_coef2_bt709 = -0.4681f;
constexpr float blue_coef_bt709   = 1.8556f;

// BT.709 RGB -> YUV: luma weights (K_g = 1 - K_r - K_b).
constexpr float rgb2yuv_bt709_kr = 0.2126f;
constexpr float rgb2yuv_bt709_kg = 0.7152f;
constexpr float rgb2yuv_bt709_kb = 0.0722f;

// BT.709 RGB -> YUV: chroma scale factors.
//   C_u = 1 / (2 * (1 - K_b))
//   C_v = 1 / (2 * (1 - K_r))
constexpr float rgb2yuv_bt709_cu = 0.5389f;
constexpr float rgb2yuv_bt709_cv = 0.6350f;
| |
| inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out) |
| { |
| const auto tmp1 = vmovl_u8(vget_low_u8(in)); |
| out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1))); |
| out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1))); |
| const auto tmp2 = vmovl_u8(vget_high_u8(in)); |
| out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2))); |
| out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2))); |
| } |
| |
| inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out) |
| { |
| out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])), |
| vqmovn_u32(vcvtq_u32_f32(in2.val[0])))); |
| out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])), |
| vqmovn_u32(vcvtq_u32_f32(in2.val[1])))); |
| out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])), |
| vqmovn_u32(vcvtq_u32_f32(in2.val[2])))); |
| } |
| |
| inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t &out) |
| { |
| const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])), |
| vqmovn_u32(vcvtq_u32_f32(in.val[1]))); |
| const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])), |
| vqmovn_u32(vcvtq_u32_f32(in.val[3]))); |
| out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high)); |
| } |
| |
| inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec, |
| float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec) |
| { |
| /* |
| Y'= 0.2126*R' + 0.7152*G' + 0.0722*B' |
| U'=-0.1146*R' - 0.3854*G' + 0.5000*B' |
| V'= 0.5000*R' - 0.4542*G' - 0.0458*B' |
| */ |
| const auto c128 = vdupq_n_f32(128.f); |
| |
| // Y = R * K_r + G * (1 - K_r - K_b) * B * K_b |
| yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr); |
| yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg); |
| yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb); |
| |
| // U = (B - Y) / (2 * (1 - K_b)) |
| uvec = vsubq_f32(bvec, yvec); |
| uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu); |
| |
| // V = (R - Y) / (2 * (1 - K_r)) |
| vvec = vsubq_f32(rvec, yvec); |
| vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv); |
| } |
| |
| inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val, |
| float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha) |
| { |
| float32x4x3_t rgb1, rgb2; |
| |
| // Compute: cb - 128 and cr - 128; |
| const auto c128 = vdupq_n_f32(128.f); |
| uvec_val = vsubq_f32(uvec_val, c128); |
| vvec_val = vsubq_f32(vvec_val, c128); |
| |
| // Compute: |
| // r = 0.0000f*f_u + 1.5748f*f_v; |
| // g = 0.1873f*f_u - 0.4681f*f_v; |
| // b = 1.8556f*f_u + 0.0000f*f_v; |
| const auto red = vmulq_n_f32(vvec_val, red_coef_bt709); |
| const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709); |
| const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709), |
| vmulq_n_f32(vvec_val, green_coef2_bt709)); |
| |
| // Compute the final r,g,b values using y1 for the first texel and y2 for the second one. |
| // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t |
| // and written back to memory using vst3 instruction |
| |
| rgb1.val[0] = vaddq_f32(yvec_val, red); |
| rgb1.val[1] = vaddq_f32(yvec_val, green); |
| rgb1.val[2] = vaddq_f32(yvec_val, blue); |
| |
| rgb2.val[0] = vaddq_f32(yyvec_val, red); |
| rgb2.val[1] = vaddq_f32(yyvec_val, green); |
| rgb2.val[2] = vaddq_f32(yyvec_val, blue); |
| |
| uint8x8x3_t u8_rgb; |
| convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb); |
| |
| if(!alpha) |
| { |
| vst3_lane_u8(&output_ptr[0], u8_rgb, 0); |
| vst3_lane_u8(&output_ptr[3], u8_rgb, 4); |
| vst3_lane_u8(&output_ptr[6], u8_rgb, 1); |
| vst3_lane_u8(&output_ptr[9], u8_rgb, 5); |
| vst3_lane_u8(&output_ptr[12], u8_rgb, 2); |
| vst3_lane_u8(&output_ptr[15], u8_rgb, 6); |
| vst3_lane_u8(&output_ptr[18], u8_rgb, 3); |
| vst3_lane_u8(&output_ptr[21], u8_rgb, 7); |
| } |
| else |
| { |
| uint8x8x4_t u8_rgba; |
| u8_rgba.val[0] = u8_rgb.val[0]; |
| u8_rgba.val[1] = u8_rgb.val[1]; |
| u8_rgba.val[2] = u8_rgb.val[2]; |
| u8_rgba.val[3] = vdup_n_u8(255); |
| vst4_lane_u8(&output_ptr[0], u8_rgba, 0); |
| vst4_lane_u8(&output_ptr[4], u8_rgba, 4); |
| vst4_lane_u8(&output_ptr[8], u8_rgba, 1); |
| vst4_lane_u8(&output_ptr[12], u8_rgba, 5); |
| vst4_lane_u8(&output_ptr[16], u8_rgba, 2); |
| vst4_lane_u8(&output_ptr[20], u8_rgba, 6); |
| vst4_lane_u8(&output_ptr[24], u8_rgba, 3); |
| vst4_lane_u8(&output_ptr[28], u8_rgba, 7); |
| } |
| } |
| |
| inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha) |
| { |
| uint8x16x3_t rgb; |
| |
| if(alpha) |
| { |
| const auto tmp = vld4q_u8(ptr); |
| rgb.val[0] = tmp.val[0]; |
| rgb.val[1] = tmp.val[1]; |
| rgb.val[2] = tmp.val[2]; |
| } |
| else |
| { |
| rgb = vld3q_u8(ptr); |
| } |
| |
| return rgb; |
| } |
| |
| inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom) |
| { |
| // Convert the uint8x16_t to float32x4x4_t |
| float32x4x4_t frvec_top, fgvec_top, fbvec_top; |
| convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top); |
| convert_uint8x16_to_float32x4x4(vec_top.val[1], fgvec_top); |
| convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top); |
| |
| float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom; |
| convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom); |
| convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom); |
| convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom); |
| |
| float32x4x4_t fyvec_top, fuvec_top, fvvec_top; |
| float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom; |
| |
| for(auto i = 0; i < 4; ++i) |
| { |
| rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i], |
| fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]); |
| rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i], |
| fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]); |
| } |
| |
| convert_float32x4x4_to_unit8x16(fyvec_top, vec_top.val[0]); |
| convert_float32x4x4_to_unit8x16(fuvec_top, vec_top.val[1]); |
| convert_float32x4x4_to_unit8x16(fvvec_top, vec_top.val[2]); |
| convert_float32x4x4_to_unit8x16(fyvec_bottom, vec_bottom.val[0]); |
| convert_float32x4x4_to_unit8x16(fuvec_bottom, vec_bottom.val[1]); |
| convert_float32x4x4_to_unit8x16(fvvec_bottom, vec_bottom.val[2]); |
| } |
| |
| inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, |
| const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, |
| unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, |
| unsigned char *const __restrict out_uv) |
| { |
| uint8x16x3_t vec_top, vec_bottom; |
| vec_top.val[0] = rvec_top; |
| vec_top.val[1] = gvec_top; |
| vec_top.val[2] = bvec_top; |
| vec_bottom.val[0] = rvec_bottom; |
| vec_bottom.val[1] = gvec_bottom; |
| vec_bottom.val[2] = bvec_bottom; |
| |
| rgb_to_yuv_conversion(vec_top, vec_bottom); |
| |
| vst1q_u8(out_y_top, vec_top.val[0]); |
| vst1q_u8(out_y_bottom, vec_bottom.val[0]); |
| |
| const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]); |
| const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]); |
| const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]); |
| const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]); |
| |
| uint8x8x2_t uvvec; |
| uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp)); |
| uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp)); |
| |
| vst2_u8(out_uv, uvvec); |
| } |
| |
| inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, |
| const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, |
| unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, |
| unsigned char *const __restrict out_u, |
| unsigned char *const __restrict out_v) |
| { |
| uint8x16x3_t vec_top, vec_bottom; |
| vec_top.val[0] = rvec_top; |
| vec_top.val[1] = gvec_top; |
| vec_top.val[2] = bvec_top; |
| vec_bottom.val[0] = rvec_bottom; |
| vec_bottom.val[1] = gvec_bottom; |
| vec_bottom.val[2] = bvec_bottom; |
| |
| rgb_to_yuv_conversion(vec_top, vec_bottom); |
| |
| vst1q_u8(out_y_top, vec_top.val[0]); |
| vst1q_u8(out_y_bottom, vec_bottom.val[0]); |
| |
| const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]); |
| const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]); |
| const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]), |
| vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1])); |
| |
| vst1_u8(out_u, vget_low_u8(uvvec)); |
| vst1_u8(out_v, vget_high_u8(uvvec)); |
| } |
| |
| inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec, |
| unsigned char *const __restrict out_y, |
| unsigned char *const __restrict out_u, |
| unsigned char *const __restrict out_v) |
| { |
| // Convert the uint8x16_t to float32x4x4_t |
| float32x4x4_t frvec, fgvec, fbvec; |
| convert_uint8x16_to_float32x4x4(rvec, frvec); |
| convert_uint8x16_to_float32x4x4(gvec, fgvec); |
| convert_uint8x16_to_float32x4x4(bvec, fbvec); |
| |
| float32x4x4_t fyvec, fuvec, fvvec; |
| for(auto i = 0; i < 4; ++i) |
| { |
| rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i], |
| fyvec.val[i], fuvec.val[i], fvvec.val[i]); |
| } |
| |
| uint8x16_t yvec, uvec, vvec; |
| convert_float32x4x4_to_unit8x16(fyvec, yvec); |
| convert_float32x4x4_to_unit8x16(fuvec, uvec); |
| convert_float32x4x4_to_unit8x16(fvvec, vvec); |
| |
| vst1q_u8(out_y, yvec); |
| vst1q_u8(out_u, uvec); |
| vst1q_u8(out_v, vvec); |
| } |
| } |
| |
| namespace arm_compute |
| { |
| void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| |
| const auto input_ptr = static_cast<const IImage *__restrict>(input); |
| const auto output_ptr = static_cast<IImage *__restrict>(output); |
| |
| Iterator in(input_ptr, win); |
| Iterator out(output_ptr, win); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta1 = vld3q_u8(in.ptr()); |
| uint8x16x4_t ta2; |
| ta2.val[0] = ta1.val[0]; |
| ta2.val[1] = ta1.val[1]; |
| ta2.val[2] = ta1.val[2]; |
| ta2.val[3] = vdupq_n_u8(255); |
| vst4q_u8(out.ptr(), ta2); |
| }, |
| in, out); |
| } |
| |
| void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| |
| const auto input_ptr = static_cast<const IImage *__restrict>(input); |
| const auto output_ptr = static_cast<IImage *__restrict>(output); |
| |
| Iterator in(input_ptr, win); |
| Iterator out(output_ptr, win); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta1 = vld4q_u8(in.ptr()); |
| uint8x16x3_t ta2; |
| ta2.val[0] = ta1.val[0]; |
| ta2.val[1] = ta1.val[1]; |
| ta2.val[2] = ta1.val[2]; |
| vst3q_u8(out.ptr(), ta2); |
| }, |
| in, out); |
| } |
| |
| template <bool yuyv, bool alpha> |
| void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| |
| const auto input_ptr = static_cast<const IImage *__restrict>(input); |
| const auto output_ptr = static_cast<IImage *__restrict>(output); |
| |
| constexpr auto element_size = alpha ? 32 : 24; |
| constexpr auto shift = yuyv ? 0 : 1; |
| |
| Iterator in(input_ptr, win); |
| Iterator out(output_ptr, win); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| float32x4x4_t uvec, yvec, vvec, yyvec; |
| const auto ta = vld4q_u8(in.ptr()); |
| //ta.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta.val[1] = U0 U2 U4 U6 ... |
| //ta.val[2] = Y1 Y3 Y5 Y7 ... |
| //ta.val[3] = V0 V2 V4 V7 ... |
| |
| // Convert the uint8x16x4_t to float32x4x4_t |
| convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec); |
| convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec); |
| convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec); |
| convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec); |
| |
| yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); |
| }, |
| in, out); |
| } |
| |
| template <bool uv, bool alpha> |
| void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); |
| const auto output_ptr = static_cast<IImage *__restrict>(output); |
| |
| constexpr auto element_size = alpha ? 32 : 24; |
| const auto out_stride = output_ptr->info()->strides_in_bytes().y(); |
| constexpr auto shift = uv ? 0 : 1; |
| |
| // UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in_y(input_ptr->plane(0), win); |
| Iterator in_uv(input_ptr->plane(1), win_uv); |
| Iterator out(output_ptr, win); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_y_top = vld2q_u8(in_y.ptr()); |
| const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); |
| const auto ta_uv = vld2q_u8(in_uv.ptr()); |
| //ta_y.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta_y.val[1] = Y1 Y3 Y5 Y7 ... |
| //ta_uv.val[0] = U0 U2 U4 U6 ... |
| //ta_uv.val[1] = V0 V2 V4 V6 ... |
| |
| // Convert the uint8x16x4_t to float32x4x4_t |
| float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec; |
| convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top); |
| convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top); |
| convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom); |
| convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom); |
| convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec); |
| convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec); |
| |
| yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); |
| |
| yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); |
| }, |
| in_y, in_uv, out); |
| } |
| |
| template <bool alpha> |
| void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); |
| const auto output_ptr = static_cast<IImage *__restrict>(output); |
| |
| constexpr auto element_size = alpha ? 32 : 24; |
| const auto out_stride = output_ptr->info()->strides_in_bytes().y(); |
| |
| // UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in_y(input_ptr->plane(0), win); |
| Iterator in_u(input_ptr->plane(1), win_uv); |
| Iterator in_v(input_ptr->plane(2), win_uv); |
| Iterator out(output_ptr, win); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_y_top = vld2q_u8(in_y.ptr()); |
| const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); |
| const auto ta_u = vld1q_u8(in_u.ptr()); |
| const auto ta_v = vld1q_u8(in_v.ptr()); |
| //ta_y.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta_y.val[1] = Y1 Y3 Y5 Y7 ... |
| //ta_u.val[0] = U0 U2 U4 U6 ... |
| //ta_v.val[0] = V0 V2 V4 V6 ... |
| |
| // Convert the uint8x16x4_t to float32x4x4_t |
| float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec; |
| convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top); |
| convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top); |
| convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom); |
| convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom); |
| convert_uint8x16_to_float32x4x4(ta_u, uvec); |
| convert_uint8x16_to_float32x4x4(ta_v, vvec); |
| |
| yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); |
| |
| yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); |
| yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); |
| }, |
| in_y, in_u, in_v, out); |
| } |
| |
| template <bool yuyv> |
| void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| constexpr auto shift = yuyv ? 0 : 1; |
| |
| // NV12's UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in(input_ptr, win); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_uv(output_ptr->plane(1), win_uv); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_top = vld4q_u8(in.ptr()); |
| const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); |
| //ta.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta.val[1] = U0 U2 U4 U6 ... |
| //ta.val[2] = Y1 Y3 Y5 Y7 ... |
| //ta.val[3] = V0 V2 V4 V7 ... |
| |
| uint8x16x2_t yvec; |
| yvec.val[0] = ta_top.val[0 + shift]; |
| yvec.val[1] = ta_top.val[2 + shift]; |
| vst2q_u8(out_y.ptr(), yvec); |
| |
| uint8x16x2_t yyvec; |
| yyvec.val[0] = ta_bottom.val[0 + shift]; |
| yyvec.val[1] = ta_bottom.val[2 + shift]; |
| vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); |
| |
| uint8x16x2_t uvvec; |
| uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); |
| uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); |
| vst2q_u8(out_uv.ptr(), uvvec); |
| }, |
| in, out_y, out_uv); |
| } |
| |
| void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| // UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in_y(input_ptr->plane(0), win); |
| Iterator in_u(input_ptr->plane(1), win_uv); |
| Iterator in_v(input_ptr->plane(2), win_uv); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_uv(output_ptr->plane(1), win_uv); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_y_top = vld2q_u8(in_y.ptr()); |
| const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); |
| uint8x16x2_t ta_uv; |
| ta_uv.val[0] = vld1q_u8(in_u.ptr()); |
| ta_uv.val[1] = vld1q_u8(in_v.ptr()); |
| //ta_y.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta_y.val[1] = Y1 Y3 Y5 Y7 ... |
| //ta_uv.val[0] = U0 U2 U4 U6 ... |
| //ta_uv.val[1] = V0 V2 V4 V6 ... |
| |
| vst2q_u8(out_y.ptr(), ta_y_top); |
| vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); |
| vst2q_u8(out_uv.ptr(), ta_uv); |
| }, |
| in_y, in_u, in_v, out_y, out_uv); |
| } |
| |
| template <bool uv> |
| void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| constexpr auto shift = uv ? 0 : 1; |
| |
| // UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in_y(input_ptr->plane(0), win); |
| Iterator in_uv(input_ptr->plane(1), win_uv); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_u(output_ptr->plane(1), win_uv); |
| Iterator out_v(output_ptr->plane(2), win_uv); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_y_top = vld2q_u8(in_y.ptr()); |
| const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); |
| const auto ta_uv = vld2q_u8(in_uv.ptr()); |
| //ta_y.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta_y.val[1] = Y1 Y3 Y5 Y7 ... |
| //ta_uv.val[0] = U0 U2 U4 U6 ... |
| //ta_uv.val[1] = V0 V2 V4 V6 ... |
| |
| vst2q_u8(out_y.ptr(), ta_y_top); |
| vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); |
| vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]); |
| vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]); |
| }, |
| in_y, in_uv, out_y, out_u, out_v); |
| } |
| |
| template <bool yuyv> |
| void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| constexpr auto shift = yuyv ? 0 : 1; |
| |
| // Destination's UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in(input_ptr, win); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_u(output_ptr->plane(1), win_uv); |
| Iterator out_v(output_ptr->plane(2), win_uv); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_top = vld4q_u8(in.ptr()); |
| const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); |
| //ta.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta.val[1] = U0 U2 U4 U6 ... |
| //ta.val[2] = Y1 Y3 Y5 Y7 ... |
| //ta.val[3] = V0 V2 V4 V7 ... |
| |
| uint8x16x2_t yvec; |
| yvec.val[0] = ta_top.val[0 + shift]; |
| yvec.val[1] = ta_top.val[2 + shift]; |
| vst2q_u8(out_y.ptr(), yvec); |
| |
| uint8x16x2_t yyvec; |
| yyvec.val[0] = ta_bottom.val[0 + shift]; |
| yyvec.val[1] = ta_bottom.val[2 + shift]; |
| vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); |
| |
| uint8x16_t uvec; |
| uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); |
| vst1q_u8(out_u.ptr(), uvec); |
| |
| uint8x16_t vvec; |
| vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); |
| vst1q_u8(out_v.ptr(), vvec); |
| }, |
| in, out_y, out_u, out_v); |
| } |
| |
| template <bool uv> |
| void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| constexpr auto shift = uv ? 0 : 1; |
| |
| // UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in_y(input_ptr->plane(0), win); |
| Iterator in_uv(input_ptr->plane(1), win_uv); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_u(output_ptr->plane(1), win); |
| Iterator out_v(output_ptr->plane(2), win); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_y_top = vld2q_u8(in_y.ptr()); |
| const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); |
| const auto ta_uv = vld2q_u8(in_uv.ptr()); |
| //ta_y.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta_y.val[1] = Y1 Y3 Y5 Y7 ... |
| //ta_uv.val[0] = U0 U2 U4 U6 ... |
| //ta_uv.val[1] = V0 V2 V4 V6 ... |
| |
| vst2q_u8(out_y.ptr(), ta_y_top); |
| vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); |
| |
| uint8x16x2_t uvec; |
| uvec.val[0] = ta_uv.val[0 + shift]; |
| uvec.val[1] = ta_uv.val[0 + shift]; |
| vst2q_u8(out_u.ptr(), uvec); |
| vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); |
| |
| uint8x16x2_t vvec; |
| vvec.val[0] = ta_uv.val[1 - shift]; |
| vvec.val[1] = ta_uv.val[1 - shift]; |
| vst2q_u8(out_v.ptr(), vvec); |
| vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); |
| }, |
| in_y, in_uv, out_y, out_u, out_v); |
| } |
| |
| void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| // UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in_y(input_ptr->plane(0), win); |
| Iterator in_u(input_ptr->plane(1), win_uv); |
| Iterator in_v(input_ptr->plane(2), win_uv); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_u(output_ptr->plane(1), win); |
| Iterator out_v(output_ptr->plane(2), win); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_y_top = vld2q_u8(in_y.ptr()); |
| const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); |
| const auto ta_u = vld1q_u8(in_u.ptr()); |
| const auto ta_v = vld1q_u8(in_v.ptr()); |
| //ta_y.val[0] = Y0 Y2 Y4 Y6 ... |
| //ta_y.val[1] = Y1 Y3 Y5 Y7 ... |
| //ta_u = U0 U2 U4 U6 ... |
| //ta_v = V0 V2 V4 V6 ... |
| |
| vst2q_u8(out_y.ptr(), ta_y_top); |
| vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); |
| |
| uint8x16x2_t uvec; |
| uvec.val[0] = ta_u; |
| uvec.val[1] = ta_u; |
| vst2q_u8(out_u.ptr(), uvec); |
| vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); |
| |
| uint8x16x2_t vvec; |
| vvec.val[0] = ta_v; |
| vvec.val[1] = ta_v; |
| vst2q_u8(out_v.ptr(), vvec); |
| vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); |
| }, |
| in_y, in_u, in_v, out_y, out_u, out_v); |
| } |
| |
| template <bool alpha> |
| void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| // UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in(input_ptr, win); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_uv(output_ptr->plane(1), win_uv); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_rgb_top = load_rgb(in.ptr(), alpha); |
| const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); |
| //ta_rgb.val[0] = R0 R1 R2 R3 ... |
| //ta_rgb.val[1] = G0 G1 G2 G3 ... |
| //ta_rgb.val[2] = B0 B1 B2 B3 ... |
| |
| store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], |
| ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], |
| out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), |
| out_uv.ptr()); |
| }, |
| in, out_y, out_uv); |
| } |
| |
| template <bool alpha> |
| void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| // UV's width and height are subsampled |
| Window win_uv(win); |
| win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); |
| win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); |
| win_uv.validate(); |
| |
| Iterator in(input_ptr, win); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_u(output_ptr->plane(1), win_uv); |
| Iterator out_v(output_ptr->plane(2), win_uv); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_rgb_top = load_rgb(in.ptr(), alpha); |
| const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); |
| //ta_rgb.val[0] = R0 R1 R2 R3 ... |
| //ta_rgb.val[1] = G0 G1 G2 G3 ... |
| //ta_rgb.val[2] = B0 B1 B2 B3 ... |
| |
| store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], |
| ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], |
| out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), |
| out_u.ptr(), out_v.ptr()); |
| }, |
| in, out_y, out_u, out_v); |
| } |
| |
| template <bool alpha> |
| void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) |
| { |
| ARM_COMPUTE_ERROR_ON(nullptr == input); |
| ARM_COMPUTE_ERROR_ON(nullptr == output); |
| win.validate(); |
| |
| const auto input_ptr = static_cast<const IImage *__restrict>(input); |
| const auto output_ptr = static_cast<IMultiImage *__restrict>(output); |
| |
| Iterator in(input_ptr, win); |
| Iterator out_y(output_ptr->plane(0), win); |
| Iterator out_u(output_ptr->plane(1), win); |
| Iterator out_v(output_ptr->plane(2), win); |
| |
| execute_window_loop(win, [&](const Coordinates & id) |
| { |
| const auto ta_rgb = load_rgb(in.ptr(), alpha); |
| //ta_rgb.val[0] = R0 R1 R2 R3 ... |
| //ta_rgb.val[1] = G0 G1 G2 G3 ... |
| //ta_rgb.val[2] = B0 B1 B2 B3 ... |
| |
| store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2], |
| out_y.ptr(), out_u.ptr(), out_v.ptr()); |
| }, |
| in, out_y, out_u, out_v); |
| } |
| } // namespace arm_compute |