Blame - src/core/NEON/kernels/detail/NEColorConvertHelper.inl - ml/ComputeLibrary

blob: ac196d9dbb2ec30bab3fc7ef3d0c6657680a96ab [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame]	2	* Copyright (c) 2016-2020 Arm Limited.
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/Error.h"
				25	#include "arm_compute/core/Helpers.h"
				26	#include "arm_compute/core/IMultiImage.h"
				27	#include "arm_compute/core/Utils.h"
Georgios Pinitas	ddb93bb	2020-10-02 16:38:59 +0100	[diff] [blame]	28	#include "src/core/NEON/NEMath.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	29
				30	#include <arm_neon.h>
				31
				32	namespace
				33	{
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	34	#ifndef DOXYGEN_SKIP_THIS
// BT.709 YUV -> RGB coefficients, applied to chroma samples after the 128 bias is removed:
//   R = Y + red_coef_bt709 * V
//   G = Y + green_coef_bt709 * U + green_coef2_bt709 * V
//   B = Y + blue_coef_bt709 * U
constexpr float red_coef_bt709    = 1.5748f;
constexpr float green_coef_bt709  = -0.1873f;
constexpr float green_coef2_bt709 = -0.4681f;
constexpr float blue_coef_bt709   = 1.8556f;

// BT.709 RGB -> YUV luma weights for red and blue
constexpr float rgb2yuv_bt709_kr = 0.2126f;
constexpr float rgb2yuv_bt709_kb = 0.0722f;
// K_g = 1 - K_r - K_b
constexpr float rgb2yuv_bt709_kg = 0.7152f;
// C_u = 1 / (2 * (1 - K_b))
constexpr float rgb2yuv_bt709_cu = 0.5389f;
// C_v = 1 / (2 * (1 - K_r))
constexpr float rgb2yuv_bt709_cv = 0.6350f;

// RGB -> U8 greyscale weights: identical to the BT.709 luma weights above
constexpr float rgb2u8_red_coef   = rgb2yuv_bt709_kr;
constexpr float rgb2u8_green_coef = rgb2yuv_bt709_kg;
constexpr float rgb2u8_blue_coef  = rgb2yuv_bt709_kb;
				52
/** Per-lane weighted sum of three colour channels.
 *
 * @param[in] rcolor Red channel values.
 * @param[in] gcolor Green channel values.
 * @param[in] bcolor Blue channel values.
 * @param[in] rcoef  Weight applied to the red channel.
 * @param[in] gcoef  Weight applied to the green channel.
 * @param[in] bcoef  Weight applied to the blue channel.
 *
 * @return rcolor * rcoef + gcolor * gcoef + bcolor * bcoef for each of the four lanes.
 */
inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor, const float32x4_t &gcolor, const float32x4_t &bcolor,
                                                const float rcoef, const float gcoef, const float bcoef)
{
    // Start from the red term and accumulate green, then blue, with multiply-accumulate
    const float32x4_t red_term       = vmulq_n_f32(rcolor, rcoef);
    const float32x4_t red_green_term = vmlaq_n_f32(red_term, gcolor, gcoef);
    return vmlaq_n_f32(red_green_term, bcolor, bcoef);
}
				61
				62	inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out)
				63	{
				64	float32x4x4_t out_float32;
				65
				66	//Conversion from 3(RGB) 4 uint8s to 3(RGB) 4 floats
Manuel Bottini	21079dd	2019-10-29 17:20:09 +0000	[diff] [blame]	67	const float32x4x4_t r_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[0]);
				68	const float32x4x4_t g_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[1]);
				69	const float32x4x4_t b_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[2]);
Manuel Bottini	4284bfa	2018-09-26 15:33:15 +0100	[diff] [blame]	70
				71	//New grayscale image = ( (RED_COEFF * R) + (GREEN_COEFF * G) + (BLUE_COEFF * B) )
				72	//Computation of 1(Greyscale) 4 uint8 using 3(RGB) 4 uint8s float
				73	out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0],
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	74	rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
Manuel Bottini	4284bfa	2018-09-26 15:33:15 +0100	[diff] [blame]	75
				76	out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1],
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	77	rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
Manuel Bottini	4284bfa	2018-09-26 15:33:15 +0100	[diff] [blame]	78
				79	out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2],
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	80	rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
Manuel Bottini	4284bfa	2018-09-26 15:33:15 +0100	[diff] [blame]	81
				82	out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3], g_float32.val[3], b_float32.val[3],
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	83	rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
Manuel Bottini	4284bfa	2018-09-26 15:33:15 +0100	[diff] [blame]	84
				85	//Conversion from 1(Greyscale) 4 floats to 1(Greyscale) 4 uint8s
Sang-Hoon Park	c3a7420	2019-11-22 16:05:46 +0000	[diff] [blame]	86	arm_compute::convert_float32x4x4_to_uint8x16(out_float32, out);
Manuel Bottini	4284bfa	2018-09-26 15:33:15 +0100	[diff] [blame]	87	}
				88
/** Convert four RGB pixels (as floats) to YUV using the BT.709 constants of this file.
 *
 * Resulting relations, before the 128 bias is added to the chroma:
 *   Y' =  0.2126R' + 0.7152G' + 0.0722B'
 *   U' = -0.1146R' - 0.3854G' + 0.5000B'
 *   V' =  0.5000R' - 0.4542G' - 0.0458B'
 *
 * @param[in]  rvec Red values.
 * @param[in]  gvec Green values.
 * @param[in]  bvec Blue values.
 * @param[out] yvec Luma.
 * @param[out] uvec Blue-difference chroma, biased by 128.
 * @param[out] vvec Red-difference chroma, biased by 128.
 */
inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
                                   float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
{
    const float32x4_t chroma_bias = vdupq_n_f32(128.f);

    // Y = R * K_r + G * K_g + B * K_b, with K_g = 1 - K_r - K_b
    yvec = vmlaq_n_f32(vmlaq_n_f32(vmulq_n_f32(rvec, rgb2yuv_bt709_kr), gvec, rgb2yuv_bt709_kg),
                       bvec, rgb2yuv_bt709_kb);

    // U = 128 + (B - Y) * C_u, with C_u = 1 / (2 * (1 - K_b))
    const float32x4_t blue_diff = vsubq_f32(bvec, yvec);
    uvec                        = vmlaq_n_f32(chroma_bias, blue_diff, rgb2yuv_bt709_cu);

    // V = 128 + (R - Y) * C_v, with C_v = 1 / (2 * (1 - K_r))
    const float32x4_t red_diff = vsubq_f32(rvec, yvec);
    vvec                       = vmlaq_n_f32(chroma_bias, red_diff, rgb2yuv_bt709_cv);
}
				112
				113	inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
				114	float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
				115	{
				116	float32x4x3_t rgb1, rgb2;
				117
				118	// Compute: cb - 128 and cr - 128;
				119	const auto c128 = vdupq_n_f32(128.f);
				120	uvec_val = vsubq_f32(uvec_val, c128);
				121	vvec_val = vsubq_f32(vvec_val, c128);
				122
				123	// Compute:
				124	// r = 0.0000ff_u + 1.5748ff_v;
				125	// g = 0.1873ff_u - 0.4681ff_v;
				126	// b = 1.8556ff_u + 0.0000ff_v;
				127	const auto red = vmulq_n_f32(vvec_val, red_coef_bt709);
				128	const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
				129	const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
				130	vmulq_n_f32(vvec_val, green_coef2_bt709));
				131
				132	// Compute the final r,g,b values using y1 for the first texel and y2 for the second one.
				133	// the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t
				134	// and written back to memory using vst3 instruction
				135
				136	rgb1.val[0] = vaddq_f32(yvec_val, red);
				137	rgb1.val[1] = vaddq_f32(yvec_val, green);
				138	rgb1.val[2] = vaddq_f32(yvec_val, blue);
				139
				140	rgb2.val[0] = vaddq_f32(yyvec_val, red);
				141	rgb2.val[1] = vaddq_f32(yyvec_val, green);
				142	rgb2.val[2] = vaddq_f32(yyvec_val, blue);
				143
				144	uint8x8x3_t u8_rgb;
Manuel Bottini	21079dd	2019-10-29 17:20:09 +0000	[diff] [blame]	145	arm_compute::convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	146
				147	if(!alpha)
				148	{
				149	vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
				150	vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
				151	vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
				152	vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
				153	vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
				154	vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
				155	vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
				156	vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
				157	}
				158	else
				159	{
				160	uint8x8x4_t u8_rgba;
				161	u8_rgba.val[0] = u8_rgb.val[0];
				162	u8_rgba.val[1] = u8_rgb.val[1];
				163	u8_rgba.val[2] = u8_rgb.val[2];
				164	u8_rgba.val[3] = vdup_n_u8(255);
				165	vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
				166	vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
				167	vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
				168	vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
				169	vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
				170	vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
				171	vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
				172	vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
				173	}
				174	}
				175
				176	inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
				177	{
				178	uint8x16x3_t rgb;
				179
				180	if(alpha)
				181	{
				182	const auto tmp = vld4q_u8(ptr);
				183	rgb.val[0] = tmp.val[0];
				184	rgb.val[1] = tmp.val[1];
				185	rgb.val[2] = tmp.val[2];
				186	}
				187	else
				188	{
				189	rgb = vld3q_u8(ptr);
				190	}
				191
				192	return rgb;
				193	}
				194
				195	inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
				196	{
				197	// Convert the uint8x16_t to float32x4x4_t
Manuel Bottini	21079dd	2019-10-29 17:20:09 +0000	[diff] [blame]	198	const float32x4x4_t frvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[0]);
				199	const float32x4x4_t fgvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[1]);
				200	const float32x4x4_t fbvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[2]);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	201
Manuel Bottini	21079dd	2019-10-29 17:20:09 +0000	[diff] [blame]	202	const float32x4x4_t frvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[0]);
				203	const float32x4x4_t fgvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[1]);
				204	const float32x4x4_t fbvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[2]);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	205
				206	float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
				207	float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
				208
				209	for(auto i = 0; i < 4; ++i)
				210	{
				211	rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i],
				212	fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
				213	rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i],
				214	fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
				215	}
				216
Sang-Hoon Park	c3a7420	2019-11-22 16:05:46 +0000	[diff] [blame]	217	arm_compute::convert_float32x4x4_to_uint8x16(fyvec_top, vec_top.val[0]);
				218	arm_compute::convert_float32x4x4_to_uint8x16(fuvec_top, vec_top.val[1]);
				219	arm_compute::convert_float32x4x4_to_uint8x16(fvvec_top, vec_top.val[2]);
				220	arm_compute::convert_float32x4x4_to_uint8x16(fyvec_bottom, vec_bottom.val[0]);
				221	arm_compute::convert_float32x4x4_to_uint8x16(fuvec_bottom, vec_bottom.val[1]);
				222	arm_compute::convert_float32x4x4_to_uint8x16(fvvec_bottom, vec_bottom.val[2]);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	223	}
				224
				225	inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
				226	const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
				227	unsigned char const __restrict out_y_top, unsigned char const __restrict out_y_bottom,
				228	unsigned char *const __restrict out_uv)
				229	{
				230	uint8x16x3_t vec_top, vec_bottom;
				231	vec_top.val[0] = rvec_top;
				232	vec_top.val[1] = gvec_top;
				233	vec_top.val[2] = bvec_top;
				234	vec_bottom.val[0] = rvec_bottom;
				235	vec_bottom.val[1] = gvec_bottom;
				236	vec_bottom.val[2] = bvec_bottom;
				237
				238	rgb_to_yuv_conversion(vec_top, vec_bottom);
				239
				240	vst1q_u8(out_y_top, vec_top.val[0]);
				241	vst1q_u8(out_y_bottom, vec_bottom.val[0]);
				242
				243	const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
				244	const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
				245	const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
				246	const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
				247
				248	uint8x8x2_t uvvec;
				249	uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
				250	uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
				251
				252	vst2_u8(out_uv, uvvec);
				253	}
				254
				255	inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
				256	const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
				257	unsigned char const __restrict out_y_top, unsigned char const __restrict out_y_bottom,
				258	unsigned char *const __restrict out_u,
				259	unsigned char *const __restrict out_v)
				260	{
				261	uint8x16x3_t vec_top, vec_bottom;
				262	vec_top.val[0] = rvec_top;
				263	vec_top.val[1] = gvec_top;
				264	vec_top.val[2] = bvec_top;
				265	vec_bottom.val[0] = rvec_bottom;
				266	vec_bottom.val[1] = gvec_bottom;
				267	vec_bottom.val[2] = bvec_bottom;
				268
				269	rgb_to_yuv_conversion(vec_top, vec_bottom);
				270
				271	vst1q_u8(out_y_top, vec_top.val[0]);
				272	vst1q_u8(out_y_bottom, vec_bottom.val[0]);
				273
				274	const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
				275	const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
				276	const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
				277	vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
				278
				279	vst1_u8(out_u, vget_low_u8(uvvec));
				280	vst1_u8(out_v, vget_high_u8(uvvec));
				281	}
				282
				283	inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
				284	unsigned char *const __restrict out_y,
				285	unsigned char *const __restrict out_u,
				286	unsigned char *const __restrict out_v)
				287	{
				288	// Convert the uint8x16_t to float32x4x4_t
Manuel Bottini	21079dd	2019-10-29 17:20:09 +0000	[diff] [blame]	289	const float32x4x4_t frvec = arm_compute::convert_uint8x16_to_float32x4x4(rvec);
				290	const float32x4x4_t fgvec = arm_compute::convert_uint8x16_to_float32x4x4(gvec);
				291	const float32x4x4_t fbvec = arm_compute::convert_uint8x16_to_float32x4x4(bvec);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	292
				293	float32x4x4_t fyvec, fuvec, fvvec;
				294	for(auto i = 0; i < 4; ++i)
				295	{
				296	rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
				297	fyvec.val[i], fuvec.val[i], fvvec.val[i]);
				298	}
				299
				300	uint8x16_t yvec, uvec, vvec;
Sang-Hoon Park	c3a7420	2019-11-22 16:05:46 +0000	[diff] [blame]	301	arm_compute::convert_float32x4x4_to_uint8x16(fyvec, yvec);
				302	arm_compute::convert_float32x4x4_to_uint8x16(fuvec, uvec);
				303	arm_compute::convert_float32x4x4_to_uint8x16(fvvec, vvec);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	304
				305	vst1q_u8(out_y, yvec);
				306	vst1q_u8(out_u, uvec);
				307	vst1q_u8(out_v, vvec);
				308	}
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	309	#endif /* DOXYGEN_SKIP_THIS */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	310	}
				311
				312	namespace arm_compute
				313	{
/** Convert RGB to RGBX.
 *
 * Each window step handles 16 pixels: 48 bytes are read and 64 bytes written, the added
 * fourth channel being set to 255.
 *
 * @param[in]  input  Input RGB data buffer (pointer to an IImage).
 * @param[out] output Output RGBX buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    Iterator in(input_ptr, win);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // De-interleave 16 RGB pixels, append an opaque fourth plane and re-interleave
        const auto   ta1 = vld3q_u8(in.ptr());
        uint8x16x4_t ta2;
        ta2.val[0] = ta1.val[0];
        ta2.val[1] = ta1.val[1];
        ta2.val[2] = ta1.val[2];
        ta2.val[3] = vdupq_n_u8(255);
        vst4q_u8(out.ptr(), ta2);
    },
    in, out);
}
				344
/** Convert RGB to U8.
 *
 * Each window step reads 16 RGB pixels (48 bytes) and writes 16 greyscale bytes.
 *
 * @param[in]  input  Input RGB data buffer (pointer to an IImage).
 * @param[out] output Output U8 buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    Iterator in(input_ptr, win);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // De-interleave, compute the weighted channel sum, store the 16 results
        const auto ta1 = vld3q_u8(in.ptr());
        uint8x16_t ta2;
        rgb_to_u8_conversion(ta1, ta2);
        vst1q_u8(out.ptr(), ta2);
    },
    in, out);
}
				372
/** Convert RGBX to RGB.
 *
 * Each window step reads 16 four-byte pixels (64 bytes) and writes 48 bytes; the fourth
 * channel is discarded.
 *
 * @param[in]  input  Input RGBX data buffer (pointer to an IImage).
 * @param[out] output Output RGB buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    Iterator in(input_ptr, win);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // De-interleave four planes, keep the first three and re-interleave
        const auto   ta1 = vld4q_u8(in.ptr());
        uint8x16x3_t ta2;
        ta2.val[0] = ta1.val[0];
        ta2.val[1] = ta1.val[1];
        ta2.val[2] = ta1.val[2];
        vst3q_u8(out.ptr(), ta2);
    },
    in, out);
}
				402
/** Convert YUYV to RGB.
 *
 * Each window step reads 64 bytes (32 pixels) and writes 32 RGB or RGBA pixels.
 *
 * @tparam yuyv  True if the input byte order is Y0 U0 Y1 V0, false if it is U0 Y0 V0 Y1.
 * @tparam alpha True to write 4 bytes per pixel (alpha = 255), false to write 3 bytes per pixel.
 *
 * @param[in]  input  Input YUYV data buffer (pointer to an IImage).
 * @param[out] output Output RGB buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool yuyv, bool alpha>
void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation call: 8 pixels of 4 or 3 bytes
    constexpr auto element_size = alpha ? 32 : 24;
    // Index shift that swaps the luma and chroma planes for the U-first byte order
    constexpr auto shift = yuyv ? 0 : 1;

    Iterator in(input_ptr, win);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta = vld4q_u8(in.ptr());
        // For the Y-first byte order (shift == 0):
        //ta.val[0] = Y0 Y2 Y4 Y6 ...
        //ta.val[1] = U0 U2 U4 U6 ...
        //ta.val[2] = Y1 Y3 Y5 Y7 ...
        //ta.val[3] = V0 V2 V4 V6 ...

        // Convert the uint8x16x4_t to float32x4x4_t
        const float32x4x4_t yvec  = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[0 + shift]);
        const float32x4x4_t uvec  = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[1 - shift]);
        const float32x4x4_t yyvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[2 + shift]);
        const float32x4x4_t vvec  = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[3 - shift]);

        // Each call converts 8 pixels (4 pixel pairs) and writes element_size bytes
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec.val[i], uvec.val[i], yyvec.val[i], vvec.val[i], out.ptr() + i * element_size, alpha);
        }
    },
    in, out);
}
				446
/** Convert NV12 to RGB.
 *
 * Each window step converts a block of 32x2 pixels: 32 luma bytes from each of two
 * consecutive rows plus the 16 (U, V) pairs those pixels share.
 *
 * @tparam uv    True if the chroma plane is ordered U V U V ..., false if it is ordered V U V U ...
 * @tparam alpha True to write 4 bytes per pixel (alpha = 255), false to write 3 bytes per pixel.
 *
 * @param[in]  input  Input NV12 data buffer (pointer to an IMultiImage; plane 0 = Y, plane 1 = interleaved chroma).
 * @param[out] output Output RGB buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool uv, bool alpha>
void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation call: 8 pixels of 4 or 3 bytes
    constexpr auto element_size = alpha ? 32 : 24;
    const auto     out_stride   = output_ptr->info()->strides_in_bytes().y();
    // Row stride of the luma plane; constant for the whole loop, so read it once
    const auto     in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();
    // Index shift that swaps the two chroma planes for the V-first ordering
    constexpr auto shift = uv ? 0 : 1;

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_uv(input_ptr->plane(1), win_uv);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_uv       = vld2q_u8(in_uv.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_uv.val[0] = first byte of every chroma pair (U when shift == 0)
        //ta_uv.val[1] = second byte of every chroma pair (V when shift == 0)

        // Convert the uint8x16_t planes to float32x4x4_t
        float32x4x4_t yvec_top     = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
        float32x4x4_t yyvec_top    = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
        float32x4x4_t yvec_bottom  = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
        float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
        float32x4x4_t uvec         = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift]);
        float32x4x4_t vvec         = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift]);

        // Upper row: 4 calls of 8 pixels each
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_top.val[i], uvec.val[i], yyvec_top.val[i], vvec.val[i], out.ptr() + i * element_size, alpha);
        }

        // Lower row reuses the same chroma samples
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_bottom.val[i], uvec.val[i], yyvec_bottom.val[i], vvec.val[i], out.ptr() + out_stride + i * element_size, alpha);
        }
    },
    in_y, in_uv, out);
}
				508
/** Convert IYUV to RGB.
 *
 * Each window step converts a block of 32x2 pixels: 32 luma bytes from each of two
 * consecutive rows plus the 16 U and 16 V samples those pixels share.
 *
 * @tparam alpha True to write 4 bytes per pixel (alpha = 255), false to write 3 bytes per pixel.
 *
 * @param[in]  input  Input IYUV data buffer (pointer to an IMultiImage; plane 0 = Y, plane 1 = U, plane 2 = V).
 * @param[out] output Output RGB buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool alpha>
void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation call: 8 pixels of 4 or 3 bytes
    constexpr auto element_size = alpha ? 32 : 24;
    const auto     out_stride   = output_ptr->info()->strides_in_bytes().y();
    // Row stride of the luma plane; constant for the whole loop, so read it once
    const auto     in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_u(input_ptr->plane(1), win_uv);
    Iterator in_v(input_ptr->plane(2), win_uv);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto *y_top_ptr    = in_y.ptr();
        const auto *y_bottom_ptr = in_y.ptr() + in_y_stride;
        const auto *u_ptr        = in_u.ptr();
        const auto *v_ptr        = in_v.ptr();

        // Chroma planes are not interleaved: 16 consecutive samples each
        //ta_u = U0 U1 U2 U3 ...
        //ta_v = V0 V1 V2 V3 ...
        const auto ta_u = vld1q_u8(u_ptr);
        const auto ta_v = vld1q_u8(v_ptr);

        // Work-around issue in gcc 9(>=) where vld2q might cause issues with register allocation:
        // load the 32 luma bytes linearly and de-interleave with vuzp1q/vuzp2q instead.
        // The guard tests __aarch64__ (the macro AArch64 compilers define); the earlier spelling
        // __arch64__ is defined by no compiler, so this branch was never compiled.
#if defined(__aarch64__)
        const auto ta0_y_top    = vld1q_u8(y_top_ptr);
        const auto ta1_y_top    = vld1q_u8(y_top_ptr + 16);
        const auto ta0_y_bottom = vld1q_u8(y_bottom_ptr);
        const auto ta1_y_bottom = vld1q_u8(y_bottom_ptr + 16);

        // Even luma samples (Y0 Y2 ...) and odd luma samples (Y1 Y3 ...) of each row, as floats
        float32x4x4_t yvec_top     = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_top, ta1_y_top));
        float32x4x4_t yyvec_top    = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_top, ta1_y_top));
        float32x4x4_t yvec_bottom  = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_bottom, ta1_y_bottom));
        float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_bottom, ta1_y_bottom));
#else  /* defined(__aarch64__) */
        const auto ta_y_top    = vld2q_u8(y_top_ptr);
        const auto ta_y_bottom = vld2q_u8(y_bottom_ptr);
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...

        // Convert the uint8x16_t planes to float32x4x4_t
        float32x4x4_t yvec_top     = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
        float32x4x4_t yyvec_top    = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
        float32x4x4_t yvec_bottom  = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
        float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
#endif /* defined(__aarch64__) */

        float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_u);
        float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_v);

        // Upper row: 4 calls of 8 pixels each
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_top.val[i], uvec.val[i], yyvec_top.val[i], vvec.val[i], out.ptr() + i * element_size, alpha);
        }

        // Lower row reuses the same chroma samples
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_bottom.val[i], uvec.val[i], yyvec_bottom.val[i], vvec.val[i], out.ptr() + out_stride + i * element_size, alpha);
        }
    },
    in_y, in_u, in_v, out);
}
				594
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	595	/** Convert YUYV to NV12.
				596	*
				597	* @param[in] input Input YUYV data buffer.
				598	* @param[out] output Output NV12 buffer.
				599	* @param[in] win Window for iterating the buffers.
				600	*
				601	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	602	template <bool yuyv>
				603	void colorconvert_yuyv_to_nv12(const void __restrict input, void __restrict output, const Window &win)
				604	{
				605	ARM_COMPUTE_ERROR_ON(nullptr == input);
				606	ARM_COMPUTE_ERROR_ON(nullptr == output);
				607	win.validate();
				608
				609	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				610	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				611
				612	constexpr auto shift = yuyv ? 0 : 1;
				613
				614	// NV12's UV's width and height are subsampled
				615	Window win_uv(win);
				616	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				617	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				618	win_uv.validate();
				619
				620	Iterator in(input_ptr, win);
				621	Iterator out_y(output_ptr->plane(0), win);
				622	Iterator out_uv(output_ptr->plane(1), win_uv);
				623
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	624	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	625	{
				626	const auto ta_top = vld4q_u8(in.ptr());
				627	const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
				628	//ta.val[0] = Y0 Y2 Y4 Y6 ...
				629	//ta.val[1] = U0 U2 U4 U6 ...
				630	//ta.val[2] = Y1 Y3 Y5 Y7 ...
				631	//ta.val[3] = V0 V2 V4 V7 ...
				632
				633	uint8x16x2_t yvec;
				634	yvec.val[0] = ta_top.val[0 + shift];
				635	yvec.val[1] = ta_top.val[2 + shift];
				636	vst2q_u8(out_y.ptr(), yvec);
				637
				638	uint8x16x2_t yyvec;
				639	yyvec.val[0] = ta_bottom.val[0 + shift];
				640	yyvec.val[1] = ta_bottom.val[2 + shift];
				641	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
				642
				643	uint8x16x2_t uvvec;
				644	uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
				645	uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
				646	vst2q_u8(out_uv.ptr(), uvvec);
				647	},
				648	in, out_y, out_uv);
				649	}
				650
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	651	/** Convert IYUV to NV12.
				652	*
				653	* @param[in] input Input IYUV data buffer.
				654	* @param[out] output Output NV12 buffer.
				655	* @param[in] win Window for iterating the buffers.
				656	*
				657	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	658	void colorconvert_iyuv_to_nv12(const void __restrict input, void __restrict output, const Window &win)
				659	{
				660	ARM_COMPUTE_ERROR_ON(nullptr == input);
				661	ARM_COMPUTE_ERROR_ON(nullptr == output);
				662	win.validate();
				663
				664	const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
				665	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				666
				667	// UV's width and height are subsampled
				668	Window win_uv(win);
				669	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				670	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				671	win_uv.validate();
				672
				673	Iterator in_y(input_ptr->plane(0), win);
				674	Iterator in_u(input_ptr->plane(1), win_uv);
				675	Iterator in_v(input_ptr->plane(2), win_uv);
				676	Iterator out_y(output_ptr->plane(0), win);
				677	Iterator out_uv(output_ptr->plane(1), win_uv);
				678
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	679	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	680	{
				681	const auto ta_y_top = vld2q_u8(in_y.ptr());
				682	const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
				683	uint8x16x2_t ta_uv;
				684	ta_uv.val[0] = vld1q_u8(in_u.ptr());
				685	ta_uv.val[1] = vld1q_u8(in_v.ptr());
				686	//ta_y.val[0] = Y0 Y2 Y4 Y6 ...
				687	//ta_y.val[1] = Y1 Y3 Y5 Y7 ...
				688	//ta_uv.val[0] = U0 U2 U4 U6 ...
				689	//ta_uv.val[1] = V0 V2 V4 V6 ...
				690
				691	vst2q_u8(out_y.ptr(), ta_y_top);
				692	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
				693	vst2q_u8(out_uv.ptr(), ta_uv);
				694	},
				695	in_y, in_u, in_v, out_y, out_uv);
				696	}
				697
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	698	/** Convert NV12 to IYUV.
				699	*
				700	* @param[in] input Input NV12 data buffer.
				701	* @param[out] output Output IYUV buffer.
				702	* @param[in] win Window for iterating the buffers.
				703	*
				704	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	705	template <bool uv>
				706	void colorconvert_nv12_to_iyuv(const void __restrict input, void __restrict output, const Window &win)
				707	{
				708	ARM_COMPUTE_ERROR_ON(nullptr == input);
				709	ARM_COMPUTE_ERROR_ON(nullptr == output);
				710	win.validate();
				711
				712	const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
				713	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				714
				715	constexpr auto shift = uv ? 0 : 1;
				716
				717	// UV's width and height are subsampled
				718	Window win_uv(win);
				719	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				720	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				721	win_uv.validate();
				722
				723	Iterator in_y(input_ptr->plane(0), win);
				724	Iterator in_uv(input_ptr->plane(1), win_uv);
				725	Iterator out_y(output_ptr->plane(0), win);
				726	Iterator out_u(output_ptr->plane(1), win_uv);
				727	Iterator out_v(output_ptr->plane(2), win_uv);
				728
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	729	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	730	{
				731	const auto ta_y_top = vld2q_u8(in_y.ptr());
				732	const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
				733	const auto ta_uv = vld2q_u8(in_uv.ptr());
				734	//ta_y.val[0] = Y0 Y2 Y4 Y6 ...
				735	//ta_y.val[1] = Y1 Y3 Y5 Y7 ...
				736	//ta_uv.val[0] = U0 U2 U4 U6 ...
				737	//ta_uv.val[1] = V0 V2 V4 V6 ...
				738
				739	vst2q_u8(out_y.ptr(), ta_y_top);
				740	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
				741	vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
				742	vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
				743	},
				744	in_y, in_uv, out_y, out_u, out_v);
				745	}
				746
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	747	/** Convert YUYV to IYUV.
				748	*
				749	* @param[in] input Input YUYV data buffer.
				750	* @param[out] output Output IYUV buffer.
				751	* @param[in] win Window for iterating the buffers.
				752	*
				753	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	754	template <bool yuyv>
				755	void colorconvert_yuyv_to_iyuv(const void __restrict input, void __restrict output, const Window &win)
				756	{
				757	ARM_COMPUTE_ERROR_ON(nullptr == input);
				758	ARM_COMPUTE_ERROR_ON(nullptr == output);
				759	win.validate();
				760
				761	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				762	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				763
				764	constexpr auto shift = yuyv ? 0 : 1;
				765
				766	// Destination's UV's width and height are subsampled
				767	Window win_uv(win);
				768	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				769	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				770	win_uv.validate();
				771
				772	Iterator in(input_ptr, win);
				773	Iterator out_y(output_ptr->plane(0), win);
				774	Iterator out_u(output_ptr->plane(1), win_uv);
				775	Iterator out_v(output_ptr->plane(2), win_uv);
				776
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	777	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	778	{
				779	const auto ta_top = vld4q_u8(in.ptr());
				780	const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
				781	//ta.val[0] = Y0 Y2 Y4 Y6 ...
				782	//ta.val[1] = U0 U2 U4 U6 ...
				783	//ta.val[2] = Y1 Y3 Y5 Y7 ...
				784	//ta.val[3] = V0 V2 V4 V7 ...
				785
				786	uint8x16x2_t yvec;
				787	yvec.val[0] = ta_top.val[0 + shift];
				788	yvec.val[1] = ta_top.val[2 + shift];
				789	vst2q_u8(out_y.ptr(), yvec);
				790
				791	uint8x16x2_t yyvec;
				792	yyvec.val[0] = ta_bottom.val[0 + shift];
				793	yyvec.val[1] = ta_bottom.val[2 + shift];
				794	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
				795
				796	uint8x16_t uvec;
				797	uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
				798	vst1q_u8(out_u.ptr(), uvec);
				799
				800	uint8x16_t vvec;
				801	vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
				802	vst1q_u8(out_v.ptr(), vvec);
				803	},
				804	in, out_y, out_u, out_v);
				805	}
				806
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	807	/** Convert NV12 to YUV4.
				808	*
				809	* @param[in] input Input NV12 data buffer.
				810	* @param[out] output Output YUV4 buffer.
				811	* @param[in] win Window for iterating the buffers.
				812	*
				813	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	814	template <bool uv>
				815	void colorconvert_nv12_to_yuv4(const void __restrict input, void __restrict output, const Window &win)
				816	{
				817	ARM_COMPUTE_ERROR_ON(nullptr == input);
				818	ARM_COMPUTE_ERROR_ON(nullptr == output);
				819	win.validate();
				820
				821	const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
				822	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				823
				824	constexpr auto shift = uv ? 0 : 1;
				825
				826	// UV's width and height are subsampled
				827	Window win_uv(win);
				828	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				829	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				830	win_uv.validate();
				831
				832	Iterator in_y(input_ptr->plane(0), win);
				833	Iterator in_uv(input_ptr->plane(1), win_uv);
				834	Iterator out_y(output_ptr->plane(0), win);
				835	Iterator out_u(output_ptr->plane(1), win);
				836	Iterator out_v(output_ptr->plane(2), win);
				837
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	838	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	839	{
				840	const auto ta_y_top = vld2q_u8(in_y.ptr());
				841	const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
				842	const auto ta_uv = vld2q_u8(in_uv.ptr());
				843	//ta_y.val[0] = Y0 Y2 Y4 Y6 ...
				844	//ta_y.val[1] = Y1 Y3 Y5 Y7 ...
				845	//ta_uv.val[0] = U0 U2 U4 U6 ...
				846	//ta_uv.val[1] = V0 V2 V4 V6 ...
				847
				848	vst2q_u8(out_y.ptr(), ta_y_top);
				849	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
				850
				851	uint8x16x2_t uvec;
				852	uvec.val[0] = ta_uv.val[0 + shift];
				853	uvec.val[1] = ta_uv.val[0 + shift];
				854	vst2q_u8(out_u.ptr(), uvec);
				855	vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
				856
				857	uint8x16x2_t vvec;
				858	vvec.val[0] = ta_uv.val[1 - shift];
				859	vvec.val[1] = ta_uv.val[1 - shift];
				860	vst2q_u8(out_v.ptr(), vvec);
				861	vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
				862	},
				863	in_y, in_uv, out_y, out_u, out_v);
				864	}
				865
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	866	/** Convert IYUV to YUV4.
				867	*
				868	* @param[in] input Input IYUV data buffer.
				869	* @param[out] output Output YUV4 buffer.
				870	* @param[in] win Window for iterating the buffers.
				871	*
				872	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	873	void colorconvert_iyuv_to_yuv4(const void __restrict input, void __restrict output, const Window &win)
				874	{
				875	ARM_COMPUTE_ERROR_ON(nullptr == input);
				876	ARM_COMPUTE_ERROR_ON(nullptr == output);
				877	win.validate();
				878
				879	const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
				880	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				881
				882	// UV's width and height are subsampled
				883	Window win_uv(win);
				884	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				885	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				886	win_uv.validate();
				887
				888	Iterator in_y(input_ptr->plane(0), win);
				889	Iterator in_u(input_ptr->plane(1), win_uv);
				890	Iterator in_v(input_ptr->plane(2), win_uv);
				891	Iterator out_y(output_ptr->plane(0), win);
				892	Iterator out_u(output_ptr->plane(1), win);
				893	Iterator out_v(output_ptr->plane(2), win);
				894
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	895	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	896	{
				897	const auto ta_y_top = vld2q_u8(in_y.ptr());
				898	const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
				899	const auto ta_u = vld1q_u8(in_u.ptr());
				900	const auto ta_v = vld1q_u8(in_v.ptr());
				901	//ta_y.val[0] = Y0 Y2 Y4 Y6 ...
				902	//ta_y.val[1] = Y1 Y3 Y5 Y7 ...
				903	//ta_u = U0 U2 U4 U6 ...
				904	//ta_v = V0 V2 V4 V6 ...
				905
				906	vst2q_u8(out_y.ptr(), ta_y_top);
				907	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
				908
				909	uint8x16x2_t uvec;
				910	uvec.val[0] = ta_u;
				911	uvec.val[1] = ta_u;
				912	vst2q_u8(out_u.ptr(), uvec);
				913	vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
				914
				915	uint8x16x2_t vvec;
				916	vvec.val[0] = ta_v;
				917	vvec.val[1] = ta_v;
				918	vst2q_u8(out_v.ptr(), vvec);
				919	vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
				920	},
				921	in_y, in_u, in_v, out_y, out_u, out_v);
				922	}
				923
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	924	/** Convert RGB to NV12.
				925	*
				926	* @param[in] input Input RGB data buffer.
				927	* @param[out] output Output NV12 buffer.
				928	* @param[in] win Window for iterating the buffers.
				929	*
				930	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	931	template <bool alpha>
				932	void colorconvert_rgb_to_nv12(const void __restrict input, void __restrict output, const Window &win)
				933	{
				934	ARM_COMPUTE_ERROR_ON(nullptr == input);
				935	ARM_COMPUTE_ERROR_ON(nullptr == output);
				936	win.validate();
				937
				938	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				939	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				940
				941	// UV's width and height are subsampled
				942	Window win_uv(win);
				943	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				944	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				945	win_uv.validate();
				946
				947	Iterator in(input_ptr, win);
				948	Iterator out_y(output_ptr->plane(0), win);
				949	Iterator out_uv(output_ptr->plane(1), win_uv);
				950
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	951	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	952	{
				953	const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
				954	const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
				955	//ta_rgb.val[0] = R0 R1 R2 R3 ...
				956	//ta_rgb.val[1] = G0 G1 G2 G3 ...
				957	//ta_rgb.val[2] = B0 B1 B2 B3 ...
				958
				959	store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
				960	ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
				961	out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
				962	out_uv.ptr());
				963	},
				964	in, out_y, out_uv);
				965	}
				966
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	967	/** Convert RGB to IYUV.
				968	*
				969	* @param[in] input Input RGB data buffer.
				970	* @param[out] output Output IYUV buffer.
				971	* @param[in] win Window for iterating the buffers.
				972	*
				973	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	974	template <bool alpha>
				975	void colorconvert_rgb_to_iyuv(const void __restrict input, void __restrict output, const Window &win)
				976	{
				977	ARM_COMPUTE_ERROR_ON(nullptr == input);
				978	ARM_COMPUTE_ERROR_ON(nullptr == output);
				979	win.validate();
				980
				981	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				982	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				983
				984	// UV's width and height are subsampled
				985	Window win_uv(win);
				986	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				987	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				988	win_uv.validate();
				989
				990	Iterator in(input_ptr, win);
				991	Iterator out_y(output_ptr->plane(0), win);
				992	Iterator out_u(output_ptr->plane(1), win_uv);
				993	Iterator out_v(output_ptr->plane(2), win_uv);
				994
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	995	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	996	{
				997	const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
				998	const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
				999	//ta_rgb.val[0] = R0 R1 R2 R3 ...
				1000	//ta_rgb.val[1] = G0 G1 G2 G3 ...
				1001	//ta_rgb.val[2] = B0 B1 B2 B3 ...
				1002
				1003	store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
				1004	ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
				1005	out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
				1006	out_u.ptr(), out_v.ptr());
				1007	},
				1008	in, out_y, out_u, out_v);
				1009	}
				1010
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	1011	/** Convert RGB to YUV4.
				1012	*
				1013	* @param[in] input Input RGB data buffer.
				1014	* @param[out] output Output YUV4 buffer.
				1015	* @param[in] win Window for iterating the buffers.
				1016	*
				1017	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1018	template <bool alpha>
				1019	void colorconvert_rgb_to_yuv4(const void __restrict input, void __restrict output, const Window &win)
				1020	{
				1021	ARM_COMPUTE_ERROR_ON(nullptr == input);
				1022	ARM_COMPUTE_ERROR_ON(nullptr == output);
				1023	win.validate();
				1024
				1025	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				1026	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				1027
				1028	Iterator in(input_ptr, win);
				1029	Iterator out_y(output_ptr->plane(0), win);
				1030	Iterator out_u(output_ptr->plane(1), win);
				1031	Iterator out_v(output_ptr->plane(2), win);
				1032
Michalis Spyrou	6bff195	2019-10-02 17:22:11 +0100	[diff] [blame]	1033	execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1034	{
				1035	const auto ta_rgb = load_rgb(in.ptr(), alpha);
				1036	//ta_rgb.val[0] = R0 R1 R2 R3 ...
				1037	//ta_rgb.val[1] = G0 G1 G2 G3 ...
				1038	//ta_rgb.val[2] = B0 B1 B2 B3 ...
				1039
				1040	store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
				1041	out_y.ptr(), out_u.ptr(), out_v.ptr());
				1042	},
				1043	in, out_y, out_u, out_v);
				1044	}
Gian Marco Iodice	356f643	2017-09-22 11:32:21 +0100	[diff] [blame]	1045	} // namespace arm_compute