Blame - arm_compute/core/NEON/NEColorConvertHelper.inl - ml/ComputeLibrary

blob: 0da5affe182e8c8f7ee24c50fd8418e41e584941 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	2	* Copyright (c) 2016-2018 ARM Limited.
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/Error.h"
				25	#include "arm_compute/core/Helpers.h"
				26	#include "arm_compute/core/IMultiImage.h"
				27	#include "arm_compute/core/Utils.h"
				28
				29	#include <arm_neon.h>
				30
				31	namespace
				32	{
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	33	#ifndef DOXYGEN_SKIP_THIS
// BT.709 YUV -> RGB coefficients. They multiply the chroma samples once the
// 128 bias has been subtracted:
//   R = Y                + red_coef    * V
//   G = Y + green_coef * U + green_coef2 * V
//   B = Y + blue_coef  * U
constexpr float red_coef_bt709{ 1.5748f };
constexpr float green_coef_bt709{ -0.1873f };
constexpr float green_coef2_bt709{ -0.4681f };
constexpr float blue_coef_bt709{ 1.8556f };

// BT.709 RGB -> YUV weights for the red and blue contributions to luma.
constexpr float rgb2yuv_bt709_kr{ 0.2126f };
constexpr float rgb2yuv_bt709_kb{ 0.0722f };
// Green weight: K_g = 1 - K_r - K_b
constexpr float rgb2yuv_bt709_kg{ 0.7152f };
// Scale applied to (B - Y): C_u = 1 / (2 * (1 - K_b))
constexpr float rgb2yuv_bt709_cu{ 0.5389f };
// Scale applied to (R - Y): C_v = 1 / (2 * (1 - K_r))
constexpr float rgb2yuv_bt709_cv{ 0.6350f };
				47
				48	inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out)
				49	{
				50	const auto tmp1 = vmovl_u8(vget_low_u8(in));
				51	out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1)));
				52	out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1)));
				53	const auto tmp2 = vmovl_u8(vget_high_u8(in));
				54	out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2)));
				55	out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2)));
				56	}
				57
				58	inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
				59	{
				60	out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])),
				61	vqmovn_u32(vcvtq_u32_f32(in2.val[0]))));
				62	out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])),
				63	vqmovn_u32(vcvtq_u32_f32(in2.val[1]))));
				64	out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])),
				65	vqmovn_u32(vcvtq_u32_f32(in2.val[2]))));
				66	}
				67
				68	inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t &out)
				69	{
				70	const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])),
				71	vqmovn_u32(vcvtq_u32_f32(in.val[1])));
				72	const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])),
				73	vqmovn_u32(vcvtq_u32_f32(in.val[3])));
				74	out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high));
				75	}
				76
				77	inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
				78	float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
				79	{
				80	/*
				81	Y'= 0.2126R' + 0.7152G' + 0.0722*B'
				82	U'=-0.1146R' - 0.3854G' + 0.5000*B'
				83	V'= 0.5000R' - 0.4542G' - 0.0458*B'
				84	*/
				85	const auto c128 = vdupq_n_f32(128.f);
				86
				87	// Y = R * K_r + G * (1 - K_r - K_b) * B * K_b
				88	yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr);
				89	yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg);
				90	yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb);
				91
				92	// U = (B - Y) / (2 * (1 - K_b))
				93	uvec = vsubq_f32(bvec, yvec);
				94	uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu);
				95
				96	// V = (R - Y) / (2 * (1 - K_r))
				97	vvec = vsubq_f32(rvec, yvec);
				98	vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv);
				99	}
				100
				101	inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val,
				102	float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha)
				103	{
				104	float32x4x3_t rgb1, rgb2;
				105
				106	// Compute: cb - 128 and cr - 128;
				107	const auto c128 = vdupq_n_f32(128.f);
				108	uvec_val = vsubq_f32(uvec_val, c128);
				109	vvec_val = vsubq_f32(vvec_val, c128);
				110
				111	// Compute:
				112	// r = 0.0000ff_u + 1.5748ff_v;
				113	// g = 0.1873ff_u - 0.4681ff_v;
				114	// b = 1.8556ff_u + 0.0000ff_v;
				115	const auto red = vmulq_n_f32(vvec_val, red_coef_bt709);
				116	const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709);
				117	const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709),
				118	vmulq_n_f32(vvec_val, green_coef2_bt709));
				119
				120	// Compute the final r,g,b values using y1 for the first texel and y2 for the second one.
				121	// the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t
				122	// and written back to memory using vst3 instruction
				123
				124	rgb1.val[0] = vaddq_f32(yvec_val, red);
				125	rgb1.val[1] = vaddq_f32(yvec_val, green);
				126	rgb1.val[2] = vaddq_f32(yvec_val, blue);
				127
				128	rgb2.val[0] = vaddq_f32(yyvec_val, red);
				129	rgb2.val[1] = vaddq_f32(yyvec_val, green);
				130	rgb2.val[2] = vaddq_f32(yyvec_val, blue);
				131
				132	uint8x8x3_t u8_rgb;
				133	convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb);
				134
				135	if(!alpha)
				136	{
				137	vst3_lane_u8(&output_ptr[0], u8_rgb, 0);
				138	vst3_lane_u8(&output_ptr[3], u8_rgb, 4);
				139	vst3_lane_u8(&output_ptr[6], u8_rgb, 1);
				140	vst3_lane_u8(&output_ptr[9], u8_rgb, 5);
				141	vst3_lane_u8(&output_ptr[12], u8_rgb, 2);
				142	vst3_lane_u8(&output_ptr[15], u8_rgb, 6);
				143	vst3_lane_u8(&output_ptr[18], u8_rgb, 3);
				144	vst3_lane_u8(&output_ptr[21], u8_rgb, 7);
				145	}
				146	else
				147	{
				148	uint8x8x4_t u8_rgba;
				149	u8_rgba.val[0] = u8_rgb.val[0];
				150	u8_rgba.val[1] = u8_rgb.val[1];
				151	u8_rgba.val[2] = u8_rgb.val[2];
				152	u8_rgba.val[3] = vdup_n_u8(255);
				153	vst4_lane_u8(&output_ptr[0], u8_rgba, 0);
				154	vst4_lane_u8(&output_ptr[4], u8_rgba, 4);
				155	vst4_lane_u8(&output_ptr[8], u8_rgba, 1);
				156	vst4_lane_u8(&output_ptr[12], u8_rgba, 5);
				157	vst4_lane_u8(&output_ptr[16], u8_rgba, 2);
				158	vst4_lane_u8(&output_ptr[20], u8_rgba, 6);
				159	vst4_lane_u8(&output_ptr[24], u8_rgba, 3);
				160	vst4_lane_u8(&output_ptr[28], u8_rgba, 7);
				161	}
				162	}
				163
				164	inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
				165	{
				166	uint8x16x3_t rgb;
				167
				168	if(alpha)
				169	{
				170	const auto tmp = vld4q_u8(ptr);
				171	rgb.val[0] = tmp.val[0];
				172	rgb.val[1] = tmp.val[1];
				173	rgb.val[2] = tmp.val[2];
				174	}
				175	else
				176	{
				177	rgb = vld3q_u8(ptr);
				178	}
				179
				180	return rgb;
				181	}
				182
				183	inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
				184	{
				185	// Convert the uint8x16_t to float32x4x4_t
				186	float32x4x4_t frvec_top, fgvec_top, fbvec_top;
				187	convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top);
				188	convert_uint8x16_to_float32x4x4(vec_top.val[1], fgvec_top);
				189	convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top);
				190
				191	float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom;
				192	convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom);
				193	convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom);
				194	convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom);
				195
				196	float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
				197	float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
				198
				199	for(auto i = 0; i < 4; ++i)
				200	{
				201	rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i],
				202	fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]);
				203	rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i],
				204	fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]);
				205	}
				206
				207	convert_float32x4x4_to_unit8x16(fyvec_top, vec_top.val[0]);
				208	convert_float32x4x4_to_unit8x16(fuvec_top, vec_top.val[1]);
				209	convert_float32x4x4_to_unit8x16(fvvec_top, vec_top.val[2]);
				210	convert_float32x4x4_to_unit8x16(fyvec_bottom, vec_bottom.val[0]);
				211	convert_float32x4x4_to_unit8x16(fuvec_bottom, vec_bottom.val[1]);
				212	convert_float32x4x4_to_unit8x16(fvvec_bottom, vec_bottom.val[2]);
				213	}
				214
				215	inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
				216	const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
				217	unsigned char const __restrict out_y_top, unsigned char const __restrict out_y_bottom,
				218	unsigned char *const __restrict out_uv)
				219	{
				220	uint8x16x3_t vec_top, vec_bottom;
				221	vec_top.val[0] = rvec_top;
				222	vec_top.val[1] = gvec_top;
				223	vec_top.val[2] = bvec_top;
				224	vec_bottom.val[0] = rvec_bottom;
				225	vec_bottom.val[1] = gvec_bottom;
				226	vec_bottom.val[2] = bvec_bottom;
				227
				228	rgb_to_yuv_conversion(vec_top, vec_bottom);
				229
				230	vst1q_u8(out_y_top, vec_top.val[0]);
				231	vst1q_u8(out_y_bottom, vec_bottom.val[0]);
				232
				233	const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]);
				234	const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]);
				235	const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]);
				236	const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]);
				237
				238	uint8x8x2_t uvvec;
				239	uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp));
				240	uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp));
				241
				242	vst2_u8(out_uv, uvvec);
				243	}
				244
				245	inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top,
				246	const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom,
				247	unsigned char const __restrict out_y_top, unsigned char const __restrict out_y_bottom,
				248	unsigned char *const __restrict out_u,
				249	unsigned char *const __restrict out_v)
				250	{
				251	uint8x16x3_t vec_top, vec_bottom;
				252	vec_top.val[0] = rvec_top;
				253	vec_top.val[1] = gvec_top;
				254	vec_top.val[2] = bvec_top;
				255	vec_bottom.val[0] = rvec_bottom;
				256	vec_bottom.val[1] = gvec_bottom;
				257	vec_bottom.val[2] = bvec_bottom;
				258
				259	rgb_to_yuv_conversion(vec_top, vec_bottom);
				260
				261	vst1q_u8(out_y_top, vec_top.val[0]);
				262	vst1q_u8(out_y_bottom, vec_bottom.val[0]);
				263
				264	const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]);
				265	const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]);
				266	const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]),
				267	vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1]));
				268
				269	vst1_u8(out_u, vget_low_u8(uvvec));
				270	vst1_u8(out_v, vget_high_u8(uvvec));
				271	}
				272
				273	inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec,
				274	unsigned char *const __restrict out_y,
				275	unsigned char *const __restrict out_u,
				276	unsigned char *const __restrict out_v)
				277	{
				278	// Convert the uint8x16_t to float32x4x4_t
				279	float32x4x4_t frvec, fgvec, fbvec;
				280	convert_uint8x16_to_float32x4x4(rvec, frvec);
				281	convert_uint8x16_to_float32x4x4(gvec, fgvec);
				282	convert_uint8x16_to_float32x4x4(bvec, fbvec);
				283
				284	float32x4x4_t fyvec, fuvec, fvvec;
				285	for(auto i = 0; i < 4; ++i)
				286	{
				287	rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i],
				288	fyvec.val[i], fuvec.val[i], fvvec.val[i]);
				289	}
				290
				291	uint8x16_t yvec, uvec, vvec;
				292	convert_float32x4x4_to_unit8x16(fyvec, yvec);
				293	convert_float32x4x4_to_unit8x16(fuvec, uvec);
				294	convert_float32x4x4_to_unit8x16(fvvec, vvec);
				295
				296	vst1q_u8(out_y, yvec);
				297	vst1q_u8(out_u, uvec);
				298	vst1q_u8(out_v, vvec);
				299	}
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	300	#endif /* DOXYGEN_SKIP_THIS */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	301	}
				302
				303	namespace arm_compute
				304	{
/** Convert RGB to RGBX.
 *
 * Every window iteration loads sixteen interleaved RGB pixels (48 bytes) and
 * stores sixteen RGBX pixels (64 bytes) whose fourth channel is set to 255.
 *
 * @param[in]  input  Input RGB data buffer (pointer to an IImage).
 * @param[out] output Output RGBX buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    Iterator in(input_ptr, win);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // De-interleave R, G, B, then re-interleave with a constant fourth channel.
        const auto   ta1 = vld3q_u8(in.ptr());
        uint8x16x4_t ta2;
        ta2.val[0] = ta1.val[0];
        ta2.val[1] = ta1.val[1];
        ta2.val[2] = ta1.val[2];
        ta2.val[3] = vdupq_n_u8(255);
        vst4q_u8(out.ptr(), ta2);
    },
    in, out);
}
				335
/** Convert RGBX to RGB.
 *
 * Every window iteration loads sixteen interleaved RGBX pixels (64 bytes),
 * drops the fourth channel and stores sixteen RGB pixels (48 bytes).
 *
 * @param[in]  input  Input RGBX data buffer (pointer to an IImage).
 * @param[out] output Output RGB buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_rgbx_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    Iterator in(input_ptr, win);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        // De-interleave the four channels and keep the first three.
        const auto   ta1 = vld4q_u8(in.ptr());
        uint8x16x3_t ta2;
        ta2.val[0] = ta1.val[0];
        ta2.val[1] = ta1.val[1];
        ta2.val[2] = ta1.val[2];
        vst3q_u8(out.ptr(), ta2);
    },
    in, out);
}
				365
/** Convert YUYV to RGB.
 *
 * Every window iteration loads 64 bytes of packed 4:2:2 data (32 pixels) and
 * stores 32 RGB or RGBA pixels.
 *
 * @tparam yuyv  True when the bytes are ordered Y U Y V; false when they are ordered U Y V Y.
 * @tparam alpha True to write four channels per pixel (fourth channel = 255), false to write three.
 *
 * @param[in]  input  Input YUYV data buffer (pointer to an IImage).
 * @param[out] output Output RGB buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool yuyv, bool alpha>
void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation call (eight pixels).
    constexpr auto element_size = alpha ? 32 : 24;
    // Selects which de-interleaved lanes hold luma and which hold chroma.
    constexpr auto shift = yuyv ? 0 : 1;

    Iterator in(input_ptr, win);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        float32x4x4_t uvec, yvec, vvec, yyvec;
        const auto    ta = vld4q_u8(in.ptr());
        // For the Y U Y V ordering (shift == 0):
        //ta.val[0] = Y0 Y2 Y4 Y6 ...
        //ta.val[1] = U0 U2 U4 U6 ...
        //ta.val[2] = Y1 Y3 Y5 Y7 ...
        //ta.val[3] = V0 V2 V4 V6 ...

        // Convert the uint8x16x4_t to float32x4x4_t
        convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec);
        convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec);
        convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec);
        convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec);

        // Each call converts and stores eight consecutive pixels.
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec.val[i], uvec.val[i], yyvec.val[i], vvec.val[i], out.ptr() + i * element_size, alpha);
        }
    },
    in, out);
}
				410
/** Convert NV12 to RGB.
 *
 * Every window iteration converts a 32x2 block of pixels: two rows of 32 luma
 * samples share one row of 16 chroma pairs.
 *
 * @tparam uv    True when the chroma plane is ordered U V; false when it is ordered V U.
 * @tparam alpha True to write four channels per pixel (fourth channel = 255), false to write three.
 *
 * @param[in]  input  Input NV12 data buffer (pointer to an IMultiImage).
 * @param[out] output Output RGB buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool uv, bool alpha>
void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation call (eight pixels).
    constexpr auto element_size = alpha ? 32 : 24;
    const auto     out_stride   = output_ptr->info()->strides_in_bytes().y();
    const auto     in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();
    // Selects which de-interleaved chroma lane is U and which is V.
    constexpr auto shift = uv ? 0 : 1;

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_uv(input_ptr->plane(1), win_uv);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_uv       = vld2q_u8(in_uv.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        // For the U V ordering (shift == 0), one chroma sample per pixel pair:
        //ta_uv.val[0] = U samples
        //ta_uv.val[1] = V samples

        // Convert each uint8x16_t to float32x4x4_t
        float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
        convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
        convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
        convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec);
        convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec);

        // Top row: each call converts and stores eight consecutive pixels.
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_top.val[i], uvec.val[i], yyvec_top.val[i], vvec.val[i], out.ptr() + i * element_size, alpha);
        }

        // Bottom row reuses the same chroma samples.
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_bottom.val[i], uvec.val[i], yyvec_bottom.val[i], vvec.val[i], out.ptr() + out_stride + i * element_size, alpha);
        }
    },
    in_y, in_uv, out);
}
				473
/** Convert IYUV to RGB.
 *
 * Every window iteration converts a 32x2 block of pixels: two rows of 32 luma
 * samples share 16 U samples and 16 V samples read from separate planes.
 *
 * @tparam alpha True to write four channels per pixel (fourth channel = 255), false to write three.
 *
 * @param[in]  input  Input IYUV data buffer (pointer to an IMultiImage).
 * @param[out] output Output RGB buffer (pointer to an IImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool alpha>
void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IImage *__restrict>(output);

    // Bytes written by one yuyv_to_rgb_calculation call (eight pixels).
    constexpr auto element_size = alpha ? 32 : 24;
    const auto     out_stride   = output_ptr->info()->strides_in_bytes().y();
    const auto     in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_u(input_ptr->plane(1), win_uv);
    Iterator in_v(input_ptr->plane(2), win_uv);
    Iterator out(output_ptr, win);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_u        = vld1q_u8(in_u.ptr());
        const auto ta_v        = vld1q_u8(in_v.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_u = 16 consecutive U samples, one per pixel pair
        //ta_v = 16 consecutive V samples, one per pixel pair

        // Convert each uint8x16_t to float32x4x4_t
        float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
        convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
        convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
        convert_uint8x16_to_float32x4x4(ta_u, uvec);
        convert_uint8x16_to_float32x4x4(ta_v, vvec);

        // Top row: each call converts and stores eight consecutive pixels.
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_top.val[i], uvec.val[i], yyvec_top.val[i], vvec.val[i], out.ptr() + i * element_size, alpha);
        }

        // Bottom row reuses the same chroma samples.
        for(int i = 0; i < 4; ++i)
        {
            yuyv_to_rgb_calculation(yvec_bottom.val[i], uvec.val[i], yyvec_bottom.val[i], vvec.val[i], out.ptr() + out_stride + i * element_size, alpha);
        }
    },
    in_y, in_u, in_v, out);
}
				537
/** Convert YUYV to NV12.
 *
 * Every window iteration reads a 32x2 block of packed 4:2:2 pixels, copies the
 * luma to the first output plane and writes the (truncating) average of the
 * two rows' chroma, interleaved as U V, to the second output plane.
 *
 * @tparam yuyv True when the input bytes are ordered Y U Y V; false when they are ordered U Y V Y.
 *
 * @param[in]  input  Input YUYV data buffer (pointer to an IImage).
 * @param[out] output Output NV12 buffer (pointer to an IMultiImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool yuyv>
void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    // Selects which de-interleaved lanes hold luma and which hold chroma.
    constexpr auto shift = yuyv ? 0 : 1;

    const auto in_stride    = input_ptr->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();

    // NV12's UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in(input_ptr, win);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_uv(output_ptr->plane(1), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_top    = vld4q_u8(in.ptr());
        const auto ta_bottom = vld4q_u8(in.ptr() + in_stride);
        // For the Y U Y V ordering (shift == 0):
        //ta.val[0] = Y0 Y2 Y4 Y6 ...
        //ta.val[1] = U0 U2 U4 U6 ...
        //ta.val[2] = Y1 Y3 Y5 Y7 ...
        //ta.val[3] = V0 V2 V4 V6 ...

        // Luma of the top row, re-interleaved on store.
        uint8x16x2_t yvec;
        yvec.val[0] = ta_top.val[0 + shift];
        yvec.val[1] = ta_top.val[2 + shift];
        vst2q_u8(out_y.ptr(), yvec);

        // Luma of the bottom row.
        uint8x16x2_t yyvec;
        yyvec.val[0] = ta_bottom.val[0 + shift];
        yyvec.val[1] = ta_bottom.val[2 + shift];
        vst2q_u8(out_y.ptr() + out_y_stride, yyvec);

        // Vertical chroma subsampling: average the two rows.
        uint8x16x2_t uvvec;
        uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
        uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
        vst2q_u8(out_uv.ptr(), uvvec);
    },
    in, out_y, out_uv);
}
				593
/** Convert IYUV to NV12.
 *
 * Every window iteration copies two rows of 32 luma samples unchanged and
 * interleaves 16 U samples with 16 V samples into the output chroma plane.
 *
 * @param[in]  input  Input IYUV data buffer (pointer to an IMultiImage).
 * @param[out] output Output NV12 buffer (pointer to an IMultiImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    const auto in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_u(input_ptr->plane(1), win_uv);
    Iterator in_v(input_ptr->plane(2), win_uv);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_uv(output_ptr->plane(1), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto   ta_y_top    = vld2q_u8(in_y.ptr());
        const auto   ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        uint8x16x2_t ta_uv;
        ta_uv.val[0] = vld1q_u8(in_u.ptr());
        ta_uv.val[1] = vld1q_u8(in_v.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        //ta_uv.val[0] = 16 consecutive U samples
        //ta_uv.val[1] = 16 consecutive V samples

        // Luma passes through; vst2q re-interleaves what vld2q split.
        vst2q_u8(out_y.ptr(), ta_y_top);
        vst2q_u8(out_y.ptr() + out_y_stride, ta_y_bottom);
        // Interleave U and V on store.
        vst2q_u8(out_uv.ptr(), ta_uv);
    },
    in_y, in_u, in_v, out_y, out_uv);
}
				640
/** Convert NV12 to IYUV.
 *
 * Every window iteration copies two rows of 32 luma samples unchanged and
 * splits 16 interleaved chroma pairs into the separate U and V output planes.
 *
 * @tparam uv True when the input chroma plane is ordered U V; false when it is ordered V U.
 *
 * @param[in]  input  Input NV12 data buffer (pointer to an IMultiImage).
 * @param[out] output Output IYUV buffer (pointer to an IMultiImage).
 * @param[in]  win    Window for iterating the buffers.
 *
 */
template <bool uv>
void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win)
{
    ARM_COMPUTE_ERROR_ON(nullptr == input);
    ARM_COMPUTE_ERROR_ON(nullptr == output);
    win.validate();

    const auto input_ptr  = static_cast<const IMultiImage *__restrict>(input);
    const auto output_ptr = static_cast<IMultiImage *__restrict>(output);

    // Selects which de-interleaved chroma lane is U and which is V.
    constexpr auto shift = uv ? 0 : 1;

    const auto in_y_stride  = input_ptr->plane(0)->info()->strides_in_bytes().y();
    const auto out_y_stride = output_ptr->plane(0)->info()->strides_in_bytes().y();

    // UV's width and height are subsampled
    Window win_uv(win);
    win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
    win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
    win_uv.validate();

    Iterator in_y(input_ptr->plane(0), win);
    Iterator in_uv(input_ptr->plane(1), win_uv);
    Iterator out_y(output_ptr->plane(0), win);
    Iterator out_u(output_ptr->plane(1), win_uv);
    Iterator out_v(output_ptr->plane(2), win_uv);

    execute_window_loop(win, [&](const Coordinates &)
    {
        const auto ta_y_top    = vld2q_u8(in_y.ptr());
        const auto ta_y_bottom = vld2q_u8(in_y.ptr() + in_y_stride);
        const auto ta_uv       = vld2q_u8(in_uv.ptr());
        //ta_y.val[0] = Y0 Y2 Y4 Y6 ...
        //ta_y.val[1] = Y1 Y3 Y5 Y7 ...
        // For the U V ordering (shift == 0):
        //ta_uv.val[0] = U samples
        //ta_uv.val[1] = V samples

        // Luma passes through; vst2q re-interleaves what vld2q split.
        vst2q_u8(out_y.ptr(), ta_y_top);
        vst2q_u8(out_y.ptr() + out_y_stride, ta_y_bottom);
        // Each chroma channel goes to its own plane.
        vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]);
        vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]);
    },
    in_y, in_uv, out_y, out_u, out_v);
}
				689
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	690	/** Convert YUYV to IYUV.
				691	*
				692	* @param[in] input Input YUYV data buffer.
				693	* @param[out] output Output IYUV buffer.
				694	* @param[in] win Window for iterating the buffers.
				695	*
				696	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	697	template <bool yuyv>
				698	void colorconvert_yuyv_to_iyuv(const void __restrict input, void __restrict output, const Window &win)
				699	{
				700	ARM_COMPUTE_ERROR_ON(nullptr == input);
				701	ARM_COMPUTE_ERROR_ON(nullptr == output);
				702	win.validate();
				703
				704	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				705	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				706
				707	constexpr auto shift = yuyv ? 0 : 1;
				708
				709	// Destination's UV's width and height are subsampled
				710	Window win_uv(win);
				711	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				712	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				713	win_uv.validate();
				714
				715	Iterator in(input_ptr, win);
				716	Iterator out_y(output_ptr->plane(0), win);
				717	Iterator out_u(output_ptr->plane(1), win_uv);
				718	Iterator out_v(output_ptr->plane(2), win_uv);
				719
				720	execute_window_loop(win, [&](const Coordinates & id)
				721	{
				722	const auto ta_top = vld4q_u8(in.ptr());
				723	const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y());
				724	//ta.val[0] = Y0 Y2 Y4 Y6 ...
				725	//ta.val[1] = U0 U2 U4 U6 ...
				726	//ta.val[2] = Y1 Y3 Y5 Y7 ...
				727	//ta.val[3] = V0 V2 V4 V7 ...
				728
				729	uint8x16x2_t yvec;
				730	yvec.val[0] = ta_top.val[0 + shift];
				731	yvec.val[1] = ta_top.val[2 + shift];
				732	vst2q_u8(out_y.ptr(), yvec);
				733
				734	uint8x16x2_t yyvec;
				735	yyvec.val[0] = ta_bottom.val[0 + shift];
				736	yyvec.val[1] = ta_bottom.val[2 + shift];
				737	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec);
				738
				739	uint8x16_t uvec;
				740	uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]);
				741	vst1q_u8(out_u.ptr(), uvec);
				742
				743	uint8x16_t vvec;
				744	vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]);
				745	vst1q_u8(out_v.ptr(), vvec);
				746	},
				747	in, out_y, out_u, out_v);
				748	}
				749
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	750	/** Convert NV12 to YUV4.
				751	*
				752	* @param[in] input Input NV12 data buffer.
				753	* @param[out] output Output YUV4 buffer.
				754	* @param[in] win Window for iterating the buffers.
				755	*
				756	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	757	template <bool uv>
				758	void colorconvert_nv12_to_yuv4(const void __restrict input, void __restrict output, const Window &win)
				759	{
				760	ARM_COMPUTE_ERROR_ON(nullptr == input);
				761	ARM_COMPUTE_ERROR_ON(nullptr == output);
				762	win.validate();
				763
				764	const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
				765	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				766
				767	constexpr auto shift = uv ? 0 : 1;
				768
				769	// UV's width and height are subsampled
				770	Window win_uv(win);
				771	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				772	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				773	win_uv.validate();
				774
				775	Iterator in_y(input_ptr->plane(0), win);
				776	Iterator in_uv(input_ptr->plane(1), win_uv);
				777	Iterator out_y(output_ptr->plane(0), win);
				778	Iterator out_u(output_ptr->plane(1), win);
				779	Iterator out_v(output_ptr->plane(2), win);
				780
				781	execute_window_loop(win, [&](const Coordinates & id)
				782	{
				783	const auto ta_y_top = vld2q_u8(in_y.ptr());
				784	const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
				785	const auto ta_uv = vld2q_u8(in_uv.ptr());
				786	//ta_y.val[0] = Y0 Y2 Y4 Y6 ...
				787	//ta_y.val[1] = Y1 Y3 Y5 Y7 ...
				788	//ta_uv.val[0] = U0 U2 U4 U6 ...
				789	//ta_uv.val[1] = V0 V2 V4 V6 ...
				790
				791	vst2q_u8(out_y.ptr(), ta_y_top);
				792	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
				793
				794	uint8x16x2_t uvec;
				795	uvec.val[0] = ta_uv.val[0 + shift];
				796	uvec.val[1] = ta_uv.val[0 + shift];
				797	vst2q_u8(out_u.ptr(), uvec);
				798	vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
				799
				800	uint8x16x2_t vvec;
				801	vvec.val[0] = ta_uv.val[1 - shift];
				802	vvec.val[1] = ta_uv.val[1 - shift];
				803	vst2q_u8(out_v.ptr(), vvec);
				804	vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
				805	},
				806	in_y, in_uv, out_y, out_u, out_v);
				807	}
				808
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	809	/** Convert IYUV to YUV4.
				810	*
				811	* @param[in] input Input IYUV data buffer.
				812	* @param[out] output Output YUV4 buffer.
				813	* @param[in] win Window for iterating the buffers.
				814	*
				815	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	816	void colorconvert_iyuv_to_yuv4(const void __restrict input, void __restrict output, const Window &win)
				817	{
				818	ARM_COMPUTE_ERROR_ON(nullptr == input);
				819	ARM_COMPUTE_ERROR_ON(nullptr == output);
				820	win.validate();
				821
				822	const auto input_ptr = static_cast<const IMultiImage *__restrict>(input);
				823	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				824
				825	// UV's width and height are subsampled
				826	Window win_uv(win);
				827	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				828	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				829	win_uv.validate();
				830
				831	Iterator in_y(input_ptr->plane(0), win);
				832	Iterator in_u(input_ptr->plane(1), win_uv);
				833	Iterator in_v(input_ptr->plane(2), win_uv);
				834	Iterator out_y(output_ptr->plane(0), win);
				835	Iterator out_u(output_ptr->plane(1), win);
				836	Iterator out_v(output_ptr->plane(2), win);
				837
				838	execute_window_loop(win, [&](const Coordinates & id)
				839	{
				840	const auto ta_y_top = vld2q_u8(in_y.ptr());
				841	const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y());
				842	const auto ta_u = vld1q_u8(in_u.ptr());
				843	const auto ta_v = vld1q_u8(in_v.ptr());
				844	//ta_y.val[0] = Y0 Y2 Y4 Y6 ...
				845	//ta_y.val[1] = Y1 Y3 Y5 Y7 ...
				846	//ta_u = U0 U2 U4 U6 ...
				847	//ta_v = V0 V2 V4 V6 ...
				848
				849	vst2q_u8(out_y.ptr(), ta_y_top);
				850	vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom);
				851
				852	uint8x16x2_t uvec;
				853	uvec.val[0] = ta_u;
				854	uvec.val[1] = ta_u;
				855	vst2q_u8(out_u.ptr(), uvec);
				856	vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec);
				857
				858	uint8x16x2_t vvec;
				859	vvec.val[0] = ta_v;
				860	vvec.val[1] = ta_v;
				861	vst2q_u8(out_v.ptr(), vvec);
				862	vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec);
				863	},
				864	in_y, in_u, in_v, out_y, out_u, out_v);
				865	}
				866
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	867	/** Convert RGB to NV12.
				868	*
				869	* @param[in] input Input RGB data buffer.
				870	* @param[out] output Output NV12 buffer.
				871	* @param[in] win Window for iterating the buffers.
				872	*
				873	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	874	template <bool alpha>
				875	void colorconvert_rgb_to_nv12(const void __restrict input, void __restrict output, const Window &win)
				876	{
				877	ARM_COMPUTE_ERROR_ON(nullptr == input);
				878	ARM_COMPUTE_ERROR_ON(nullptr == output);
				879	win.validate();
				880
				881	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				882	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				883
				884	// UV's width and height are subsampled
				885	Window win_uv(win);
				886	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				887	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				888	win_uv.validate();
				889
				890	Iterator in(input_ptr, win);
				891	Iterator out_y(output_ptr->plane(0), win);
				892	Iterator out_uv(output_ptr->plane(1), win_uv);
				893
				894	execute_window_loop(win, [&](const Coordinates & id)
				895	{
				896	const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
				897	const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
				898	//ta_rgb.val[0] = R0 R1 R2 R3 ...
				899	//ta_rgb.val[1] = G0 G1 G2 G3 ...
				900	//ta_rgb.val[2] = B0 B1 B2 B3 ...
				901
				902	store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
				903	ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
				904	out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
				905	out_uv.ptr());
				906	},
				907	in, out_y, out_uv);
				908	}
				909
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	910	/** Convert RGB to IYUV.
				911	*
				912	* @param[in] input Input RGB data buffer.
				913	* @param[out] output Output IYUV buffer.
				914	* @param[in] win Window for iterating the buffers.
				915	*
				916	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	917	template <bool alpha>
				918	void colorconvert_rgb_to_iyuv(const void __restrict input, void __restrict output, const Window &win)
				919	{
				920	ARM_COMPUTE_ERROR_ON(nullptr == input);
				921	ARM_COMPUTE_ERROR_ON(nullptr == output);
				922	win.validate();
				923
				924	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				925	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				926
				927	// UV's width and height are subsampled
				928	Window win_uv(win);
				929	win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2));
				930	win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1));
				931	win_uv.validate();
				932
				933	Iterator in(input_ptr, win);
				934	Iterator out_y(output_ptr->plane(0), win);
				935	Iterator out_u(output_ptr->plane(1), win_uv);
				936	Iterator out_v(output_ptr->plane(2), win_uv);
				937
				938	execute_window_loop(win, [&](const Coordinates & id)
				939	{
				940	const auto ta_rgb_top = load_rgb(in.ptr(), alpha);
				941	const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha);
				942	//ta_rgb.val[0] = R0 R1 R2 R3 ...
				943	//ta_rgb.val[1] = G0 G1 G2 G3 ...
				944	//ta_rgb.val[2] = B0 B1 B2 B3 ...
				945
				946	store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2],
				947	ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2],
				948	out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(),
				949	out_u.ptr(), out_v.ptr());
				950	},
				951	in, out_y, out_u, out_v);
				952	}
				953
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	954	/** Convert RGB to YUV4.
				955	*
				956	* @param[in] input Input RGB data buffer.
				957	* @param[out] output Output YUV4 buffer.
				958	* @param[in] win Window for iterating the buffers.
				959	*
				960	*/
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	961	template <bool alpha>
				962	void colorconvert_rgb_to_yuv4(const void __restrict input, void __restrict output, const Window &win)
				963	{
				964	ARM_COMPUTE_ERROR_ON(nullptr == input);
				965	ARM_COMPUTE_ERROR_ON(nullptr == output);
				966	win.validate();
				967
				968	const auto input_ptr = static_cast<const IImage *__restrict>(input);
				969	const auto output_ptr = static_cast<IMultiImage *__restrict>(output);
				970
				971	Iterator in(input_ptr, win);
				972	Iterator out_y(output_ptr->plane(0), win);
				973	Iterator out_u(output_ptr->plane(1), win);
				974	Iterator out_v(output_ptr->plane(2), win);
				975
				976	execute_window_loop(win, [&](const Coordinates & id)
				977	{
				978	const auto ta_rgb = load_rgb(in.ptr(), alpha);
				979	//ta_rgb.val[0] = R0 R1 R2 R3 ...
				980	//ta_rgb.val[1] = G0 G1 G2 G3 ...
				981	//ta_rgb.val[2] = B0 B1 B2 B3 ...
				982
				983	store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2],
				984	out_y.ptr(), out_u.ptr(), out_v.ptr());
				985	},
				986	in, out_y, out_u, out_v);
				987	}
Gian Marco Iodice	356f643	2017-09-22 11:32:21 +0100	[diff] [blame]	988	} // namespace arm_compute