// src/core/GLES_COMPUTE/cs_shaders/scale.cs - ml/ComputeLibrary - Gitiles

 /*
  * Copyright (c) 2016, 2017 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */

 layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;

 #include "helpers_cs.h"

 /** Performs an affine transformation on an image interpolating with the NEAREST NEIGHBOUR method. Input and output are single channel FP16.
  *
  * @param[in]  src_ptr      Pointer to the source tensor. Supported data types: FP16.
  * @param[in]  src_attrs    The attributes of the source tensor
  * @param[out] dst_ptr      Pointer to the destination tensor. Supported data types: FP16. (Must be the same as the input)
  * @param[in]  dst_attrs    The attributes of the destination tensor
  * @param[in]  input_width  Input image width
  * @param[in]  input_height Input image height
  * @param[in]  scale_x      The scale factor along x dimension
  * @param[in]  scale_y      The scale factor along y dimension
  */
 /* Parameter block shared by every kernel variant in this file.
  * NOTE(review): the member order presumably has to match what the host side uploads - confirm before reordering.
  */
 SHADER_PARAMS_DECLARATION
 {
     ImageAttributes src_attrs;    /* Attributes of the source tensor */
     ImageAttributes dst_attrs;    /* Attributes of the destination tensor */
     float           input_width;  /* Input image width */
     float           input_height; /* Input image height */
     float           scale_x;      /* Scale factor along the x dimension */
     float           scale_y;      /* Scale factor along the y dimension */
 };

 #if defined(DATA_TYPE_FP16)
 #if defined(SCALE_NEAREST_GENERIC)
 TENSOR_DECLARATION(1, srcBuffer, uint, src_ptr, src_shift, 2, readonly);
 TENSOR_DECLARATION(2, dstBuffer, uvec2, dst_ptr, dst_shift, 3, writeonly);

 /** Maps four horizontally adjacent output pixels to (unclamped) input coordinates.
  *
  * @param[in] coord Output coordinate of the first of the four pixels
  * @param[in] scale Factors applied to the pixel centres (x component for columns, y component for the row)
  *
  * @return Element 0 holds the four x coordinates; element 1 holds the shared y coordinate replicated in every lane
  */
 vec4[2] transform_nearest(vec2 coord, vec2 scale)
 {
     // Lane k addresses output column coord.x + k
     vec4 columns = vec4(coord.x) + vec4(0.0, 1.0, 2.0, 3.0);

     // Move to the pixel centre (+0.5) before applying the scale factor
     vec4 x_mapped = (columns + vec4(0.5)) * scale.x;
     vec4 y_mapped = vec4((coord.y + 0.5) * scale.y);

     return vec4[2](x_mapped, y_mapped);
 }

 /** Restricts a set of coordinates to the image area extended by a border on every side.
  *
  * @param[in] coords      Element 0: x coordinates, element 1: y coordinates
  * @param[in] width       Image width
  * @param[in] height      Image height
  * @param[in] border_size Border added on each side of the image
  *
  * @return The coordinates clamped to [-border_size, width - 1 + border_size] in x
  *         and [-border_size, height - 1 + border_size] in y
  */
 vec4[2] clamp_to_border_with_size(vec4[2] coords, float width, float height, float border_size)
 {
     // Both axes share the same lower bound; only the upper bound depends on the extent
     float lowest    = 0.0 - border_size;
     float highest_x = width - 1.0 + border_size;
     float highest_y = height - 1.0 + border_size;

     return vec4[2](clamp(coords[0], lowest, highest_x),
                    clamp(coords[1], lowest, highest_y));
 }

 /** Generic nearest-neighbour kernel: every invocation writes four horizontally adjacent output values.
  *
  * The iterator, load and store helpers come from helpers_cs.h and are used here as opaque building blocks.
  */
 void main()
 {
     ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR_NO_STEP(src_attrs, src_shift);
     ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);

     // Output coordinate of the first of the four pixels handled by this invocation
     vec2 first_out_px = vec2(gl_GlobalInvocationID.x << 2u, gl_GlobalInvocationID.y);
     vec2 ratio        = vec2(scale_x, scale_y);

     // Source coordinates for the four pixels, restricted to the image plus its border
     vec4[2] src_xy = clamp_to_border_with_size(transform_nearest(first_out_px, ratio), input_width, input_height, float(BORDER_SIZE));

     mediump vec4 result = vec4(0.0);

     for(int lane = 0; lane < 4; ++lane)
     {
         uint byte_offset = image_offset_in_bytes(src_iter, int(src_xy[0][lane]), int(src_xy[1][lane]));
         uint word_index  = byte_offset >> src_shift;

         mediump vec2 halves = LOAD_UNPACK2_HALF(src_ptr, word_index);

         // A byte offset that is a multiple of 4 selects the first unpacked value, otherwise the second
         result[lane] = (byte_offset % 4u == 0u) ? halves.x : halves.y;
     }

     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
 }
 #elif defined(SCALE_NEAREST_8X) /* SCALE_NEAREST_GENERIC */
 TENSOR_DECLARATION(1, srcBuffer, uvec2, src_ptr, src_shift, 3, readonly);
 TENSOR_DECLARATION(2, dstBuffer, uvec4, dst_ptr, dst_shift, 4, writeonly);

 /** Specialised nearest-neighbour kernel: every invocation reads four source values and writes eight
  *  output values, each source value appearing twice in a row.
  *
  * The iterator, load and store helpers come from helpers_cs.h and are used here as opaque building blocks.
  */
 void main()
 {
     ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR_NO_STEP(src_attrs, src_shift);
     ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);

     // Source position: column = invocation x * 4, row = invocation y / 2
     uint src_col = gl_GlobalInvocationID.x << 2u;
     uint src_row = gl_GlobalInvocationID.y >> 1u;

     uint byte_offset = image_offset_in_bytes(src_iter, int(src_col), int(src_row));
     uint elem_index  = byte_offset >> src_shift;

     mediump vec4 texels = LOAD_UNPACK4_HALF(src_ptr, elem_index);

     // Duplicate every loaded value horizontally
     mediump vec4[2] doubled = vec4[2](texels.xxyy, texels.zzww);

     STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, doubled);
 }
 #endif                          /* SCALE_NEAREST_GENERIC */

 #else /* DATA_TYPE_FP16 */
 #error Data type not supported
 #endif /* DATA_TYPE_FP16 */
	/*
	* Copyright (c) 2016, 2017 ARM Limited.
	*
	* SPDX-License-Identifier: MIT
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to
	* deal in the Software without restriction, including without limitation the
	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	* sell copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in all
	* copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/

	layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;

	#include "helpers_cs.h"

	/** Performs an affine transformation on an image interpolating with the NEAREST NEIGHBOUR method. Input and output are single channel FP16.
	*
	* @param[in] src_ptr Pointer to the source tensor. Supported data types: FP16.
	* @param[in] src_attrs The attributes of the source tensor
	* @param[out] dst_ptr Pointer to the destination tensor. Supported data types: FP16. (Must be the same as the input)
	* @param[in] dst_attrs The attributes of the destination tensor
	* @param[in] input_width Input image width
	* @param[in] input_height Input image height
	* @param[in] scale_x The scale factor along x dimension
	* @param[in] scale_y The scale factor along y dimension
	*/
	/* Parameter block shared by every kernel variant in this file.
	 * NOTE(review): the member order presumably has to match what the host side uploads - confirm before reordering.
	 */
	SHADER_PARAMS_DECLARATION
	{
	ImageAttributes src_attrs; /* Attributes of the source tensor */
	ImageAttributes dst_attrs; /* Attributes of the destination tensor */
	float input_width; /* Input image width */
	float input_height; /* Input image height */
	float scale_x; /* Scale factor along the x dimension */
	float scale_y; /* Scale factor along the y dimension */
	};

	#if defined(DATA_TYPE_FP16)
	#if defined(SCALE_NEAREST_GENERIC)
	TENSOR_DECLARATION(1, srcBuffer, uint, src_ptr, src_shift, 2, readonly);
	TENSOR_DECLARATION(2, dstBuffer, uvec2, dst_ptr, dst_shift, 3, writeonly);

	/** Maps four horizontally adjacent output pixels to (unclamped) input coordinates.
	 *
	 * @param[in] coord Output coordinate of the first of the four pixels
	 * @param[in] scale Factors applied to the pixel centres (x component for columns, y component for the row)
	 *
	 * @return Element 0 holds the four x coordinates; element 1 holds the shared y coordinate replicated in every lane
	 */
	vec4[2] transform_nearest(vec2 coord, vec2 scale)
	{
	    // Lane k addresses output column coord.x + k
	    vec4 columns = vec4(coord.x) + vec4(0.0, 1.0, 2.0, 3.0);

	    // Move to the pixel centre (+0.5) before applying the scale factor
	    vec4 x_mapped = (columns + vec4(0.5)) * scale.x;
	    vec4 y_mapped = vec4((coord.y + 0.5) * scale.y);

	    return vec4[2](x_mapped, y_mapped);
	}

	/** Restricts a set of coordinates to the image area extended by a border on every side.
	 *
	 * @param[in] coords      Element 0: x coordinates, element 1: y coordinates
	 * @param[in] width       Image width
	 * @param[in] height      Image height
	 * @param[in] border_size Border added on each side of the image
	 *
	 * @return The coordinates clamped to [-border_size, width - 1 + border_size] in x
	 *         and [-border_size, height - 1 + border_size] in y
	 */
	vec4[2] clamp_to_border_with_size(vec4[2] coords, float width, float height, float border_size)
	{
	    // Both axes share the same lower bound; only the upper bound depends on the extent
	    float lowest    = 0.0 - border_size;
	    float highest_x = width - 1.0 + border_size;
	    float highest_y = height - 1.0 + border_size;

	    return vec4[2](clamp(coords[0], lowest, highest_x),
	                   clamp(coords[1], lowest, highest_y));
	}

	/** Generic nearest-neighbour kernel: every invocation writes four horizontally adjacent output values.
	 *
	 * The iterator, load and store helpers come from helpers_cs.h and are used here as opaque building blocks.
	 */
	void main()
	{
	    ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR_NO_STEP(src_attrs, src_shift);
	    ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);

	    // Output coordinate of the first of the four pixels handled by this invocation
	    vec2 first_out_px = vec2(gl_GlobalInvocationID.x << 2u, gl_GlobalInvocationID.y);
	    vec2 ratio        = vec2(scale_x, scale_y);

	    // Source coordinates for the four pixels, restricted to the image plus its border
	    vec4[2] src_xy = clamp_to_border_with_size(transform_nearest(first_out_px, ratio), input_width, input_height, float(BORDER_SIZE));

	    mediump vec4 result = vec4(0.0);

	    for(int lane = 0; lane < 4; ++lane)
	    {
	        uint byte_offset = image_offset_in_bytes(src_iter, int(src_xy[0][lane]), int(src_xy[1][lane]));
	        uint word_index  = byte_offset >> src_shift;

	        mediump vec2 halves = LOAD_UNPACK2_HALF(src_ptr, word_index);

	        // A byte offset that is a multiple of 4 selects the first unpacked value, otherwise the second
	        result[lane] = (byte_offset % 4u == 0u) ? halves.x : halves.y;
	    }

	    STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
	}
	#elif defined(SCALE_NEAREST_8X) /* SCALE_NEAREST_GENERIC */
	TENSOR_DECLARATION(1, srcBuffer, uvec2, src_ptr, src_shift, 3, readonly);
	TENSOR_DECLARATION(2, dstBuffer, uvec4, dst_ptr, dst_shift, 4, writeonly);

	/** Specialised nearest-neighbour kernel: every invocation reads four source values and writes eight
	 *  output values, each source value appearing twice in a row.
	 *
	 * The iterator, load and store helpers come from helpers_cs.h and are used here as opaque building blocks.
	 */
	void main()
	{
	    ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR_NO_STEP(src_attrs, src_shift);
	    ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);

	    // Source position: column = invocation x * 4, row = invocation y / 2
	    uint src_col = gl_GlobalInvocationID.x << 2u;
	    uint src_row = gl_GlobalInvocationID.y >> 1u;

	    uint byte_offset = image_offset_in_bytes(src_iter, int(src_col), int(src_row));
	    uint elem_index  = byte_offset >> src_shift;

	    mediump vec4 texels = LOAD_UNPACK4_HALF(src_ptr, elem_index);

	    // Duplicate every loaded value horizontally
	    mediump vec4[2] doubled = vec4[2](texels.xxyy, texels.zzww);

	    STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, doubled);
	}
	#endif /* SCALE_NEAREST_GENERIC */

	#else /* DATA_TYPE_FP16 */
	#error Data type not supported
	#endif /* DATA_TYPE_FP16 */