chapters/image.adoc - tosa/specification - Gitiles

 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
 // (C) COPYRIGHT 2020-2023 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
 // by a licensing agreement from ARM Limited.

 === Image Operators

 ==== RESIZE

 Resizes a tensor. Resize is only allowed in the H and W dimensions.


 The height dimension is scaled by factor (scale_y_n/scale_y_d).
 The width dimension is scaled by factor (scale_x_n/scale_x_d).

 The NEAREST (nearest neighbor) mode returns the value of the input tensor closest to the
 calculated sample position for both floating-point and integer data formats.

 Floating-point BILINEAR mode returns a bilinearly interpolated output value
 based on the four closest input sample positions.

 For integer BILINEAR interpolation mode, the output value must
 be scaled by 1/(scale_y_n * scale_x_n) in a following operation to
 complete the interpolation (for example with a RESCALE operator).

 The following examples show practical uses of the parameters:

 * For approximate uniform input sampling between (0, 0) and (IH - 1, IW - 1), set:
 ** scale_y_n/scale_y_d = (OH - 1)/(IH - 1) as integer ratios
 ** scale_x_n/scale_x_d = (OW - 1)/(IW - 1) as integer ratios
 ** offset_x = 0, offset_y = 0, border_x = 0, border_y = 0

 * For power of two upscale [OH - 1,OW - 1] = (1 << k) * [IH - 1, IW - 1],
 sampling between (0,0) and (IH - 1,IW - 1), set:
 ** scale_y_n = (1 << k), scale_y_d = 1, offset_y = 0, border_y = 0
 ** scale_x_n = (1 << k), scale_x_d = 1, offset_x = 0, border_x = 0

 * For power of two upscale [OH,OW] = (1 << k) * [IH,IW],
 sampling range approximately (-0.5, -0.5) to (IH - 0.5, IW - 0.5), set:
 ** scale_y_n = 2 << k, scale_y_d = 2, offset_y = -(1 << k) + 1, border_y = (1 << k) - 1
 ** scale_x_n = 2 << k, scale_x_d = 2, offset_x = -(1 << k) + 1, border_x = (1 << k) - 1

 The output dimensions can be derived from the input dimensions by inverting
 the scale as described in the pseudocode. The [border_y, border_x] values
 adjust the output size to allow fractional sampling beyond integer
 input position (IH - 1,IW - 1).

 include::{generated}/operators/RESIZE.adoc[]

 *Resize Modes:*
 |===
 |Mode|Description

 |NEAREST|Nearest Neighbor
 |BILINEAR|Bilinear interpolation
 |===

 [source,c++]
 ----
 // RESIZE reference pseudocode.
 // First validates the scale, offset and border parameters, then for every
 // output element (n, oy, ox, c) maps the output position back to an input
 // sample position and writes either a weighted blend of the four
 // surrounding input values (BILINEAR) or the closest input value (NEAREST).

 // Ensure the image size is supported by GPU APIs and that for integer
 // implementations, position * stride does not overflow int32_t.
 ERROR_IF(max(OH,OW,IH,IW) >= 16384);
 // Every scale numerator and denominator must be strictly positive.
 ERROR_IF(scale_y_n <= 0 || scale_y_d <= 0 || scale_x_n <= 0 || scale_x_d <= 0);
 // if in_t=int8_t ensure that an int32_t accumulator can be used
 ERROR_IF(scale_y_n > (1 << 11) || scale_x_n > (1 << 11));
 // set a consistent lower limit of 1/16 downscale to simplify implementations
 ERROR_IF(scale_y_d >= 16 * scale_y_n || scale_x_d >= 16 * scale_x_n);
 // Offsets must lie in [-scale_n, 16 * scale_n).
 ERROR_IF(offset_y < -scale_y_n || offset_y >= 16 * scale_y_n);
 ERROR_IF(offset_x < -scale_x_n || offset_x >= 16 * scale_x_n);
 // Borders must lie in [-16 * scale_n, scale_n).
 ERROR_IF(border_y < -16 * scale_y_n || border_y >= scale_y_n);
 ERROR_IF(border_x < -16 * scale_x_n || border_x >= scale_x_n);
 // The output size must match the size obtained by inverting the scale
 // (idiv_check is defined elsewhere in the specification).
 ERROR_IF(OH != idiv_check((IH - 1) * scale_y_n - offset_y + border_y, scale_y_d) + 1);
 ERROR_IF(OW != idiv_check((IW - 1) * scale_x_n - offset_x + border_x, scale_x_d) + 1);
 for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C) {
     out_t acc;
     resize_t dx, dy;
     resize_t unit_x, unit_y;

     // Weight of one whole sample step: 1.0 for floating-point, scale_n for
     // integer types (integer results are therefore left scaled by
     // scale_y_n * scale_x_n in BILINEAR mode).
     unit_x = (is_floating_point(resize_t)) ? 1.0 : scale_x_n;
     unit_y = (is_floating_point(resize_t)) ? 1.0 : scale_y_n;

     // Sample position in units of 1/scale_n of an input element.
     int32_t y = oy * scale_y_d + offset_y;
     int32_t x = ox * scale_x_d + offset_x;
     // Split into an integer input index and a remainder in [0, scale_n).
     int16_t iy = floor(y / scale_y_n);
     int16_t ix = floor(x / scale_x_n);
     int16_t ry = y - iy * scale_y_n;  // (y % scale_y_n)
     int16_t rx = x - ix * scale_x_n;  // (x % scale_x_n)

     if (is_floating_point(resize_t)) {
         // Normalise the remainder to a fraction of one sample step.
         dy = static_cast<resize_t>(ry) / static_cast<resize_t>(scale_y_n);
         dx = static_cast<resize_t>(rx) / static_cast<resize_t>(scale_x_n);
     } else {
         // Integer path keeps the raw remainder as the weight.
         dy = ry;
         dx = rx;
     }
     // Note that -1 <= iy < IH and -1 <= ix < IW
     // Limit both neighbouring indices to the valid range [0, IH - 1] and
     // [0, IW - 1] (apply_max_s / apply_min_s are defined elsewhere in the
     // specification).
     int16_t iy0 = apply_max_s(iy, 0);
     int16_t iy1 = apply_min_s(iy + 1, IH - 1);
     int16_t ix0 = apply_max_s(ix, 0);
     int16_t ix1 = apply_min_s(ix + 1, IW - 1);
     if (mode==BILINEAR) {
         using in_s_t = make_signed(in_t); // Use signed calculations for i8/i16
         in_s_t v00 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]));
         in_s_t v01 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]));
         in_s_t v10 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]));
         in_s_t v11 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]));
         // Each neighbour is weighted by the opposite partial area.
         acc  = v00 * (unit_y - dy) * (unit_x - dx);
         acc += v01 * (unit_y - dy) * dx;
         acc += v10 * dy * (unit_x - dx);
         acc += v11 * dy * dx;
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
     } else if (mode==NEAREST) {
         // These inner iy/ix shadow the floor indices declared above; in this
         // branch they hold the coordinates of the selected input sample.
         int32_t iy, ix;
         // Select the upper neighbour when the remainder is at least half a
         // sample step (ties go to iy1 / ix1).
         if (is_floating_point(resize_t)) {
             iy = (dy >= 0.5) ? iy1 : iy0;
             ix = (dx >= 0.5) ? ix1 : ix0;
         } else {
             iy = (2 * dy >= scale_y_n) ? iy1 : iy0;
             ix = (2 * dx >= scale_x_n) ? ix1 : ix0;
         }
         in_t v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
     }
 }
 ----
	//
	// This confidential and proprietary software may be used only as
	// authorised by a licensing agreement from ARM Limited
	// (C) COPYRIGHT 2020-2023 ARM Limited
	// ALL RIGHTS RESERVED
	// The entire notice above must be reproduced on all authorised
	// copies and copies may only be made to the extent permitted
	// by a licensing agreement from ARM Limited.

	=== Image Operators

	==== RESIZE

	Resizes a tensor. Resize is only allowed in the H and W dimensions.


	The height dimension is scaled by factor (scale_y_n/scale_y_d).
	The width dimension is scaled by factor (scale_x_n/scale_x_d).

	The NEAREST (nearest neighbor) mode returns the value of the input tensor closest to the
	calculated sample position for both floating-point and integer data formats.

	Floating-point BILINEAR mode returns a bilinearly interpolated output value
	based on the four closest input sample positions.

	For integer BILINEAR interpolation mode, the output value must
	be scaled by 1/(scale_y_n * scale_x_n) in a following operation to
	complete the interpolation (for example with a RESCALE operator).

	The following examples show practical uses of the parameters:

	* For approximate uniform input sampling between (0, 0) and (IH - 1, IW - 1), set:
	** scale_y_n/scale_y_d = (OH - 1)/(IH - 1) as integer ratios
	** scale_x_n/scale_x_d = (OW - 1)/(IW - 1) as integer ratios
	** offset_x = 0, offset_y = 0, border_x = 0, border_y = 0

	* For power of two upscale [OH - 1,OW - 1] = (1 << k) * [IH - 1, IW - 1],
	sampling between (0,0) and (IH - 1,IW - 1), set:
	** scale_y_n = (1 << k), scale_y_d = 1, offset_y = 0, border_y = 0
	** scale_x_n = (1 << k), scale_x_d = 1, offset_x = 0, border_x = 0

	* For power of two upscale [OH,OW] = (1 << k) * [IH,IW],
	sampling range approximately (-0.5, -0.5) to (IH - 0.5, IW - 0.5), set:
	** scale_y_n = 2 << k, scale_y_d = 2, offset_y = -(1 << k) + 1, border_y = (1 << k) - 1
	** scale_x_n = 2 << k, scale_x_d = 2, offset_x = -(1 << k) + 1, border_x = (1 << k) - 1

	The output dimensions can be derived from the input dimensions by inverting
	the scale as described in the pseudocode. The [border_y, border_x] values
	adjust the output size to allow fractional sampling beyond integer
	input position (IH - 1,IW - 1).

	include::{generated}/operators/RESIZE.adoc[]

	*Resize Modes:*
	|===
	|Mode|Description

	|NEAREST|Nearest Neighbor
	|BILINEAR|Bilinear interpolation
	|===

	[source,c++]
	----
	// RESIZE reference pseudocode.
	// First validates the scale, offset and border parameters, then for every
	// output element (n, oy, ox, c) maps the output position back to an input
	// sample position and writes either a weighted blend of the four
	// surrounding input values (BILINEAR) or the closest input value (NEAREST).

	// Ensure the image size is supported by GPU APIs and that for integer
	// implementations, position * stride does not overflow int32_t.
	ERROR_IF(max(OH,OW,IH,IW) >= 16384);
	// Every scale numerator and denominator must be strictly positive.
	ERROR_IF(scale_y_n <= 0 || scale_y_d <= 0 || scale_x_n <= 0 || scale_x_d <= 0);
	// if in_t=int8_t ensure that an int32_t accumulator can be used
	ERROR_IF(scale_y_n > (1 << 11) || scale_x_n > (1 << 11));
	// set a consistent lower limit of 1/16 downscale to simplify implementations
	ERROR_IF(scale_y_d >= 16 * scale_y_n || scale_x_d >= 16 * scale_x_n);
	// Offsets must lie in [-scale_n, 16 * scale_n).
	ERROR_IF(offset_y < -scale_y_n || offset_y >= 16 * scale_y_n);
	ERROR_IF(offset_x < -scale_x_n || offset_x >= 16 * scale_x_n);
	// Borders must lie in [-16 * scale_n, scale_n).
	ERROR_IF(border_y < -16 * scale_y_n || border_y >= scale_y_n);
	ERROR_IF(border_x < -16 * scale_x_n || border_x >= scale_x_n);
	// The output size must match the size obtained by inverting the scale
	// (idiv_check is defined elsewhere in the specification).
	ERROR_IF(OH != idiv_check((IH - 1) * scale_y_n - offset_y + border_y, scale_y_d) + 1);
	ERROR_IF(OW != idiv_check((IW - 1) * scale_x_n - offset_x + border_x, scale_x_d) + 1);
	for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C) {
	    out_t acc;
	    resize_t dx, dy;
	    resize_t unit_x, unit_y;

	    // Weight of one whole sample step: 1.0 for floating-point, scale_n for
	    // integer types (integer results are therefore left scaled by
	    // scale_y_n * scale_x_n in BILINEAR mode).
	    unit_x = (is_floating_point(resize_t)) ? 1.0 : scale_x_n;
	    unit_y = (is_floating_point(resize_t)) ? 1.0 : scale_y_n;

	    // Sample position in units of 1/scale_n of an input element.
	    int32_t y = oy * scale_y_d + offset_y;
	    int32_t x = ox * scale_x_d + offset_x;
	    // Split into an integer input index and a remainder in [0, scale_n).
	    int16_t iy = floor(y / scale_y_n);
	    int16_t ix = floor(x / scale_x_n);
	    int16_t ry = y - iy * scale_y_n; // (y % scale_y_n)
	    int16_t rx = x - ix * scale_x_n; // (x % scale_x_n)

	    if (is_floating_point(resize_t)) {
	        // Normalise the remainder to a fraction of one sample step.
	        dy = static_cast<resize_t>(ry) / static_cast<resize_t>(scale_y_n);
	        dx = static_cast<resize_t>(rx) / static_cast<resize_t>(scale_x_n);
	    } else {
	        // Integer path keeps the raw remainder as the weight.
	        dy = ry;
	        dx = rx;
	    }
	    // Note that -1 <= iy < IH and -1 <= ix < IW
	    // Limit both neighbouring indices to the valid range [0, IH - 1] and
	    // [0, IW - 1] (apply_max_s / apply_min_s are defined elsewhere in the
	    // specification).
	    int16_t iy0 = apply_max_s(iy, 0);
	    int16_t iy1 = apply_min_s(iy + 1, IH - 1);
	    int16_t ix0 = apply_max_s(ix, 0);
	    int16_t ix1 = apply_min_s(ix + 1, IW - 1);
	    if (mode==BILINEAR) {
	        using in_s_t = make_signed(in_t); // Use signed calculations for i8/i16
	        in_s_t v00 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]));
	        in_s_t v01 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]));
	        in_s_t v10 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]));
	        in_s_t v11 = static_cast<in_s_t>(tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]));
	        // Each neighbour is weighted by the opposite partial area.
	        acc = v00 * (unit_y - dy) * (unit_x - dx);
	        acc += v01 * (unit_y - dy) * dx;
	        acc += v10 * dy * (unit_x - dx);
	        acc += v11 * dy * dx;
	        tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
	    } else if (mode==NEAREST) {
	        // These inner iy/ix shadow the floor indices declared above; in this
	        // branch they hold the coordinates of the selected input sample.
	        int32_t iy, ix;
	        // Select the upper neighbour when the remainder is at least half a
	        // sample step (ties go to iy1 / ix1).
	        if (is_floating_point(resize_t)) {
	            iy = (dy >= 0.5) ? iy1 : iy0;
	            ix = (dx >= 0.5) ? ix1 : ix0;
	        } else {
	            iy = (2 * dy >= scale_y_n) ? iy1 : iy0;
	            ix = (2 * dx >= scale_x_n) ? ix1 : ix0;
	        }
	        in_t v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
	        tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
	    }
	}
	----