chapters/image.adoc - tosa/specification - Gitiles

 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
 // (C) COPYRIGHT 2020-2021 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
 // by a licensing agreement from ARM Limited.

 === Image Operators

 ==== RESIZE

 Resizes a tensor. Resize is only allowed in the H and W dimensions.

 The NEAREST (nearest neighbor) mode returns the value of the input tensor
 closest to the calculated sample position for both floating-point and integer
 data formats.

 Floating-point BILINEAR mode returns a bilinearly interpolated output value
 based on the four closest input sample positions.

 For integer BILINEAR interpolation mode, the shift value is used along with the
 other parameters to create a fixed-point scaling factor for each of the four
 input samples. The scaled samples are then summed to create the output value,
 which has 2 * shift fractional bits. To convert back to the original integer
 size, the output value must be rescaled.

 For floating-point stride, stride_y should be set to IH/OH and stride_x should
 be set to IW/OW. When using integer stride, stride_y is approximately
 (IH<<shift)/OH and stride_x is approximately (IW<<shift)/OW. OH and OW are also
 supplied as inputs because deriving OH and OW from the strides can produce
 off-by-one errors.

 *Arguments:*

 |===
 |Argument|Type|Name|Shape|Description

 |Input|in_t*|input|[N,IH,IW,C]|Input tensor
 |Attribute|int*|output_size|[2]|[OH,OW]
 |Attribute|resize_t*|stride|[2]|[stride_y, stride_x]
 |Attribute|resize_t*|offset|[2]|[offset_y, offset_x]
 |Attribute|int      |shift|-|Shift value (must be zero if resize_t is float)
 |Attribute|mode_t|mode|-|BILINEAR or NEAREST
 |Output|out_t*|output|[N,OH,OW,C]|Output tensor
 |===

 *Operation Function*

 [source,c++]
 ----
 // Ensure image size is supported by GPU APIs and that for integer
 // implementations, position * stride does not overflow int32_t.
 ERROR_IF(max(OH,OW,IH,IW) >= 16384);
 ERROR_IF(stride_x <= 0 || stride_y <= 0);
 if (resize_t == float_t) {
     // The shift attribute is not used for floating point
     ERROR_IF(shift != 0);
     ERROR_IF(stride_x > IW || stride_y > IH);
 } else {
     // if in_t=int8_t ensure that an int32_t accumulator can be used
     ERROR_IF(shift < 1 || shift > 11);
     // set a consistent lower limit of 1/16 downscale
     // independent of the shift value to simplify implementations
     ERROR_IF(stride_x >= (16 << shift));
     ERROR_IF(stride_y >= (16 << shift));
     // offset range is similarly limited to maximum 16 pixels irrespective
     // of shift. Both stride and offset fit in int16_t when shift=11.
     ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift));
     ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift));
 }
 for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C) {
     unit = (resize_t == float_t) ? 1.0 : (1 << shift);
     y = oy * stride_y + offset_y;
     x = ox * stride_x + offset_x;
     if (resize_t == float_t) {
         iy = (int)apply_floor(y); dy = y - (float_t)iy;
         ix = (int)apply_floor(x); dx = x - (float_t)ix;
     } else {
         iy = y >> shift; dy = y - (iy<<shift);
         ix = x >> shift; dx = x - (ix<<shift);
     }
     iy0 = apply_max(iy, 0);
     iy1 = apply_min(iy+1, IH-1);
     ix0 = apply_max(ix, 0);
     ix1 = apply_min(ix+1, IW-1);
     REQUIRE(ix0 <= ix1 && iy0 <= iy1);
     if (mode==BILINEAR) {
         v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
         v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
         v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
         v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
         out_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx;
         acc = acc + v10 * dy * (unit-dx) + v11 * dy * dx;
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
     } else if (mode==NEAREST) {
         iy = (dy >= unit/2) ? iy1 : iy0;
         ix = (dx >= unit/2) ? ix1 : ix0;
         v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
     }
 }
 ----

 *Supported Data Types:*

 |===
 |Profile|Mode|resize_t|in_t|out_t

 |Any|signed 8,  bilinear|int16_t|int8_t|int32_t
 |Any|signed 8,  nearest |int16_t|int8_t|int8_t
 |Any|signed 16, bilinear|int16_t|int16_t|int48_t
 |Any|signed 16, nearest |int16_t|int16_t|int16_t
 |MI,MT|floating-point   |float_t|float_t|float_t
 |===

 *Resize Modes:*
 |===
 |Mode|Description

 |NEAREST|Nearest Neighbor
 |BILINEAR|Bilinear interpolation
 |===
	//
	// This confidential and proprietary software may be used only as
	// authorised by a licensing agreement from ARM Limited
	// (C) COPYRIGHT 2020-2021 ARM Limited
	// ALL RIGHTS RESERVED
	// The entire notice above must be reproduced on all authorised
	// copies and copies may only be made to the extent permitted
	// by a licensing agreement from ARM Limited.

	=== Image Operators

	==== RESIZE

	Resizes a tensor. Resize is only allowed in the H and W dimensions.

	The NEAREST (nearest neighbor) mode returns the value of the input tensor
	closest to the calculated sample position for both floating-point and integer
	data formats.

	Floating-point BILINEAR mode returns a bilinearly interpolated output value
	based on the four closest input sample positions.

	For integer BILINEAR interpolation mode, the shift value is used along with the
	other parameters to create a fixed-point scaling factor for each of the four
	input samples. The scaled samples are then summed to create the output value,
	which has 2 * shift fractional bits. To convert back to the original integer
	size, the output value must be rescaled.

	For floating-point stride, stride_y should be set to IH/OH and stride_x should
	be set to IW/OW. When using integer stride, stride_y is approximately
	(IH<<shift)/OH and stride_x is approximately (IW<<shift)/OW. OH and OW are also
	supplied as inputs because deriving OH and OW from the strides can produce
	off-by-one errors.

	Arguments:

	|===
	|Argument|Type|Name|Shape|Description

	|Input|in_t*|input|[N,IH,IW,C]|Input tensor
	|Attribute|int*|output_size|[2]|[OH,OW]
	|Attribute|resize_t*|stride|[2]|[stride_y, stride_x]
	|Attribute|resize_t*|offset|[2]|[offset_y, offset_x]
	|Attribute|int |shift|-|Shift value (must be zero if resize_t is float)
	|Attribute|mode_t|mode|-|BILINEAR or NEAREST
	|Output|out_t*|output|[N,OH,OW,C]|Output tensor
	|===

	Operation Function

	[source,c++]
	----
	// Ensure image size is supported by GPU APIs and that for integer
	// implementations, position * stride does not overflow int32_t.
	ERROR_IF(max(OH,OW,IH,IW) >= 16384);
	ERROR_IF(stride_x <= 0 || stride_y <= 0);
	if (resize_t == float_t) {
	// The shift attribute is not used for floating point
	ERROR_IF(shift != 0);
	ERROR_IF(stride_x > IW || stride_y > IH);
	} else {
	// if in_t=int8_t ensure that an int32_t accumulator can be used
	ERROR_IF(shift < 1 || shift > 11);
	// set a consistent lower limit of 1/16 downscale
	// independent of the shift value to simplify implementations
	ERROR_IF(stride_x >= (16 << shift));
	ERROR_IF(stride_y >= (16 << shift));
	// offset range is similarly limited to maximum 16 pixels irrespective
	// of shift. Both stride and offset fit in int16_t when shift=11.
	ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift));
	ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift));
	}
	for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW, 0 <= c < C) {
	unit = (resize_t == float_t) ? 1.0 : (1 << shift);
	y = oy * stride_y + offset_y;
	x = ox * stride_x + offset_x;
	if (resize_t == float_t) {
	iy = (int)apply_floor(y); dy = y - (float_t)iy;
	ix = (int)apply_floor(x); dx = x - (float_t)ix;
	} else {
	iy = y >> shift; dy = y - (iy<<shift);
	ix = x >> shift; dx = x - (ix<<shift);
	}
	iy0 = apply_max(iy, 0);
	iy1 = apply_min(iy+1, IH-1);
	ix0 = apply_max(ix, 0);
	ix1 = apply_min(ix+1, IW-1);
	REQUIRE(ix0 <= ix1 && iy0 <= iy1);
	if (mode==BILINEAR) {
	v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
	v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
	v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
	v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
	out_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx;
	acc = acc + v10 * dy * (unit-dx) + v11 * dy * dx;
	tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
	} else if (mode==NEAREST) {
	iy = (dy >= unit/2) ? iy1 : iy0;
	ix = (dx >= unit/2) ? ix1 : ix0;
	v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
	tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
	}
	}
	----

	Supported Data Types:

	|===
	|Profile|Mode|resize_t|in_t|out_t

	|Any|signed 8, bilinear|int16_t|int8_t|int32_t
	|Any|signed 8, nearest |int16_t|int8_t|int8_t
	|Any|signed 16, bilinear|int16_t|int16_t|int48_t
	|Any|signed 16, nearest |int16_t|int16_t|int16_t
	|MI,MT|floating-point |float_t|float_t|float_t
	|===

	Resize Modes:
	|===
	|Mode|Description

	|NEAREST|Nearest Neighbor
	|BILINEAR|Bilinear interpolation
	|===