Revert RESIZE behavior to the 0.23 version

The current version does not match the reference model or the
serialization library. Revert to the old behavior until the
model has been updated and tested to confirm it works correctly.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I237dc3e94e6c31337073524527da75084ba7b578
diff --git a/chapters/image.adoc b/chapters/image.adoc
index 690480c..6f1d3cc 100644
--- a/chapters/image.adoc
+++ b/chapters/image.adoc
@@ -13,40 +13,34 @@
 
 Resizes a tensor. Resize is only allowed in the H and W dimensions.
 
-The height dimension is scaled by factor (scale_y_n/scale_y_d).
-The width dimension is scaled by factor (scale_x_n/scale_x_d).
-
 The NEAREST_NEIGHBOR mode returns the value of the input tensor closest to the
 calculated sample position for both floating-point and integer data formats.
 
 Floating-point BILINEAR mode returns a bilinearly interpolated output value
 based on the four closest input sample positions.
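+
+As a non-normative illustration (the helper below is hypothetical and not part
+of the specification), the floating-point sample position handling and the two
+modes can be sketched as follows:
+
+[source,c++]
+----
+#include <algorithm>
+#include <cmath>
+
+// Hypothetical sketch: compute one output sample at fractional position (y, x)
+// of a single-channel, row-major float input of size IH x IW.
+float resize_sample_example(const float *input, int IH, int IW,
+                            float y, float x, bool nearest) {
+    int iy = (int)std::floor(y), ix = (int)std::floor(x);
+    float dy = y - iy, dx = x - ix;
+    int iy0 = std::max(iy, 0),          ix0 = std::max(ix, 0);
+    int iy1 = std::min(iy + 1, IH - 1), ix1 = std::min(ix + 1, IW - 1);
+    if (nearest) {
+        // NEAREST_NEIGHBOR: return the input value closest to (y, x)
+        int ny = (dy >= 0.5f) ? iy1 : iy0;
+        int nx = (dx >= 0.5f) ? ix1 : ix0;
+        return input[ny * IW + nx];
+    }
+    // BILINEAR: blend the four closest input samples
+    return input[iy0 * IW + ix0] * (1 - dy) * (1 - dx)
+         + input[iy0 * IW + ix1] * (1 - dy) * dx
+         + input[iy1 * IW + ix0] * dy * (1 - dx)
+         + input[iy1 * IW + ix1] * dy * dx;
+}
+----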
 
-For integer BILINEAR interpolation mode, the output value must
-be scaled by 1/(scale_y_n * scale_x_n) in a following operation to
-complete the interpolation (for example with a RESCALE operator).
+For integer BILINEAR interpolation mode, the output value is calculated using
+the shift value together with the other parameters to create a fixed-point
+scaling factor for each of the four closest input samples. The scaled samples
+are then summed to produce the output value, which has 2 * shift fractional
+bits. To convert back to the original integer size, the output value must be
+rescaled.
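+
+As a non-normative illustration (the function below is hypothetical), with
+shift=10 the scaling unit is 1<<10 = 1024, the accumulator carries
+2 * shift = 20 fractional bits, and the final rescale divides by 1<<20:
+
+[source,c++]
+----
+#include <cstdint>
+
+// Hypothetical sketch of the integer BILINEAR arithmetic for one output value.
+// v00..v11 are the four closest input samples; dy and dx are the fractional
+// offsets in units of 1/(1<<shift), with 0 <= dy, dx < (1<<shift).
+int8_t integer_bilinear_example(int8_t v00, int8_t v01, int8_t v10, int8_t v11,
+                                int32_t dy, int32_t dx, int32_t shift) {
+    int32_t unit = 1 << shift;
+    int32_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx
+                + v10 * dy * (unit - dx)          + v11 * dy * dx;
+    // acc carries 2 * shift fractional bits; rescale back to the input range.
+    // In a graph this step would typically be a following RESCALE operator.
+    int32_t rounded = (acc + (1 << (2 * shift - 1))) >> (2 * shift);
+    return (int8_t)rounded;
+}
+----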
 
 The following examples show practical uses of the parameters:
 
 * For approximate uniform input sampling between (0, 0) and (IH-1, IW-1) set
-** scale_y_n/scale_y_d = (OH-1)/(IH-1) as integer ratios
-** scale_x_n/scale_x_d = (OW-1)/(IW-1) as integer ratios
-** offset_x = 0, offset_y = 0, border_x = 0, border_y = 0
+stride_y = ( (IH-1) * (1<<shift) ) / (OH-1),
+stride_x = ( (IW-1) * (1<<shift) ) / (OW-1),
+offset_x=0, offset_y=0, border_x=0, border_y=0 (see the sketch after this list).
 
-* For power of two upscale [OH-1,OW-1] = (1<<k) * [IH-1, IW-1],
-sampling between (0,0) and (IH-1,IW-1), set:
-** scale_y_n = (1<<k), scale_y_d = 1, offset_y = 0, border_y = 0
-** scale_x_n = (1<<k), scale_x_d = 1, offset_x = 0, border_x = 0
-
-* For power of two upscale [OH,OW] = (1<<k) * [IH,IW],
-sampling range approximately (-0.5, -0.5) to (IH-0.5, IW-0.5), set:
-** scale_y_n = 2<<k, scale_y_d = 2, offset_y = -(1<<k)+1, border_y = (1<<k)-1
-** scale_x_n = 2<<k, scale_x_d = 2, offset_x = -(1<<k)+1, border_x = (1<<k)-1
-
-The output dimensions can be derived from the input dimensions by inverting
-the scale as described in the pseudocode. The [border_y, border_x] values
-adjust the output size to allow fractional sampling beyond integer
-input position (IH-1,IW-1).
+* For a power-of-two upscale by factor (1<<k), the following parameters can
+be used for fixed-point upscales:
+** For upscale [OH-1,OW-1] = (1<<k) * [IH-1, IW-1] set
+shift=k, stride_y=1, stride_x=1, offset_x=0, offset_y=0,
+border_x=0, border_y=0.
+** For upscale [OH,OW] = (1<<k) * [IH,IW] set
+shift=(k+1), stride_y=2, stride_x=2, offset_x=-(1<<k)+1, offset_y=-(1<<k)+1,
+border_x=1<<(k-1), border_y=1<<(k-1). This samples the input area
+approximately from (-0.5, -0.5) to (IH-0.5, IW-0.5).
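+
+The parameter choices above can be checked with a small sketch (the struct and
+function names are hypothetical and not part of the specification):
+
+[source,c++]
+----
+#include <cstdint>
+
+// Hypothetical sketch: derive the attributes for approximate uniform sampling
+// between (0, 0) and (IH-1, IW-1), assuming OH > 1 and OW > 1.
+struct example_resize_params_t {
+    int32_t stride_y, stride_x, offset_y, offset_x, border_y, border_x;
+};
+
+example_resize_params_t example_uniform_params(int32_t IH, int32_t IW,
+                                               int32_t OH, int32_t OW,
+                                               int32_t shift) {
+    example_resize_params_t p;
+    p.stride_y = ((IH - 1) * (1 << shift)) / (OH - 1);
+    p.stride_x = ((IW - 1) * (1 << shift)) / (OW - 1);
+    p.offset_y = p.offset_x = 0;
+    p.border_y = p.border_x = 0;
+    return p;
+}
+
+// For the power-of-two case [OH,OW] = (1<<k) * [IH,IW], substituting
+// shift=k+1, stride=2, offset=-(1<<k)+1 and border=1<<(k-1) into the
+// output size derivation in the pseudocode below gives OH = (1<<k) * IH.
+----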
 
 *Arguments:*
 
@@ -54,9 +48,11 @@
 |Argument|Type|Name|Shape|Description
 
 |Input|in_t*|input|[N,IH,IW,C]|Input tensor
-|Attribute|int16_t *|scale|[4]|[scale_y_n, scale_y_d, scale_x_n, scale_x_d]
-|Attribute|int16_t *|offset|[2]|[offset_y, offset_x]
+|Attribute|int32_t* |output_size|[2]|[OH,OW]
+|Attribute|resize_t*|stride|[2]|[stride_y, stride_x]
+|Attribute|resize_t*|offset|[2]|[offset_y, offset_x]
 |Attribute|int32_t* |border|[2]|[border_y, border_x]
+|Attribute|int32_t  |shift|-|Shift value (must be zero if resize_t is float)
 |Attribute|mode_t|mode|-|BILINEAR or NEAREST
 |Output|out_t*|output|[N,OH,OW,C]|Output tensor
 |===
@@ -65,61 +61,57 @@
 
 [source,c++]
 ----
+// Derive the output dimensions from the input dimensions
+OH = idiv((IH-1)*(1<<shift) - offset_y, stride_y) + 1 + border_y;
+OW = idiv((IW-1)*(1<<shift) - offset_x, stride_x) + 1 + border_x;
 // Ensure the image size is supported by GPU APIs and that for integer
 // implementations, position * stride does not overflow int32_t.
 ERROR_IF(max(OH,OW,IH,IW) >= 16384);
-ERROR_IF(scale_y_n <= 0 || scale_y_d <=0 || scale_x_n <=0 || scale_x_d <=0);
-// if in_t=int8_t ensure that an int32_t accumulator can be used
-ERROR_IF(scale_y_n > (1<<11) || scale_x_n > (1<<11));
-// set a consistent lower limit of 1/16 downscale to simplify implementations
-ERROR_IF(scale_y_d >= 16 * scale_y_n || scale_x_d >= 16 * scale_x_n);
-ERROR_IF(offset_y < -scale_y_n || offset_y >= 16*scale_y_n);
-ERROR_IF(offset_x < -scale_x_n || offset_x >= 16*scale_x_n);
-ERROR_IF(border_y < -16*scale_y_n || border_y >= scale_y_n);
-ERROR_IF(border_x < -16*scale_x_n || border_x >= scale_x_n);
-ERROR_IF(OH != idiv_check((IH-1)*scale_y_n - offset_y + border_y, scale_y_d) + 1);
-ERROR_IF(OW != idiv_check((IW-1)*scale_x_n - offset_x + border_x, scale_x_d) + 1);
+ERROR_IF(stride_x <= 0 || stride_y <= 0);
+if (is_floating_point(resize_t)) {
+    // The shift attribute is not used for floating point
+    ERROR_IF(shift != 0);
+    ERROR_IF(stride_x > IW || stride_y > IH);
+} else {
+    // if in_t=int8_t ensure that an int32_t accumulator can be used
+    ERROR_IF(shift < 1 || shift > 11);
+    // set a consistent lower limit of 1/16 downscale
+    // independent of the shift value to simplify implementations
+    ERROR_IF(stride_x >= (16 << shift));
+    ERROR_IF(stride_y >= (16 << shift));
+    // offset range is similarly limited to maximum 16 pixels irrespective
+    // of shift. Both stride and offset fit in int16_t when shift=11.
+    ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift));
+    ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift));
+}
 for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
-    out_t acc;
-    resize_t dx, dy;
-
-    int32_t y = oy * scale_y_d + offset_y;
-    int32_t x = ox * scale_x_d + offset_x;
-    int16_t iy = floor(y / scale_y_n);
-    int16_t ix = floor(x / scale_x_n);
-
+    unit = (is_floating_point(resize_t)) ? 1.0 : (1 << shift);
+    y = oy * stride_y + offset_y;
+    x = ox * stride_x + offset_x;
     if (is_floating_point(resize_t)) {
-        dy = ((resize_t)y / (resize_t)scale_y_n) - iy;
-        dx = ((resize_t)x / (resize_t)scale_x_n) - ix;
+        iy = (int32_t)apply_floor(y); dy = y - (resize_t)iy;
+        ix = (int32_t)apply_floor(x); dx = x - (resize_t)ix;
     } else {
-        dy = y - iy * scale_y_n;
-        dx = y - ix * scale_x_n;
+        iy = y >> shift; dy = y - (iy<<shift);
+        ix = x >> shift; dx = x - (ix<<shift);
     }
-    // Note that -1 <= iy < IH and -1 <= ix < IW
-    int16_t iy0 = apply_max(iy, 0);
-    int16_t iy1 = apply_min(iy+1, IH-1);
-    int16_t ix0 = apply_max(ix, 0);
-    int16_t ix1 = apply_min(ix+1, IW-1);
+    iy0 = apply_max(iy, 0);
+    iy1 = apply_min(iy+1, IH-1);
+    ix0 = apply_max(ix, 0);
+    ix1 = apply_min(ix+1, IW-1);
+    REQUIRE(ix0 <= ix1 && iy0 <= iy1);
     if (mode==BILINEAR) {
-        in_t v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
-        in_t v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
-        in_t v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
-        in_t v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
-        acc  = v00 * (scale_y_n - dy) * (scale_x_n - dx);
-        acc += v01 * (scale_y_n - dy) * dx;
-        acc += v10 * dy * (scale_x_n - dx);
-        acc += v11 * dy * dx;
+        v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
+        v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
+        v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
+        v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
+        out_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx;
+        acc = acc + v10 * dy * (unit-dx) + v11 * dy * dx;
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
     } else if (mode==NEAREST) {
-        int32_t iy, ix;
-        if (is_floating_point(resize_t)) {
-            iy = (dy >= 0.5) ? iy1 : iy0;
-            ix = (dx >= 0.5) ? ix1 : ix0;
-        } else {
-            iy = (2*dy >= scale_y_n) ? iy1 : iy0;
-            ix = (2*dx >= scale_x_n) ? ix1 : ix0;
-        }
-        in_t v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
+        iy = (dy >= unit/2) ? iy1 : iy0;
+        ix = (dx >= unit/2) ? ix1 : ix0;
+        v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
     }
 }