Revert RESIZE behavior to the 0.23 version

The current version does not match the reference model or the
serialization library. Revert to the old behavior until the
model has been updated and tested to confirm it works correctly.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I237dc3e94e6c31337073524527da75084ba7b578
diff --git a/chapters/image.adoc b/chapters/image.adoc
index 690480c..6f1d3cc 100644
--- a/chapters/image.adoc
+++ b/chapters/image.adoc
@@ -13,40 +13,34 @@
 
 Resizes a tensor. Resize is only allowed in the H and W dimensions.
 
-The height dimension is scaled by factor (scale_y_n/scale_y_d).
-The width dimension is scaled by factor (scale_x_n/scale_x_d).
-
 The NEAREST_NEIGHBOR mode returns the value of the input tensor closest to the
 calculated sample position for both floating-point and integer data formats.
 
 Floating-point BILINEAR mode returns a bilinearly interpolated output value
 based on the four closest input sample positions.
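+
+As a non-normative illustration (the helper below is hypothetical and not part
+of the specification), the floating-point sample position handling and the two
+modes can be sketched as follows:
+
+[source,c++]
+----
+#include <algorithm>
+#include <cmath>
+
+// Hypothetical sketch: compute one output sample at fractional position (y, x)
+// of a single-channel, row-major float input of size IH x IW.
+float resize_sample_example(const float *input, int IH, int IW,
+                            float y, float x, bool nearest) {
+    int iy = (int)std::floor(y), ix = (int)std::floor(x);
+    float dy = y - iy, dx = x - ix;
+    int iy0 = std::max(iy, 0),          ix0 = std::max(ix, 0);
+    int iy1 = std::min(iy + 1, IH - 1), ix1 = std::min(ix + 1, IW - 1);
+    if (nearest) {
+        // NEAREST_NEIGHBOR: return the input value closest to (y, x)
+        int ny = (dy >= 0.5f) ? iy1 : iy0;
+        int nx = (dx >= 0.5f) ? ix1 : ix0;
+        return input[ny * IW + nx];
+    }
+    // BILINEAR: blend the four closest input samples
+    return input[iy0 * IW + ix0] * (1 - dy) * (1 - dx)
+         + input[iy0 * IW + ix1] * (1 - dy) * dx
+         + input[iy1 * IW + ix0] * dy * (1 - dx)
+         + input[iy1 * IW + ix1] * dy * dx;
+}
+----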
 
-For integer BILINEAR interpolation mode, the output value must
-be scaled by 1/(scale_y_n * scale_x_n) in a following operation to
-complete the interpolation (for example with a RESCALE operator).
+For integer BILINEAR interpolation mode, the output value is calculated using
+the shift value together with the other parameters to create a fixed-point
+scaling factor for each of the four closest input samples. The scaled samples
+are then summed to produce the output value, which has 2 * shift fractional
+bits. To convert back to the original integer size, the output value must be
+rescaled.
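+
+As a non-normative illustration (the function below is hypothetical), with
+shift=10 the scaling unit is 1<<10 = 1024, the accumulator carries
+2 * shift = 20 fractional bits, and the final rescale divides by 1<<20:
+
+[source,c++]
+----
+#include <cstdint>
+
+// Hypothetical sketch of the integer BILINEAR arithmetic for one output value.
+// v00..v11 are the four closest input samples; dy and dx are the fractional
+// offsets in units of 1/(1<<shift), with 0 <= dy, dx < (1<<shift).
+int8_t integer_bilinear_example(int8_t v00, int8_t v01, int8_t v10, int8_t v11,
+                                int32_t dy, int32_t dx, int32_t shift) {
+    int32_t unit = 1 << shift;
+    int32_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx
+                + v10 * dy * (unit - dx)          + v11 * dy * dx;
+    // acc carries 2 * shift fractional bits; rescale back to the input range.
+    // In a graph this step would typically be a following RESCALE operator.
+    int32_t rounded = (acc + (1 << (2 * shift - 1))) >> (2 * shift);
+    return (int8_t)rounded;
+}
+----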
 
 The following examples show practical uses of the parameters:
 
 * For approximate uniform input sampling between (0, 0) and (IH-1, IW-1) set
-** scale_y_n/scale_y_d = (OH-1)/(IH-1) as integer ratios
-** scale_x_n/scale_x_d = (OW-1)/(IW-1) as integer ratios
-** offset_x = 0, offset_y = 0, border_x = 0, border_y = 0
+stride_y = ( (IH-1) * (1<<shift) ) / (OH-1),
+stride_x = ( (IW-1) * (1<<shift) ) / (OW-1),
+offset_x=0, offset_y=0, border_x=0, border_y=0 (see the sketch after this list).
 
-* For power of two upscale [OH-1,OW-1] = (1<<k) * [IH-1, IW-1],
-sampling between (0,0) and (IH-1,IW-1), set:
-** scale_y_n = (1<<k), scale_y_d = 1, offset_y = 0, border_y = 0
-** scale_x_n = (1<<k), scale_x_d = 1, offset_x = 0, border_x = 0
-
-* For power of two upscale [OH,OW] = (1<<k) * [IH,IW],
-sampling range approximately (-0.5, -0.5) to (IH-0.5, IW-0.5), set:
-** scale_y_n = 2<<k, scale_y_d = 2, offset_y = -(1<<k)+1, border_y = (1<<k)-1
-** scale_x_n = 2<<k, scale_x_d = 2, offset_x = -(1<<k)+1, border_x = (1<<k)-1
-
-The output dimensions can be derived from the input dimensions by inverting
-the scale as described in the pseudocode. The [border_y, border_x] values
-adjust the output size to allow fractional sampling beyond integer
-input position (IH-1,IW-1).
+* For a power-of-two upscale by factor (1<<k), the following parameters can
+be used for fixed-point upscales:
+** For upscale [OH-1,OW-1] = (1<<k) * [IH-1, IW-1] set
+shift=k, stride_y=1, stride_x=1, offset_x=0, offset_y=0,
+border_x=0, border_y=0.
+** For upscale [OH,OW] = (1<<k) * [IH,IW] set
+shift=(k+1), stride_y=2, stride_x=2, offset_x=-(1<<k)+1, offset_y=-(1<<k)+1,
+border_x=1<<(k-1), border_y=1<<(k-1). This samples the input area
+approximately from (-0.5, -0.5) to (IH-0.5, IW-0.5).
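+
+The parameter choices above can be checked with a small sketch (the struct and
+function names are hypothetical and not part of the specification):
+
+[source,c++]
+----
+#include <cstdint>
+
+// Hypothetical sketch: derive the attributes for approximate uniform sampling
+// between (0, 0) and (IH-1, IW-1), assuming OH > 1 and OW > 1.
+struct example_resize_params_t {
+    int32_t stride_y, stride_x, offset_y, offset_x, border_y, border_x;
+};
+
+example_resize_params_t example_uniform_params(int32_t IH, int32_t IW,
+                                               int32_t OH, int32_t OW,
+                                               int32_t shift) {
+    example_resize_params_t p;
+    p.stride_y = ((IH - 1) * (1 << shift)) / (OH - 1);
+    p.stride_x = ((IW - 1) * (1 << shift)) / (OW - 1);
+    p.offset_y = p.offset_x = 0;
+    p.border_y = p.border_x = 0;
+    return p;
+}
+
+// For the power-of-two case [OH,OW] = (1<<k) * [IH,IW], substituting
+// shift=k+1, stride=2, offset=-(1<<k)+1 and border=1<<(k-1) into the
+// output size derivation in the pseudocode below gives OH = (1<<k) * IH.
+----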
 
 *Arguments:*
 
@@ -54,9 +48,11 @@
 |Argument|Type|Name|Shape|Description
 
 |Input|in_t*|input|[N,IH,IW,C]|Input tensor
-|Attribute|int16_t *|scale|[4]|[scale_y_n, scale_y_d, scale_x_n, scale_x_d]
-|Attribute|int16_t *|offset|[2]|[offset_y, offset_x]
+|Attribute|int32_t* |output_size|[2]|[OH,OW]
+|Attribute|resize_t*|stride|[2]|[stride_y, stride_x]
+|Attribute|resize_t*|offset|[2]|[offset_y, offset_x]
 |Attribute|int32_t* |border|[2]|[border_y, border_x]
+|Attribute|int32_t  |shift|-|Shift value (must be zero if resize_t is float)
 |Attribute|mode_t|mode|-|BILINEAR or NEAREST
 |Output|out_t*|output|[N,OH,OW,C]|Output tensor
 |===
@@ -65,61 +61,57 @@
 
 [source,c++]
 ----
+// Derive the output dimensions from the input dimensions
+OH = idiv((IH-1)*(1<<shift) - offset_y, stride_y) + 1 + border_y;
+OW = idiv((IW-1)*(1<<shift) - offset_x, stride_x) + 1 + border_x;
 // Ensure the image size is supported by GPU APIs and that for integer
 // implementations, position * stride does not overflow int32_t.
 ERROR_IF(max(OH,OW,IH,IW) >= 16384);
-ERROR_IF(scale_y_n <= 0 || scale_y_d <=0 || scale_x_n <=0 || scale_x_d <=0);
-// if in_t=int8_t ensure that an int32_t accumulator can be used
-ERROR_IF(scale_y_n > (1<<11) || scale_x_n > (1<<11));
-// set a consistent lower limit of 1/16 downscale to simplify implementations
-ERROR_IF(scale_y_d >= 16 * scale_y_n || scale_x_d >= 16 * scale_x_n);
-ERROR_IF(offset_y < -scale_y_n || offset_y >= 16*scale_y_n);
-ERROR_IF(offset_x < -scale_x_n || offset_x >= 16*scale_x_n);
-ERROR_IF(border_y < -16*scale_y_n || border_y >= scale_y_n);
-ERROR_IF(border_x < -16*scale_x_n || border_x >= scale_x_n);
-ERROR_IF(OH != idiv_check((IH-1)*scale_y_n - offset_y + border_y, scale_y_d) + 1);
-ERROR_IF(OW != idiv_check((IW-1)*scale_x_n - offset_x + border_x, scale_x_d) + 1);
+ERROR_IF(stride_x <= 0 || stride_y <= 0);
+if (is_floating_point(resize_t)) {
+    // The shift attribute is not used for floating point
+    ERROR_IF(shift != 0);
+    ERROR_IF(stride_x > IW || stride_y > IH);
+} else {
+    // if in_t=int8_t ensure that an int32_t accumulator can be used
+    ERROR_IF(shift < 1 || shift > 11);
+    // set a consistent lower limit of 1/16 downscale
+    // independent of the shift value to simplify implementations
+    ERROR_IF(stride_x >= (16 << shift));
+    ERROR_IF(stride_y >= (16 << shift));
+    // offset range is similarly limited to maximum 16 pixels irrespective
+    // of shift. Both stride and offset fit in int16_t when shift=11.
+    ERROR_IF(offset_x <= (-16 << shift) || offset_x >= (16 << shift));
+    ERROR_IF(offset_y <= (-16 << shift) || offset_y >= (16 << shift));
+}
 for_each(0 <= n < N, 0 <= oy < OH, 0 <= ox < OW; 0 <= c < C) {
-    out_t acc;
-    resize_t dx, dy;
-
-    int32_t y = oy * scale_y_d + offset_y;
-    int32_t x = ox * scale_x_d + offset_x;
-    int16_t iy = floor(y / scale_y_n);
-    int16_t ix = floor(x / scale_x_n);
-
+    unit = (is_floating_point(resize_t)) ? 1.0 : (1 << shift);
+    y = oy * stride_y + offset_y;
+    x = ox * stride_x + offset_x;
     if (is_floating_point(resize_t)) {
-        dy = ((resize_t)y / (resize_t)scale_y_n) - iy;
-        dx = ((resize_t)x / (resize_t)scale_x_n) - ix;
+        iy = (int32_t)apply_floor(y); dy = y - (resize_t)iy;
+        ix = (int32_t)apply_floor(x); dx = x - (resize_t)ix;
     } else {
-        dy = y - iy * scale_y_n;
-        dx = y - ix * scale_x_n;
+        iy = y >> shift; dy = y - (iy<<shift);
+        ix = x >> shift; dx = x - (ix<<shift);
     }
-    // Note that -1 <= iy < IH and -1 <= ix < IW
-    int16_t iy0 = apply_max(iy, 0);
-    int16_t iy1 = apply_min(iy+1, IH-1);
-    int16_t ix0 = apply_max(ix, 0);
-    int16_t ix1 = apply_min(ix+1, IW-1);
+    iy0 = apply_max(iy, 0);
+    iy1 = apply_min(iy+1, IH-1);
+    ix0 = apply_max(ix, 0);
+    ix1 = apply_min(ix+1, IW-1);
+    REQUIRE(ix0 <= ix1 && iy0 <= iy1);
     if (mode==BILINEAR) {
-        in_t v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
-        in_t v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
-        in_t v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
-        in_t v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
-        acc  = v00 * (scale_y_n - dy) * (scale_x_n - dx);
-        acc += v01 * (scale_y_n - dy) * dx;
-        acc += v10 * dy * (scale_x_n - dx);
-        acc += v11 * dy * dx;
+        v00 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix0,c]);
+        v01 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy0,ix1,c]);
+        v10 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix0,c]);
+        v11 = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy1,ix1,c]);
+        out_t acc = v00 * (unit - dy) * (unit - dx) + v01 * (unit - dy) * dx;
+        acc = acc + v10 * dy * (unit-dx) + v11 * dy * dx;
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], acc);
     } else if (mode==NEAREST) {
-        int32_t iy, ix;
-        if (is_floating_point(resize_t)) {
-            iy = (dy >= 0.5) ? iy1 : iy0;
-            ix = (dx >= 0.5) ? ix1 : ix0;
-        } else {
-            iy = (2*dy >= scale_y_n) ? iy1 : iy0;
-            ix = (2*dx >= scale_x_n) ? ix1 : ix0;
-        }
-        in_t v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
+        iy = (dy >= unit/2) ? iy1 : iy0;
+        ix = (dx >= unit/2) ? ix1 : ix0;
+        v = tensor_read<in_t>(input, [N,IH,IW,C], [n,iy,ix,c]);
         tensor_write<out_t>(output, [N,OH,OW,C], [n,oy,ox,c], v);
     }
 }