[MLBEDSW-2787] Remove op.attrs["rescale"] in softmax.py

Added a RescaleAdd operation to avoid the non-standard attribute
"rescale" on the Add operation. Also changed ResizeBilinear in the
same way, replacing the attribute with an Operation.rescale member.

Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
Change-Id: I1d286f63890585c06b8a161df1ff77e3f844a4b9
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 511ac95..3c80658 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -288,11 +288,11 @@
 
         # Setup the scale value
         if scaled_op.inputs[0].dtype.bits == 8 and scaled_op.outputs[0].dtype.bits == 16:
-            scaled_op.attrs["rescale"] = 128
+            scaled_op.rescale = 128
         elif scaled_op.inputs[0].dtype.bits == 16 and scaled_op.outputs[0].dtype.bits == 8:
-            scaled_op.attrs["rescale"] = 1 / 128
-        elif "rescale" in scaled_op.attrs:
-            del scaled_op.attrs["rescale"]
+            scaled_op.rescale = 1 / 128
+        else:
+            scaled_op.rescale = None
         scaled_op.set_ifm_ofm_shapes()
 
     return op
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 0711702..8e4d33a 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -91,6 +91,7 @@
 elementwise_op_map = {
     Op.Mul: NpuElementWiseOp.MUL,
     Op.Add: NpuElementWiseOp.ADD,
+    Op.RescaleAdd: NpuElementWiseOp.ADD,
     Op.Sub: NpuElementWiseOp.SUB,
     Op.Minimum: NpuElementWiseOp.MIN,
     Op.Maximum: NpuElementWiseOp.MAX,
@@ -386,8 +387,8 @@
     npu_op = NpuPoolingOperation(pool_op)
     set_common_op_fields(npu_op, cmd, arch)
     # Pooling specific info
-    if op.type == Op.ResizeBilinear and "rescale" in op.attrs:
-        npu_op.rescale = op.attrs["rescale"]
+    if op.type == Op.ResizeBilinear:
+        npu_op.rescale = op.rescale
     return npu_op
 
 
@@ -426,8 +427,9 @@
         output_scale = npu_op.ifm2.quantization.scale_f32
     if op.type == Op.LeakyRelu:
         output_scale = op.attrs["alpha"]
-    if op.type in (Op.Add, Op.Sub) and "rescale" in op.attrs:
-        npu_op.rescale = op.attrs.get("rescale")
+    if op.type == Op.RescaleAdd:
+        assert op.rescale is not None, f"{op.type} must have rescale"
+        npu_op.rescale = op.rescale
     if op.type in (Op.Add, Op.Mul, Op.Sub):
         if op.activation is not None and op.activation.op_type in (Op.Sigmoid, Op.Tanh):
             output_scale = 1 / 0x3000
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index af36587..844f298 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -219,6 +219,7 @@
     Relu = OperatorInfo(indices=IFM_INDICES)
     Relu6 = OperatorInfo(indices=IFM_INDICES)
     ReluN1To1 = OperatorInfo(indices=IFM_INDICES)
+    RescaleAdd = OperatorInfo(block_type=NpuBlockType.ElementWise, indices=IFM_IFM2_INDICES)
     Reshape = OperatorInfo(indices=IFM_INDICES)
     ResizeBilinear = OperatorInfo(block_type=NpuBlockType.Pooling, indices=IFM_INDICES)
     ResizeNearestNeighbor = OperatorInfo()
@@ -408,6 +409,7 @@
         "_kernel",
         "ifm_shapes",
         "ofm_shapes",
+        "rescale",
     )
 
     def __init__(self, op_type: Op, name: str):
@@ -431,6 +433,9 @@
         self._kernel = None
         self.ifm_shapes: List[Shape4D] = []
         self.ofm_shapes: List[Shape4D] = []
+        # If not None: contains a rescale to be used as output scaling
+        # (which overrides the ofm tensor's scale)
+        self.rescale = None
 
     def clone(self, suffix="_clone"):
         res = Operation(self.type, self.name + suffix)
diff --git a/ethosu/vela/operation_util.py b/ethosu/vela/operation_util.py
index a55b954..7015b79 100644
--- a/ethosu/vela/operation_util.py
+++ b/ethosu/vela/operation_util.py
@@ -16,6 +16,7 @@
 # Description:
 # Utility functions for creating Network Operations.
 from typing import Optional
+from typing import Tuple
 
 from .data_type import DataType
 from .high_level_command_to_npu_op import ifm_ifm2_correct_order
@@ -98,6 +99,21 @@
     return create_binary_elementwise(Op.Add, name, ifm, ifm2, quantization, activation, dtype, attrs)
 
 
+def create_rescale_add(
+    name: str,
+    ifm: Tensor,
+    ifm2: Tensor,
+    rescale: Tuple[int, int],
+    quantization: QuantizationParameters,
+    activation: Optional[ActivationFunction] = None,
+    dtype: Optional[DataType] = None,
+    attrs: Optional[dict] = None,
+) -> Operation:
+    op = create_binary_elementwise(Op.RescaleAdd, name, ifm, ifm2, quantization, activation, dtype, attrs)
+    op.rescale = rescale
+    return op
+
+
 def create_clz(
     name: str,
     ifm: Tensor,
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index fa56d35..7de3d9a 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -637,7 +637,7 @@
         ofm_scale_f64 = np.double(ofm_quant.scale_f32)
         scale, shift = scaling.quantise_scale(ifm_scale_f64 / ofm_scale_f64)
     elif pool_op.rescale is not None:
-        # for ResizeBilinear operations with "rescale" in primary_op.attrs
+        # for ResizeBilinear operations with rescale
         rescale = pool_op.rescale
         rescale_bits = len(bin(round_up_to_int(rescale))) - 2 + 1
         scale, shift = scaling.quantise_pooling_scale(kernel.height * kernel.width, rescale_bits)
diff --git a/ethosu/vela/softmax.py b/ethosu/vela/softmax.py
index 3b4bace..8a1770e 100644
--- a/ethosu/vela/softmax.py
+++ b/ethosu/vela/softmax.py
@@ -35,6 +35,7 @@
 from .operation_util import create_depthwise_maxpool
 from .operation_util import create_mul
 from .operation_util import create_reduce_sum
+from .operation_util import create_rescale_add
 from .operation_util import create_shl
 from .operation_util import create_shr
 from .operation_util import create_sub
@@ -331,13 +332,13 @@
             "F0_one_const", [1, 1, 1, 1], DataType.int32, [(1 << 31) - 1], np.int32, quantization=no_scale_quant
         )
         half_denominator = add_op_get_ofm(
-            create_add(
+            create_rescale_add(
                 f"{self.op.name}_add{pass_number}",
                 f0_one_const,
                 shifted_sum_minus_one,
+                (1, 1),  # Custom rescale (scale, shift)
                 one_scale_quant,
                 activation,
-                attrs={"rescale": (1, 1)},
             )
         )
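
For reference, a minimal sketch of how the new create_rescale_add helper is
used, mirroring the softmax.py change above. The tensor and quantization
setup here are illustrative assumptions, not part of the patch:

    import numpy as np

    from ethosu.vela.data_type import DataType
    from ethosu.vela.operation_util import create_rescale_add
    from ethosu.vela.tensor import create_const_tensor
    from ethosu.vela.tensor import QuantizationParameters

    quant = QuantizationParameters()
    quant.scale_f32 = 1.0
    quant.zero_point = 0

    ifm = create_const_tensor("ifm", [1, 1, 1, 1], DataType.int32, [1], np.int32, quantization=quant)
    ifm2 = create_const_tensor("ifm2", [1, 1, 1, 1], DataType.int32, [2], np.int32, quantization=quant)

    # The (1, 1) tuple ends up on op.rescale, replacing the old
    # op.attrs["rescale"] mechanism removed by this change.
    op = create_rescale_add("rescale_add", ifm, ifm2, (1, 1), quant)
    assert op.rescale == (1, 1)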