[MLBEDSW-2335] SoftMax int16

Added a graph rewrite of SoftMax for int16.

Change-Id: Id7885af6056a23e8b8362fb61ae94283251eb398
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index 448d838..7134fd8 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -25,13 +25,25 @@
     Pooling = 3
     ConvolutionDepthWise = 4
     ElementWise = 5
+    ReduceSum = 6
 
 
 class Operation:
     """Class representing a Neural Network operation. Has a name, a type,
 input and output tensors, as well as an attribute dictionary."""
 
-    __slots__ = "type", "name", "op_index", "attrs", "inputs", "outputs", "flops", "scheduled_pass", "run_on_npu"
+    __slots__ = (
+        "type",
+        "name",
+        "op_index",
+        "attrs",
+        "inputs",
+        "outputs",
+        "flops",
+        "scheduled_pass",
+        "run_on_npu",
+        "activation_lut",
+    )
 
     def __init__(self, op_type, name):
         self.type = op_type
@@ -43,6 +55,7 @@
         self.run_on_npu = True
         self.scheduled_pass = None
         self.op_index = None  # input network operator index
+        self.activation_lut = None
 
     def clone(self, suffix="_clone"):
         res = Operation(self.type, self.name + suffix)
@@ -80,7 +93,7 @@
             elif self.type == "Conv2DBackpropInputSwitchedBias":
                 bias_idx = 3
 
-        elif npu_block_type == NpuBlockType.Pooling:
+        elif npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
             ifm_idx = 0
             ofm_idx = 0
         elif npu_block_type == NpuBlockType.VectorProduct:
@@ -102,8 +115,8 @@
             ifm2_idx = 1
             ofm_idx = 0
 
-            # LeakyRelu and Abs have a single IFM
-            if self.type in set(("LeakyRelu", "Abs")):
+            # LeakyRelu, Abs and CLZ have a single IFM
+            if self.type in set(("LeakyRelu", "Abs", "CLZ")):
                 ifm2_idx = -1
 
         elif self.type == "Conv2DBackpropInput":
@@ -292,3 +305,9 @@
             assert False
 
         return input_tens, outputs, axis, offset_start, offset_end
+
+    def set_activation_lut(self, lut_tensor):
+        lut_tensor.consumer_list.append(self)
+        self.attrs["fused_activation_function"] = "LUT"
+        self.activation_lut = lut_tensor
+        self.inputs.append(lut_tensor)
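
A minimal usage sketch of the new Operation.set_activation_lut() helper, as a
graph rewrite pass is expected to call it when routing an op's result through a
lookup table. The FakeLutTensor stand-in, the op type and the names are
illustrative assumptions; the real rewrite would use vela's Tensor class.

    from ethosu.vela.operation import Operation


    class FakeLutTensor:
        """Illustrative stand-in providing only the consumer_list attribute
        that set_activation_lut() touches."""

        def __init__(self, name):
            self.name = name
            self.consumer_list = []


    # An op whose output is read through a lookup table: the helper registers
    # the op as a consumer of the LUT tensor, marks the fused activation as
    # "LUT" and appends the LUT tensor to the op's inputs.
    op = Operation("AvgPool", "softmax_exp")
    lut = FakeLutTensor("softmax_exp_lut")
    op.set_activation_lut(lut)

    assert op.attrs["fused_activation_function"] == "LUT"
    assert op.activation_lut is lut
    assert op in lut.consumer_list
    assert lut in op.inputs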