Add support for scalar operands in elementwise vector operations

Write the constant scalars into flash. If the memory area is Dram
or OffChipFlash, DMA the scalars from flash to SRAM.

Signed-off-by: Charles Xu <charles.xu@arm.com>
Change-Id: I42300a05dfe968d623b8aec8549644549e0f54b5
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index 913b9a6..351716e 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -25,6 +25,7 @@
 from .operation import NpuBlockType
 from .operation import Operation
 from .tensor import Tensor
+from .numeric_util import full_shape
 
 passthrough_nodes = set(("Identity",))
 
@@ -313,6 +314,7 @@
 depthwise_op = set(("DepthwiseConv2dNative", "DepthwiseConv2dBiasAct",))
 pool_op = set(("AvgPool", "MaxPool", "QuantizedAvgPool", "QuantizedMaxPool", "AvgPoolAct", "MaxPoolAct", "ResizeBilinear",))
 elementwise_op = set(("AddAct", "MulAct", "SubAct", "Maximum", "Minimum", "LeakyRelu", "Abs"))
+binary_elementwise_op = set(("AddAct", "MulAct", "SubAct", "Maximum", "Minimum"))
 activation_ops = set(("Relu", "Relu6", "ReluN1To1", "Sigmoid", "Tanh"))
 memory_only_ops = set(("Reshape",))
 
@@ -399,6 +401,16 @@
             op.type = "Identity"
     return op
 
+def fixup_elementwise_with_scalars(op, arch):
+    if op.type in binary_elementwise_op:
+        ifm_tensor, ifm2_tensor, _, ofm_tensor = op.get_ifm_ifm2_weights_ofm()
+        if ifm2_tensor.shape != [] and ifm_tensor.shape != []:
+            diff = len(ifm_tensor.shape) - len(ifm2_tensor.shape)
+            if diff > 0:
+                ifm2_tensor.shape = full_shape(len(ifm_tensor.shape), ifm2_tensor.shape, 1)
+            elif diff < 0:
+                ifm_tensor.shape = full_shape(len(ifm2_tensor.shape), ifm_tensor.shape, 1)
+    return op
 
 # Set input/output tensor equivalence to the same id for memory operations
 def set_tensor_equivalence(op, arch):
@@ -492,6 +504,7 @@
         fixup_act_reorder,
         add_padding_fields,
         mark_npu_block_type,
+        fixup_elementwise_with_scalars,
         # convert_mul_max_to_abs_or_lrelu # TODO: enable optimisation once quantisation issues are resolved
     ]