Add elementwise vector scalars support

Write the constant scalars into flash. If that memory area is Dram
or OffChipFlash, DMA the scalars from flash to SRAM.

Signed-off-by: Charles Xu <charles.xu@arm.com>
Change-Id: I42300a05dfe968d623b8aec8549644549e0f54b5
diff --git a/ethosu/vela/insert_dma.py b/ethosu/vela/insert_dma.py
index 703ab9d..b1b8985 100644
--- a/ethosu/vela/insert_dma.py
+++ b/ethosu/vela/insert_dma.py
@@ -21,6 +21,7 @@
 from .tensor import MemArea
 from .tensor import TensorPurpose
 
+binary_elementwise_op = set(("AddAct", "MulAct", "SubAct", "Maximum", "Minimum"))
 
 def insert_dma_cmd(op, arch):
     if op.type == "DMA":
@@ -28,7 +29,9 @@
     for idx, tens in enumerate(op.inputs):
 
         if tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash) and tens.mem_area != arch.fast_storage_mem_area:
-            if tens.purpose == TensorPurpose.Weights:
+            if (tens.purpose == TensorPurpose.Weights or
+                (tens.purpose == TensorPurpose.FeatureMap and
+                 op.type in binary_elementwise_op)):
                 only_vector_product_consumers = True
                 for oper in tens.consumers():
                     if oper is None or oper.attrs.get("npu_block_type") != NpuBlockType.VectorProduct: