Add elementwise vector scalars support

Write the constant scalars into flash. In case it's Dram or OffChipFlash, DMA the scalars from flash to SRAM.

Signed-off-by: Charles Xu <charles.xu@arm.com>
Change-Id: I42300a05dfe968d623b8aec8549644549e0f54b5

commit: 78792223369fa34dacd0e69e189af035283da2ae [log] [tgz]
author: Charles Xu <charles.xu@arm.com> Wed May 13 10:15:26 2020 +0200
committer: Tim Hall <tim.hall@arm.com> Thu Jun 18 17:53:52 2020 +0100
tree: ac3826df5528866319fd65d7a99eef8e87cd4084
parent: 620d88c60482bad4d96da4d32cc4cca5561cca9e [diff] [blame]
diff --git a/ethosu/vela/npu_serialisation.py b/ethosu/vela/npu_serialisation.py
index b8ac20f..0cb40ed 100644
--- a/ethosu/vela/npu_serialisation.py
+++ b/ethosu/vela/npu_serialisation.py

@@ -46,6 +46,10 @@
         memory_tensor.values[start_addr:end_addr] = compressed_values
         start_addr = end_addr
 
+def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor):
+    start_addr = src_tensor.address
+    end_addr = start_addr + src_tensor.quant_values.size
+    memory_tensor.values[start_addr:end_addr] = src_tensor.quant_values
 
 def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, flash_tens):
     if sg.placement != PassPlacement.Npu:
@@ -90,16 +94,22 @@
 
     for cps in sg.cascaded_passes:
         for ps in cps.passes:
-            if ps.placement == PassPlacement.Npu and ps.weight_tensor is not None:
-                # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
-                # is pointing at the destination address of where the weights should be placed in SRAM.
-                # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
-                if ps.weight_tensor.ops[0].type == "DMA":
-                    copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor.ops[0].inputs[0])
-                else:
-                    copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor)
+            if ps.placement == PassPlacement.Npu:
+                if ps.weight_tensor != None:
+                    # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
+                    # is pointing at the destination address of where the weights should be placed in SRAM.
+                    # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
+                    if ps.weight_tensor.ops[0].type == "DMA":
+                        copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor.ops[0].inputs[0])
+                    else:
+                        copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor)
 
-                copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.scale_tensor)
+                    copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.scale_tensor)
+
+                if ps.ifm_tensor != None and ps.ifm_tensor.mem_area != MemArea.Sram:
+                    copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm_tensor)
+                if ps.ifm2_tensor != None and ps.ifm2_tensor.mem_area != MemArea.Sram:
+                    copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm2_tensor)
 
     sg.command_stream_tensor = make_memory_tensor(
         sg.name + "_command_stream", flash_area, command_stream_size_bytes, True, arch
commit	78792223369fa34dacd0e69e189af035283da2ae	[log] [tgz]
author	Charles Xu <charles.xu@arm.com>	Wed May 13 10:15:26 2020 +0200
committer	Tim Hall <tim.hall@arm.com>	Thu Jun 18 17:53:52 2020 +0100
tree	ac3826df5528866319fd65d7a99eef8e87cd4084
parent	620d88c60482bad4d96da4d32cc4cca5561cca9e [diff] [blame]