MLBEDSW-6260: Add support for using DMA to copy feature maps

- Reshape ops can be bypassed and there is no need for the NPU to process them.
There are use cases where the IFM must be preserved, so a memcpy is needed.
This is implemented by an AvgPool.
- In order to reduce the cost of the AvgPool, the IFM can be copied by DMA.
This is faster, and it can also be turned into a real NOP in cases where
the IFM and the OFM can use the same memory space.
- Added a new memcpy op. Only the NHWC format is supported, since DMA cannot
change the format on the fly.
- Allow the OFM to reuse the IFM for the memcpy op
- Make sure the DMA copy size is 16 byte aligned

Change-Id: I3605a48d47646ff60d2bb3644dd3a23f872235a7
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index 05e481e..995a0cc 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -165,16 +165,11 @@
 
 
 def _get_ifm_to_fuse(sched_op, target_mem_area=None, target_mem_type_set=None):
-    def _tensor_should_be_ignored(tens):
-        if tens.ifm_write_protected:
-            return True
-        return tensor_should_be_ignored(tens, target_mem_area, target_mem_type_set)
-
-    # Check if possible to merge ifm/ofm live ranges of elementwise op
     ifm_tens = None
     if sched_op.op_type.is_elementwise_op():
+        # Check if possible to merge ifm/ofm live ranges of elementwise op
         elem_op = sched_op.parent_op
-        if not _tensor_should_be_ignored(elem_op.ofm):
+        if not tensor_should_be_ignored(elem_op.ofm, target_mem_area, target_mem_type_set):
             # Check if overwriting the inputs can be allowed
             OpShapeTens = namedtuple("OpShapeTens", ["op_shape", "tens"])
             outp = OpShapeTens(elem_op.ofm_shapes[0], elem_op.ofm)
@@ -183,7 +178,6 @@
                 inps.append(OpShapeTens(elem_op.ifm_shapes[0], elem_op.ifm))
             if elem_op.ifm2 is not None:
                 inps.append(OpShapeTens(elem_op.ifm_shapes[1], elem_op.ifm2))
-
             # find an input tensor that can be overwritten by the output
             for inp in inps:
                 if (
@@ -192,7 +186,8 @@
                     # check input tensor is valid
                     and inp.tens is not None
                     and inp.tens.shape != []
-                    and not _tensor_should_be_ignored(inp.tens)
+                    and not inp.tens.ifm_write_protected
+                    and not tensor_should_be_ignored(inp.tens, target_mem_area, target_mem_type_set)
                     # check input and output tensors are compatible
                     and inp.tens.format == outp.tens.format
                     and inp.tens.dtype == outp.tens.dtype
@@ -203,6 +198,17 @@
                 ):
                     ifm_tens = inp.tens
                     break
+    elif sched_op.op_type == Op.Memcpy:
+        # Check if possible to merge ifm/ofm live ranges of dma op
+        dma_op = sched_op.parent_op
+        ifm = dma_op.ifm
+        ofm = dma_op.ofm
+        if not (
+            tensor_should_be_ignored(ifm, target_mem_area, target_mem_type_set)
+            or tensor_should_be_ignored(ofm, target_mem_area, target_mem_type_set)
+        ):
+            # Currently DMA only used when bypassing memory only ops so ok to reuse ifm
+            ifm_tens = ifm
 
     return ifm_tens