MLBEDSW-6260: Add support for using DMA to copy feature maps
- Reshape ops can be bypassed, so there is no need for the NPU to process them.
There are use cases where the IFM must be preserved, so a memcpy is needed.
This is implemented by an AvgPool.
- To reduce the cost of the AvgPool, the IFM can be copied by DMA instead.
This is faster, and the copy can be turned into a real NOP in cases where
the IFM and the OFM can use the same memory space.
- Added new memcpy op. Only NHWC format is supported, since DMA cannot change
the format on the fly.
- Allow ofm to reuse ifm for memcpy op
- Make sure the DMA copy size is 16-byte aligned
Change-Id: I3605a48d47646ff60d2bb3644dd3a23f872235a7
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index 19b00b3..6be9dc2 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -51,6 +51,7 @@
ConvolutionDepthWise = 4
ElementWise = 5
ReduceSum = 6
+ Dma = 7
class Kernel:
@@ -174,6 +175,7 @@
)
Dequantize = OperatorInfo(indices=NNG_IFM_INDICES)
Div = OperatorInfo()
+ Memcpy = OperatorInfo(block_type=NpuBlockType.Dma, indices=NNG_IFM_INDICES)
Elu = OperatorInfo()
EmbeddingLookup = OperatorInfo()
EmbeddingLookupSparse = OperatorInfo()
@@ -373,6 +375,9 @@
def is_resize_op(self):
return self in (Op.ResizeBilinear, Op.ResizeNearestNeighbor)
+ def is_memcpy_op(self):
+ return self.info.block_type == NpuBlockType.Dma
+
def needs_bias(self):
return bool(self.info.indices.biases)