MLBEDSW-2688: LUT DMA may require kernel wait

LUT related updates specific for 16K SHRAM:
- prevent LUT DMA transfer from overwriting accumulator SHRAM of an ongoing operation
- do not use the last 2K of SHRAM as accumulator during LUT operations

Change-Id: I17066e0410c6f07b125ed245002d7b19269a7a8a
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>

commit: 814cfbb8124ba0b3828db2bb12d9342ae9c39f19 [log] [tgz]
author: Louis Verhaard <louis.verhaard@arm.com> Fri Aug 21 14:06:25 2020 +0200
committer: Louis Verhaard <louis.verhaard@arm.com> Wed Aug 26 08:18:27 2020 +0200
tree: 519f7f41091efa944f6c4e3eb732892c56da40e1
parent: 7579c75d870c25ee075e46a110b6b89cf266db64 [diff] [blame]
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index 053377c..fdcbe94 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py

@@ -25,6 +25,8 @@
 from .errors import VelaError
 from .ethos_u55_regs.ethos_u55_regs import resampling_mode
 from .operation import NpuBlockType
+from .range_set import MemoryRangeSet
+from .tensor import MemArea
 
 
 class SharedBufferAllocation:
@@ -40,6 +42,7 @@
         dilation = (1, 1, 1, 1)
         self.kernel = Kernel(1, 1)
         is_elementwise = ps.npu_block_type == NpuBlockType.ElementWise
+        self.uses_lut = False
 
         if ps.primary_op:
             strides = ps.primary_op.attrs.get("strides", strides)
@@ -55,6 +58,7 @@
                 k_w = ps.primary_op.attrs.get("filter_width", 1)
 
             self.kernel = Kernel(k_w, k_h, strides[2], strides[1], dilation[2], dilation[1])
+            self.uses_lut = ps.primary_op.activation_lut is not None
 
         self.is_equal_depth_op = is_elementwise or ps.npu_block_type in (
             NpuBlockType.ConvolutionDepthWise,
@@ -102,7 +106,7 @@
 
         # Accumulator area is measured from the end of the buffer
         self.bank_locations[SharedBufferArea.Accumulators] = (
-            self.arch.shram_total_banks - self.banks_required[SharedBufferArea.Accumulators]
+            self.arch.available_shram_banks(self.uses_lut) - self.banks_required[SharedBufferArea.Accumulators]
         )
         ifm_end = self.bank_locations[SharedBufferArea.IFM] + self.banks_required[SharedBufferArea.IFM]
         return ifm_end <= self.bank_locations[SharedBufferArea.Accumulators]
@@ -156,6 +160,13 @@
 
         return True
 
+    def get_shram_memory_access_range(self):
+        # Returns the SHRAM memory access range used by this shared buffer, excluding access to LUT:
+        # a MemoryRangeSet over MemArea.Shram from 0 to available_shram_banks(uses_lut) * shram_bank_size
+        return MemoryRangeSet(
+            MemArea.Shram, 0, self.arch.available_shram_banks(self.uses_lut) * self.arch.shram_bank_size
+        )
+
 
 def shared_buffer_allocation_for_pass_and_block_config(arch, ps, block_config):
     alloc = SharedBufferAllocation(arch, ps)
commit	814cfbb8124ba0b3828db2bb12d9342ae9c39f19	[log] [tgz]
author	Louis Verhaard <louis.verhaard@arm.com>	Fri Aug 21 14:06:25 2020 +0200
committer	Louis Verhaard <louis.verhaard@arm.com>	Wed Aug 26 08:18:27 2020 +0200
tree	519f7f41091efa944f6c4e3eb732892c56da40e1
parent	7579c75d870c25ee075e46a110b6b89cf266db64 [diff] [blame]