MLBEDSW-3222: Bias tensors in fast storage

For IFM streamed cascades, bias tensors are read several times.
Move these tensors to fast storage and add DMA commands.

Change-Id: I630f6275986c1b5e3f126c925b11e22500fb1128
Signed-off-by: Andreas Nevalainen <andreas.nevalainen@arm.com>
diff --git a/ethosu/vela/high_level_command_stream.py b/ethosu/vela/high_level_command_stream.py
index b8a19f5..a5372d7 100644
--- a/ethosu/vela/high_level_command_stream.py
+++ b/ethosu/vela/high_level_command_stream.py
@@ -21,6 +21,7 @@
 
 from .numeric_util import round_up_divide
 from .operation import NpuBlockType
+from .operation import Op
 from .range_set import AccessDirection
 from .range_set import MemoryAccessSet
 from .range_set import MemoryRangeSet
@@ -236,6 +237,11 @@
                 ),
                 AccessDirection.Read,
             )
+        if self.scale_tensor is not None and self.scale_tensor.ops[0].type == Op.DMA:
+            res.add(
+                self.scale_tensor.get_address_ranges_for_coordinates([0], self.scale_tensor.shape),
+                AccessDirection.Read,
+            )
         # Add read access to SHRAM by any LUT-s
         for tens in self.ps.intermediates:
             if tens.purpose == TensorPurpose.LUT and tens.mem_area == MemArea.Shram: