MLBEDSW-6263: Use separate tensors for double buffering

Uses separate tensors for the individual weight buffers
when weights are double buffered.

Each weight buffer tensor gets its own live range.

This patch reapplies a previously reverted patch, with some
additional bug fixes.
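
A minimal sketch of the resulting data structure (illustrative only,
not vela's actual implementation: Tensor and the buffer_for_stripe
helper are hypothetical stand-ins, while the buffered_weight_tensors
field name matches the diff below):

    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class Tensor:
        name: str
        mem_area: str = "Sram"

    @dataclass
    class SchedulerOpInfo:
        # Previously a single optional buffered_weight_tensor; now a
        # list with one tensor per buffer (two when double buffering
        # is used), so each buffer can get its own live range.
        buffered_weight_tensors: List[Tensor] = field(default_factory=list)

    def buffer_for_stripe(cost: SchedulerOpInfo, stripe_index: int) -> Tensor:
        # Ping-pong between the buffers: even stripes use buffer 0,
        # odd stripes use buffer 1 (assuming two buffers are allocated).
        return cost.buffered_weight_tensors[stripe_index % len(cost.buffered_weight_tensors)]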

Signed-off-by: Rickard Bolin <rickard.bolin@arm.com>
Change-Id: I868c70d15821eb9f1399186f2da6e7345f6ee343
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 81d0be7..0c8a907 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -620,8 +620,8 @@
     prev_cost = schedule.cost_map[prev_op] if prev_op else None
     if op.parent_op.bias:
         query.const_shape = Shape4D(1, 1, 1, op.ofm.shape.depth)
-        if cost.buffered_weight_tensor:
-            query.const_memory_area = cost.buffered_weight_tensor.mem_area
+        if cost.buffered_weight_tensors:
+            query.const_memory_area = cost.buffered_weight_tensors[0].mem_area
         else:
             query.const_memory_area = cost.npu_weights_tensor.mem_area
 
@@ -649,7 +649,7 @@
             # LUT read from SHRAM TODO remove?
             scaled_bws[lut_tensor.mem_area][lut_tensor.purpose][BandwidthDirection.Read] += bw
 
-    if cost.npu_weights_tensor and cost.buffered_weight_tensor:
+    if cost.npu_weights_tensor and cost.buffered_weight_tensors:
         # DMA Weight Transfer
         sz = 0
         # Get the size of the first DMA
@@ -661,10 +661,10 @@
 
         total_sz = len(cost.npu_weights_tensor.buffer)
         bws[cost.npu_weights_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Read] += total_sz
-        bws[cost.buffered_weight_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz
+        bws[cost.buffered_weight_tensors[0].mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz
 
         ws_first_transfer_cycles = measure_mem2mem_cycles(
-            arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensor.mem_area, sz
+            arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensors[0].mem_area, sz
         )
 
         # Add cycles for Weight + Scale Transfer
@@ -720,7 +720,7 @@
         bw = access.const_read[0] * bandwidth_compression_scale_approx
         bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw
 
-        if not cost.buffered_weight_tensor:
+        if not cost.buffered_weight_tensors:
             scaled_bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw
 
     if access.const_read[1] > 0:
@@ -728,7 +728,7 @@
         bw = access.const_read[1] * op.parent_op.bias.element_size()
         bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw
 
-        if not cost.buffered_weight_tensor:
+        if not cost.buffered_weight_tensors:
             scaled_bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw
 
     update_summary_cycles(arch, scaled_bws, cycles_a)
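
For reference, a toy illustration of the bandwidth bookkeeping in the
hunks above (assumed memory-area names and byte counts, not vela
output). The diff indexes buffered_weight_tensors[0] wherever only a
mem_area is needed, on the assumption that both halves of a double
buffer are allocated in the same memory area:

    from collections import defaultdict

    # bws[mem_area][purpose][direction] -> bytes
    bws = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    weights_mem_area = "Dram"  # assumed DMA source (permanent weights)
    buffer_mem_area = "Sram"   # assumed DMA destination (both buffers)
    total_sz = 4096            # assumed size of the encoded weight stream

    # The whole stream is read once from permanent storage...
    bws[weights_mem_area]["Weights"]["Read"] += total_sz
    # ...and written once into the double buffer, booked against the
    # first buffer's memory area.
    bws[buffer_mem_area]["Weights"]["Write"] += total_sz

    print(bws[buffer_mem_area]["Weights"]["Write"])  # 4096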