MLBEDSW-6263: Use separate tensors for double buffering

Uses separate tensors for the individual weight buffers
in the case of weight double buffering.

Each weight buffer tensor gets its own individual live range.

Change-Id: I724a8c61a7045615fbd2ed9535663076ac8edd13
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 8c4aee6..4ffca49 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -608,8 +608,8 @@
     prev_cost = schedule.cost_map[prev_op] if prev_op else None
     if op.parent_op.bias:
         query.const_shape = Shape4D(1, 1, 1, op.ofm.shape.depth)
-        if cost.buffered_weight_tensor:
-            query.const_memory_area = cost.buffered_weight_tensor.mem_area
+        if cost.buffered_weight_tensors:
+            query.const_memory_area = cost.buffered_weight_tensors[0].mem_area
         else:
             query.const_memory_area = cost.npu_weights_tensor.mem_area
 
@@ -637,7 +637,7 @@
             # LUT read from SHRAM TODO remove?
             scaled_bws[lut_tensor.mem_area][lut_tensor.purpose][BandwidthDirection.Read] += bw
 
-    if cost.npu_weights_tensor and cost.buffered_weight_tensor:
+    if cost.npu_weights_tensor and cost.buffered_weight_tensors:
         # DMA Weight Transfer
         sz = 0
         # Get the size of the first DMA
@@ -649,10 +649,10 @@
 
         total_sz = len(cost.npu_weights_tensor.buffer)
         bws[cost.npu_weights_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Read] += total_sz
-        bws[cost.buffered_weight_tensor.mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz
+        bws[cost.buffered_weight_tensors[0].mem_area][TensorPurpose.Weights][BandwidthDirection.Write] += total_sz
 
         ws_first_transfer_cycles = measure_mem2mem_cycles(
-            arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensor.mem_area, sz
+            arch, cost.npu_weights_tensor.mem_area, cost.buffered_weight_tensors[0].mem_area, sz
         )
 
         # Add cycles for Weight + Scale Transfer
@@ -708,7 +708,7 @@
         bw = access.const_read[0] * bandwidth_compression_scale_approx
         bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw
 
-        if not cost.buffered_weight_tensor:
+        if not cost.buffered_weight_tensors:
             scaled_bws[query.const_memory_area][TensorPurpose.Weights][BandwidthDirection.Read] += bw
 
     if access.const_read[1] > 0:
@@ -716,7 +716,7 @@
         bw = access.const_read[1] * op.parent_op.bias.element_size()
         bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw
 
-        if not cost.buffered_weight_tensor:
+        if not cost.buffered_weight_tensors:
             scaled_bws[query.const_memory_area][TensorPurpose.FSBias][BandwidthDirection.Read] += bw
 
     update_summary_cycles(arch, scaled_bws, cycles_a)