MLBEDSW-2809: Redo the Tensor addressing
Added a static class TensorAddressMap that stores all Tensor addresses
based on their equivalence_id. Made the "address" field into a property
whose getter and setter look up/set the tensor's address in
TensorAddressMap.
This makes the references to cpu_tensor/npu_tensor obsolete and they
have been removed.
Addition to scheduler: avoid SRAM spilling if an op has consumers in
other subgraphs.
Minor rework in LUTState; it will now assign a unique equivalence_id to
the SHRAM lut tensor to avoid issues with addressing. The equivalence
checks in LUTState now compare the values of the LUT instead of the
equivalence_id.
Updated LUT unit tests accordingly.
Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I41de5a8a4e5f07b77d6544d8d4034b754993e503
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index 156090f..9a8ee58 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -84,21 +84,11 @@
return self.name < other.name
def set_address(self, address):
- # Set address of all unaddressed tensors in LiveRange
+ # Set address of all tensors in LiveRange
for tens in self.tensors:
- if tens.address is None:
- addr = address
- else:
- # Limit to single tensor for the lr if the tensor address already assigned
- assert len(self.tensors) == 1
- addr = tens.address
- tens.address = addr
- # Also need to set the address to the tensor's cpu/npu clones
- if tens.cpu_tensor is not None:
- tens.cpu_tensor.address = addr
- if tens.npu_tensor is not None:
- tens.npu_tensor.address = addr
- return addr
+ tens.address = address
+
+ return address
def get_alignment(self):
return self.alignment
@@ -113,10 +103,6 @@
# For memory only passes, e.g. Reshape. Add input and output tensor to the same LiveRange
input_tensor = ps.inputs[0]
output_tensor = ps.outputs[0]
- # If the input or output tensor is tied to a Cpu tensor, i.e. a subgraph input
- # or output, fuse the live-range with the Cpu tensors' live-range instead.
- input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor is not None else input_tensor
- output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor is not None else output_tensor
if not tensor_should_be_ignored(input_tensor, target_mem_area) and not tensor_should_be_ignored(
output_tensor, target_mem_area
):
@@ -132,9 +118,9 @@
self.current_time = 0
def get_or_create_range(self, tens, alignment=Tensor.AllocationQuantum):
- for rng in self.ranges.values():
- # Return the live range of the tensor (or it's cpu/npu clone)
- if any(tensor in rng.tensors for tensor in [tens, tens.npu_tensor, tens.cpu_tensor]):
+ # Return the live range of the tensor (or any of its clones)
+ for existing_tensor, rng in self.ranges.items():
+ if tens.equivalent(existing_tensor):
rng.set_alignment(alignment)
return rng
@@ -252,10 +238,6 @@
# For memory only passes, e.g. Reshape. Add input and output tensor to the same LiveRange
input_tensor = ps.inputs[0]
output_tensor = ps.outputs[0]
- # If the input or output tensor is tied to a Cpu tensor, i.e. a subgraph input
- # or output, fuse the live-range with the Cpu tensors' live-range instead.
- input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor is not None else input_tensor
- output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor is not None else output_tensor
if not tensor_should_be_ignored(input_tensor, target_mem_area, target_mem_type_set) and not (
tensor_should_be_ignored(output_tensor, target_mem_area, target_mem_type_set)
):