MLBEDSW-2809: Redo the Tensor addressing

Added a static class TensorAddressMap that stores all Tensor addresses
based on their equivalence_id. Made the "address" field into a property
whose getter and setter look up/set the tensor's address in
TensorAddressMap.

This makes the references to cpu_tensor/npu_tensor obsolete and they
have been removed.

Addition to scheduler: avoid SRAM spilling if an op has consumers in
other subgraphs.

Minor rework in LUTState; it will now assign a unique equivalence_id to
the SHRAM lut tensor to avoid issues with addressing. The equivalence
checks in LUTState now compare the values of the LUT instead of the
equivalence_id.

Updated LUT unit tests accordingly.

Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I41de5a8a4e5f07b77d6544d8d4034b754993e503
diff --git a/ethosu/vela/live_range.py b/ethosu/vela/live_range.py
index 156090f..9a8ee58 100644
--- a/ethosu/vela/live_range.py
+++ b/ethosu/vela/live_range.py
@@ -84,21 +84,11 @@
         return self.name < other.name
 
     def set_address(self, address):
-        # Set address of all unaddressed tensors in LiveRange
+        # Set address of all tensors in LiveRange
         for tens in self.tensors:
-            if tens.address is None:
-                addr = address
-            else:
-                # Limit to single tensor for the lr if the tensor address already assigned
-                assert len(self.tensors) == 1
-                addr = tens.address
-            tens.address = addr
-            # Also need to set the address to the tensor's cpu/npu clones
-            if tens.cpu_tensor is not None:
-                tens.cpu_tensor.address = addr
-            if tens.npu_tensor is not None:
-                tens.npu_tensor.address = addr
-        return addr
+            tens.address = address
+
+        return address
 
     def get_alignment(self):
         return self.alignment
@@ -113,10 +103,6 @@
             # For memory only passes, e.g. Reshape. Add input and output tensor to the same LiveRange
             input_tensor = ps.inputs[0]
             output_tensor = ps.outputs[0]
-            # If the input or output tensor is tied to a Cpu tensor, i.e. a subgraph input
-            # or output, fuse the live-range with the Cpu tensors' live-range instead.
-            input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor is not None else input_tensor
-            output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor is not None else output_tensor
             if not tensor_should_be_ignored(input_tensor, target_mem_area) and not tensor_should_be_ignored(
                 output_tensor, target_mem_area
             ):
@@ -132,9 +118,9 @@
         self.current_time = 0
 
     def get_or_create_range(self, tens, alignment=Tensor.AllocationQuantum):
-        for rng in self.ranges.values():
-            # Return the live range of the tensor (or it's cpu/npu clone)
-            if any(tensor in rng.tensors for tensor in [tens, tens.npu_tensor, tens.cpu_tensor]):
+        # Return the live range of the tensor (or any of its clones)
+        for existing_tensor, rng in self.ranges.items():
+            if tens.equivalent(existing_tensor):
                 rng.set_alignment(alignment)
                 return rng
 
@@ -252,10 +238,6 @@
                 # For memory only passes, e.g. Reshape. Add input and output tensor to the same LiveRange
                 input_tensor = ps.inputs[0]
                 output_tensor = ps.outputs[0]
-                # If the input or output tensor is tied to a Cpu tensor, i.e. a subgraph input
-                # or output, fuse the live-range with the Cpu tensors' live-range instead.
-                input_tensor = input_tensor.cpu_tensor if input_tensor.cpu_tensor is not None else input_tensor
-                output_tensor = output_tensor.cpu_tensor if output_tensor.cpu_tensor is not None else output_tensor
                 if not tensor_should_be_ignored(input_tensor, target_mem_area, target_mem_type_set) and not (
                     tensor_should_be_ignored(output_tensor, target_mem_area, target_mem_type_set)
                 ):