MLBEDSW-1941: Fix bug with shared weights

If the same weight tensor was used with different block configs,
errors would occur.

Fixed by always cloning weight tensors, using a global weight
compression cache, and modifying the linear allocator to
detect multiple usages of the same weight compression.
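
For illustration, a minimal sketch of the caching idea. The names
below (WeightCompressionConfig, compress_weights, encode) are
hypothetical stand-ins, not vela's actual API: the cache is keyed by
everything that influences the encoded stream, so cloned weight
tensors compressed with equal values and block config share one
result instead of producing diverging copies.

    # Hypothetical sketch of a global weight compression cache; the
    # class and function names are illustrative, not vela's own.
    class WeightCompressionConfig:
        # Everything that influences the encoded stream: an id for the
        # raw weight values plus the block config chosen for the op.
        def __init__(self, value_id, block_config):
            self.key = (value_id, block_config)

        def __eq__(self, other):
            return isinstance(other, WeightCompressionConfig) and self.key == other.key

        def __hash__(self):
            return hash(self.key)

    # One cache for the whole compilation: clones of the same weight
    # tensor that end up with an equal config reuse one encoded stream.
    _compression_cache = {}

    def compress_weights(value_id, block_config, encode):
        cfg = WeightCompressionConfig(value_id, block_config)
        if cfg not in _compression_cache:
            _compression_cache[cfg] = encode(value_id, block_config)
        return cfg, _compression_cache[cfg]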

Change-Id: I91ca59176e1c59c66e0ac7a4227f2b5f0b47053f
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/tensor_allocation.py b/ethosu/vela/tensor_allocation.py
index cd2b570..e3952df 100644
--- a/ethosu/vela/tensor_allocation.py
+++ b/ethosu/vela/tensor_allocation.py
@@ -27,18 +27,26 @@
 from .tensor import MemArea
 
 
-def linear_allocate_live_ranges(live_ranges, alloc_granularity=256):
+def linear_allocate_live_ranges(live_ranges, alloc_granularity=16):
+    # Allocates using increasing addresses. Duplicate constant tensors will be allocated to the same address
     total_sz = 0
     allocated_tensors = []
 
-    # just assign increasing addresses
+    # just assign increasing addresses, except for duplicates
     for tens, lr in live_ranges.ranges.items():
         if tens in allocated_tensors:
             continue
 
-        lr.set_address(total_sz)
+        address = total_sz
+        if tens.weight_compression_config is not None:
+            for allocated_tens in allocated_tensors:
+                if allocated_tens.weight_compression_config == tens.weight_compression_config:
+                    address = allocated_tens.address
+                    break
+        lr.set_address(address)
         allocated_tensors += lr.tensors
-        total_sz += numeric_util.round_up(int(math.ceil(lr.size)), alloc_granularity)
+        if address == total_sz:  # fresh allocation: advance the high-water mark
+            total_sz += numeric_util.round_up(int(math.ceil(lr.size)), alloc_granularity)
 
     return total_sz
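
To see the allocator behaviour in isolation, here is a self-contained
toy version of the loop above. It uses SimpleNamespace stand-ins
instead of vela's Tensor and LiveRange objects, and the sizes and
16-byte granularity are only for the example; it is a sketch of the
technique, not the actual implementation.

    import math
    from types import SimpleNamespace

    def round_up(n, granularity):
        return ((n + granularity - 1) // granularity) * granularity

    def linear_allocate(live_ranges, alloc_granularity=16):
        total_sz = 0
        allocated = []
        for tens, lr in live_ranges:
            address = total_sz
            # Duplicate detection: a tensor whose compression config
            # matches an already-allocated one reuses that address.
            if tens.weight_compression_config is not None:
                for prev in allocated:
                    if prev.weight_compression_config == tens.weight_compression_config:
                        address = prev.address
                        break
            tens.address = address
            allocated.append(tens)
            if address == total_sz:  # only grow for a fresh allocation
                total_sz += round_up(int(math.ceil(lr.size)), alloc_granularity)
        return total_sz

    cfg = object()  # shared compression config
    a = SimpleNamespace(weight_compression_config=cfg, address=None)
    b = SimpleNamespace(weight_compression_config=cfg, address=None)
    ranges = [(a, SimpleNamespace(size=100)), (b, SimpleNamespace(size=100))]
    assert linear_allocate(ranges) == 112  # one 112-byte slot, not two
    assert a.address == b.address == 0     # b reuses a's address

The key point the sketch demonstrates is the final guard: the running
total only advances when a new address was actually handed out, so
duplicates cost no extra memory.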