MLBEDSW-1941: Bug fix for shared weights

If the same weight tensor was used with different block configs,
errors would occur.

Fixed by always cloning weight tensors, using a global weight
compression cache, and modifying the linear allocator to detect
multiple usages of the same weight compression.
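
The compression result depends on both the weight values and the
block config, so a shared weight tensor compressed under two
different block configs must produce two distinct cache entries,
while identical (values, block config) pairs are compressed only
once. A minimal sketch of such a cache; the names
(CompressedWeightCache, get_or_compress, compress_fn) are
illustrative assumptions, not the actual Vela code:

    class CompressedWeightCache:
        """Global cache of compressed weights, keyed on both the
        weight values and the block config they were compressed for."""

        def __init__(self):
            self.cache = {}

        def get_or_compress(self, weight_tens, block_config, compress_fn):
            # Same values + same block config -> reuse previous result;
            # same values + different block config -> separate entry.
            key = (id(weight_tens.values), tuple(block_config))
            if key not in self.cache:
                self.cache[key] = compress_fn(weight_tens, block_config)
            return self.cache[key]

Cloning each op's weight tensor then lets every use carry its own
block-config-specific compression, while the cache avoids doing the
same compression work twice.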

Change-Id: I91ca59176e1c59c66e0ac7a4227f2b5f0b47053f
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 64aff06..b6a98a6 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -144,13 +144,14 @@
     # processed first during serialization into tensors
     first_npu_sg = nng.subgraphs[1]
     assert first_npu_sg.placement == PassPlacement.Npu
+    # Use the linear allocator for constant tensors
     tensor_allocation.allocate_tensors(
         nng,
         first_npu_sg,
         arch,
         permanent_storage,
         scheduler_options.use_ifm_ofm_overlap,
-        options.tensor_allocator,
+        TensorAllocator.LinearAlloc,
         options.verbose_allocation,
         options.show_minimum_possible_allocation,
         lr_graph_flash,
@@ -195,7 +196,7 @@
         arch,
         permanent_storage,
         scheduler_options.use_ifm_ofm_overlap,
-        options.tensor_allocator,
+        TensorAllocator.LinearAlloc,
         options.verbose_allocation,
         options.show_minimum_possible_allocation,
     )
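
The hunks above route constant tensor allocation through
TensorAllocator.LinearAlloc regardless of the allocator selected on
the command line, so the linear allocator's duplicate detection can
apply to the compressed weights. A rough sketch of that detection,
with made-up names (ConstTensor, compression_key, linear_allocate)
standing in for the real data structures:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class ConstTensor:
        # Stand-in for a Vela tensor; fields are illustrative only.
        name: str
        size: int
        compression_key: Optional[object] = None  # id of shared compressed data
        address: Optional[int] = None

    def linear_allocate(tensors, alignment=16):
        """Place tensors back to back, but reuse the address of a tensor
        whose weight compression has already been allocated."""
        high_water = 0
        seen = {}  # compression key -> previously assigned address
        for tens in tensors:
            key = tens.compression_key
            if key is None:
                key = id(tens)  # unshared tensor: unique key
            if key in seen:
                tens.address = seen[key]  # same compression: no new storage
                continue
            tens.address = high_water
            seen[key] = high_water
            high_water += -(-tens.size // alignment) * alignment  # round up
        return high_water

With two tensors carrying the same compression_key, the second simply
inherits the first one's address, so the shared compressed weights are
stored only once in permanent storage.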