MLBEDSW-3070: Fix addressing of weights

Assign different equivalence ids to weights with same values but
different compression, to ensure correct addressing.

Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Change-Id: I13aabad71520e4f4a78fb2d6a81740bdd4d1256c
diff --git a/ethosu/vela/lut.py b/ethosu/vela/lut.py
index e3373ca..69aa2a0 100644
--- a/ethosu/vela/lut.py
+++ b/ethosu/vela/lut.py
@@ -16,26 +16,16 @@
 # Description:
 # Functionality for lookup table support.
 import uuid
-from functools import lru_cache
 
 import numpy as np
 
 from . import numeric_util
 from .high_level_command_stream import CommandType
 from .tensor import create_const_tensor
+from .tensor import create_equivalence_id
 from .tensor import TensorPurpose
 
 
-@lru_cache(maxsize=None)
-def create_equivalence_id(key):
-    # Generates equivalence_id based on key.
-    # The DMA optimization of LUT-s assumes that 2 LUT tensors are identical
-    # if they have the same equivalence_id.
-    # So for example all created 256-byte tanh LUT tensors should have
-    # the same equivalence id.
-    return uuid.uuid4()
-
-
 class LUTState:
     # Tracks which LUT-s are located in SHRAM.
     def __init__(self):
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 0f8170d..eedbada 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -18,6 +18,7 @@
 import enum
 import uuid
 from collections import defaultdict
+from functools import lru_cache
 
 import numpy as np
 
@@ -159,6 +160,12 @@
     return new_shp
 
 
+@lru_cache(maxsize=None)
+def create_equivalence_id(key):
+    # Generates an equivalence_id for the given key; lru_cache guarantees equal keys get the same id.
+    return uuid.uuid4()
+
+
 class QuantizationParameters:
     __slots__ = "min", "max", "num_bits", "narrow_range", "scale_f32", "zero_point", "quant_min", "quant_max"
 
@@ -303,6 +310,7 @@
         "compression_scale_for_worst_weight_stream",
         "weight_compression_scales",
         "weight_compression_config",
+        "value_id",
         "storage_rounding_quantum",
         "brick_size",
         "quantization",
@@ -342,7 +350,10 @@
         self.bandwidth_compression_scale = 1.0
         self.compression_scale_for_worst_weight_stream = 1.0
         self.weight_compression_scales = None
+        # if two tensors have the same weight_compression_config, then they have the same compressed values
         self.weight_compression_config = None
+        # if two tensors have the same value_id, then they have the same values
+        self.value_id = uuid.uuid4()
         self.weight_compressed_offsets = []
         self.storage_rounding_quantum = (1, 1, 1, 1)
         self.brick_size = (1, 1, 1, 1)
@@ -375,7 +386,6 @@
 
         res.ops = []
         res.consumer_list = []
-        res.equivalence_id = self.equivalence_id
 
         res.values = self.values
         res.quant_values = self.quant_values
@@ -407,6 +417,7 @@
 
     def copy_compressed_weight_info(self, src_tens):
         # Copies compressed values + all related weight compression info from the given tensor
+        self.equivalence_id = src_tens.equivalence_id
         self.compressed_values = src_tens.compressed_values
         self.compressed_values_substream_offsets = src_tens.compressed_values_substream_offsets
         self.storage_shape = src_tens.storage_shape
@@ -418,6 +429,7 @@
         self.storage_compression_scale = src_tens.storage_compression_scale
         self.block_traversal = src_tens.block_traversal
         self.weight_compression_config = src_tens.weight_compression_config
+        self.value_id = src_tens.value_id
 
     def set_format(self, fmt, arch):
         self.format = fmt
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index 2374cd4..c5a3f3f 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -30,6 +30,7 @@
 from .operation import NpuBlockType
 from .scaling import quantise_scale
 from .scaling import reduced_quantise_scale
+from .tensor import create_equivalence_id
 from .tensor import TensorBlockTraversal
 from .tensor import TensorFormat
 from .tensor import TensorPurpose
@@ -40,7 +41,7 @@
 # Contains meta info for a weight compression. If two tensors have identical weight compression config,
 # then they also will have identical compressed weights.
 WeightCompressionConfig = namedtuple(
-    "WeightCompressionConfig", ["npu_block_type", "ofm_block_depth", "ofm_depth_step", "dilation", "equivalence_id"]
+    "WeightCompressionConfig", ["npu_block_type", "ofm_block_depth", "ofm_depth_step", "dilation", "value_id"]
 )
 
 
@@ -136,7 +137,7 @@
     # Note: for an ofm block only its depth is used in weight compression.
     # And block depth > ofm depth gives same result as block depth == ofm depth
     block_depth = min(ofm_block_depth, tens.quant_values.shape[-1])
-    return WeightCompressionConfig(npu_block_type, block_depth, ofm_depth_step, dilation, tens.equivalence_id)
+    return WeightCompressionConfig(npu_block_type, block_depth, ofm_depth_step, dilation, tens.value_id)
 
 
 def set_storage_shape(tens):
@@ -286,13 +287,15 @@
         nng.weight_cache = CompressedWeightCache()
     wcc = create_weight_compression_config(tens, npu_block_type, ofm_block_depth, ofm_depth_step, dilation)
     tens.weight_compression_config = wcc
+    # Reassign equivalence id so that tensors with identical weight compression share an
+    # equivalence id, while tensors with the same values but different compression do not
+    tens.equivalence_id = create_equivalence_id(wcc)
     tens_cached = nng.weight_cache.get_tensor_with_same_compression(wcc)
     if tens_cached is not None:
         # Cache hit, copy weights from the cache
         tens.copy_compressed_weight_info(tens_cached)
         set_storage_shape(tens)
         return
-
     # No cache hit, perform the compression
     assert tens.quantization is not None
     assert tens.quantization.scale_f32 is not None