MLBEDSW-2625 Alignment of tensor for bias and scale

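Fix alignment of the tensor for bias and scale: the padding bytes added
to align each substream to 16 bytes are now counted (converted to
elements, rounded up) in the tensor's storage shape.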

Change-Id: I303a225a536f169909cec9ba4d5cee088110bb94
Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index df7ff75..9300040 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -24,6 +24,7 @@
 from .errors import UnsupportedFeatureError
 from .nn_graph import SchedulingStrategy
 from .numeric_util import round_up
+from .numeric_util import round_up_divide
 from .operation import NpuBlockType
 from .scaling import quantise_scale
 from .scaling import reduced_quantise_scale
@@ -402,6 +403,7 @@
     tens.compressed_values_substream_offsets = []
 
     total_elements = len(quantised_scales)
+    alignment_bytes = 0
     for i in range(0, total_elements, ofm_depth_step):
         # Extract streams from brick to generate substreams for each core
         stream = bytearray()
@@ -417,6 +419,7 @@
             remainder = (len(stream)) % 16
             if remainder > 0:
                 stream.extend(bytearray(16 - remainder))
+                alignment_bytes += 16 - remainder
 
             substream_offsets.append(len(stream))
 
@@ -424,8 +427,7 @@
         tens.compressed_values.append(stream)
         tens.compressed_values_substream_offsets.append(substream_offsets)
 
-    tens.storage_shape = [total_elements]
-
+    tens.storage_shape = [total_elements + round_up_divide(alignment_bytes, tens.element_size_bytes)]
 
 
 def update_pass_weight_and_scale_tensors(nng, arch):