MLBEDSW-2548: Fix for Double Buffer size estimate

The Scheduler now uses a worst-case estimate of the Double Buffer size,
so it can no longer choose strategies that end up with a buffer that
doesn't fit in SRAM.

Signed-off-by: Jacob Bohlin <jacob.bohlin@arm.com>
Change-Id: I763731f63c7672679f3b8cd6db65dad03b946ae5
diff --git a/ethosu/vela/graph_optimiser.py b/ethosu/vela/graph_optimiser.py
index cd4ac63..dbf2b7b 100644
--- a/ethosu/vela/graph_optimiser.py
+++ b/ethosu/vela/graph_optimiser.py
@@ -435,6 +435,18 @@
     return op
 
 
+def reorder_depthwise_weights(op, arch):  # graph-rewrite step: transposes depthwise weights in place; arch is not used here
+    if "DepthwiseConv2d" in op.type:  # only ops whose type contains "DepthwiseConv2d" are modified
+        weight_tensor = op.inputs[1]  # NOTE(review): presumably input 1 holds the weights - confirm against the op layout
+        weight_tensor.quant_values = np.transpose(weight_tensor.quant_values, (0, 1, 3, 2))  # swap the last two axes; the 4-entry axes tuple requires a 4-D array
+        weight_tensor.shape = weight_tensor.storage_shape = weight_tensor.bandwidth_shape = list(
+            weight_tensor.quant_values.shape
+        )
+        weight_tensor.weight_transpose_depthwise = True  # record on the tensor that its values have been transposed
+
+    return op
+
+
 # Reorder activation op if it's after the memory only operations
 def fixup_act_reorder(op, arch):
     if op.type in activation_ops:
@@ -589,6 +601,7 @@
         add_padding_fields,
         mark_npu_block_type,
         fixup_elementwise_with_scalars,
+        reorder_depthwise_weights,
         # convert_mul_max_to_abs_or_lrelu # TODO: enable optimisation once quantisation issues are resolved
     ]