MLBEDSW-8497: [MLCE] Avoid modifying FC with dynamic weights

 - If an NPU op is followed by a convolution op with dynamic
weights, the optimized file ends up containing a duplicated
tensor called _cpu.
 - Another problem is that an empty bias tensor is added
in the reader.
 - The fix is to ignore these CPU ops in both the reader
and the writer, as sketched below.
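
In both the reader and the writer, the decision reduces to checking
whether the weight tensor (inputs[1]) carries constant data, i.e.
whether its values attribute is set. A minimal sketch of that check,
with a hypothetical helper name and stand-in classes that are not part
of the Vela code base:

    import numpy as np

    class Tensor:
        def __init__(self, values=None):
            # values holds the constant data; None means the weights are
            # produced at run time by a preceding op (dynamic weights).
            self.values = values

    class Operation:
        def __init__(self, inputs):
            # For conv/FC ops, inputs[1] is the weight tensor.
            self.inputs = inputs

    def has_constant_weights(op):
        # Ops with dynamic weights are left untouched so they run on CPU.
        return op.inputs[1].values is not None

    fc_const = Operation([Tensor(), Tensor(values=np.ones((1, 8)))])
    fc_dynamic = Operation([Tensor(), Tensor(values=None)])
    assert has_constant_weights(fc_const)
    assert not has_constant_weights(fc_dynamic)

Only when this check passes are the weight and bias tensors cloned and
reshaped in the reader, and mapped back to their src_tensor in the
writer.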

Change-Id: I476b4f6062e26cca4ba589df694a99ef79b0f6d4
Signed-off-by: Johan Alfven <johan.alfven@arm.com>
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index 85acb6b..e732f19 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -153,18 +153,21 @@
             self.virtual_outputs.append(tens)
 
         if op.type.is_depthwise_conv2d_op() or op.type.is_conv2d_op() or op.type == Op.FullyConnected:
+            # Reshape and add bias for ops with constant weights
+            # Do not modify ops with dynamic data since they will run on CPU
             if inputs[1].values is not None:
                 if op.type == Op.FullyConnected:
                     inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 0), False)
                 else:
                     inputs[1] = clone_and_reshape_tensor(inputs[1], (1, 2, 3, 0), False)
-            if op.type.needs_bias() and len(inputs) <= op_type.info.indices.biases[0]:
-                # No Bias tensor
-                inputs.append(None)
-            if inputs[-1] and inputs[-1].values is not None:
-                # Since bias tensor is used for both bias and scale,
-                # a clone with a unique equivalence_id is needed.
-                inputs[-1] = clone_and_reshape_tensor(inputs[-1], None, True)
+
+                if op.type.needs_bias() and len(inputs) <= op_type.info.indices.biases[0]:
+                    # No Bias tensor
+                    inputs.append(None)
+                if inputs[-1] and inputs[-1].values is not None:
+                    # Since bias tensor is used for both bias and scale,
+                    # a clone with a unique equivalence_id is needed.
+                    inputs[-1] = clone_and_reshape_tensor(inputs[-1], None, True)
 
         if opt_serializer is not None:
             op.attrs = opt_serializer.deserialize(op_data)
diff --git a/ethosu/vela/tflite_writer.py b/ethosu/vela/tflite_writer.py
index 44ce711..d4e24a2 100644
--- a/ethosu/vela/tflite_writer.py
+++ b/ethosu/vela/tflite_writer.py
@@ -105,9 +105,11 @@
                     if op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op() or op.type == Op.FullyConnected:
                         # Op is run on CPU, make sure the original weight and bias tensors are written back
                         # instead of the cloned/reshaped (see tflite_reader)
-                        for idx, inp in enumerate(op.inputs):
-                            if inp != op.ifm and inp is not None and inp.src_tensor is not None:
-                                op.inputs[idx] = inp.src_tensor
+                        # Do nothing when values are None (dynamic weights)
+                        if op.inputs[1].values is not None:
+                            for idx, inp in enumerate(op.inputs):
+                                if inp != op.ifm and inp is not None and inp.src_tensor is not None:
+                                    op.inputs[idx] = inp.src_tensor
 
         # list of tuple(Op, string, op.version); the custom code is only used for 3rd party custom operators
         self.operator_codes = sorted(set((op.type, op.attrs.get("custom_code", ""), op.version) for op in all_ops))