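MLBEDSW-2552: Skip NPU cycle calculation for CPU ops

Check for PassPlacement.Cpu before the primary_op branch and make that
branch an elif, so a pass placed on the CPU only gets its
arch.cpu_cycle_estimate() value and no longer runs the NPU-specific
cycle calculation first. The separate CPU check that used to follow
the NPU calculation is removed.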

Signed-off-by: Charles Xu <charles.xu@arm.com>
Change-Id: Ief50c934b9e9b0bd3024d3ed0bbaa7b655971952
diff --git a/ethosu/vela/npu_performance.py b/ethosu/vela/npu_performance.py
index 98626de..1663c26 100644
--- a/ethosu/vela/npu_performance.py
+++ b/ethosu/vela/npu_performance.py
@@ -232,7 +232,9 @@
     explicit_padding = (0, 0, 0, 0)
     primary_op = ps.primary_op
     replacement_read_bws = {}
-    if primary_op:
+    if ps.placement == PassPlacement.Cpu:
+        cycles[PassCycles.Cpu] = arch.cpu_cycle_estimate(ps.ops[0])
+    elif primary_op:
         skirt = primary_op.attrs.get("skirt", skirt)
         explicit_padding = primary_op.attrs.get("explicit_padding", explicit_padding)
         assert primary_op.attrs["npu_block_type"] == ps.npu_block_type
@@ -397,9 +399,6 @@
 
             cycles[PassCycles.ElementWise] = numeric_util.round_up_divide(elms, arch.num_elem_wise_units)
 
-    if ps.placement == PassPlacement.Cpu:
-        cycles[PassCycles.Cpu] = arch.cpu_cycle_estimate(ps.ops[0])
-
     # apply the desired rewrites
     for rewrite_op, tens, _, _, _, ps_to_rewrite in rewrite_list:
         if ps != ps_to_rewrite: