MLBEDSW-2600: Fix writing of register for wrong architecture

 - The parallelism mode register was being written for non-Yoda targets.

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I31b50031dab4d615733c4c3790dec8934117f275
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index dc66889..1dce435 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -167,7 +167,7 @@
 
         self.system_config = system_config
 
-        is_yoda_system = "yoda-" in self.accelerator_config
+        self.is_yoda_system = "yoda-" in self.accelerator_config
 
         self.ncores = accel_config.cores
         self.ofm_ublock = accel_config.ofm_ublock
@@ -199,7 +199,7 @@
         self.memory_port_widths = np.zeros(MemArea.Size)
 
         # Get system configuration
-        self.__read_sys_config(is_yoda_system)
+        self.__read_sys_config(self.is_yoda_system)
 
         # apply the global memory clock scales to the individual ones from the system config
         for mem in MemArea.all():
@@ -230,7 +230,7 @@
         self.default_feature_map_format = TensorFormat.NHWC
 
         # This is to ignore permanent_storage = On/OffChipflash for Yoda
-        if not is_yoda_system and permanent_storage != MemArea.OffChipFlash:
+        if not self.is_yoda_system and permanent_storage != MemArea.OffChipFlash:
             self.permanent_storage_mem_area = permanent_storage
 
         self.tensor_storage_mem_area = {
@@ -274,7 +274,7 @@
         self.cycles_weight = 40
         self.max_sram_used_weight = 1000
 
-        if is_yoda_system:
+        if self.is_yoda_system:
             self.max_sram_used_weight = 1000
 
         # Shared Buffer Block allocations
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 6cd8143..3b29498 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -391,7 +391,8 @@
             param = 0
             emit.cmd_wait(cmd0.NPU_OP_DMA_WAIT, param, absolute_dep[CommandType.DMA][0])
 
-    emit.cmd0_with_param(cmd0.NPU_SET_PARALLEL_MODE, arch.ncores-1)
+    if arch.is_yoda_system:
+        emit.cmd0_with_param(cmd0.NPU_SET_PARALLEL_MODE, arch.ncores-1)
 
     for cmd in cmd_stream:
         if cmd.cmdtype == CommandType.DMA: