diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index b5a6c42..f407fdc 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -30,6 +30,7 @@
 from . import scheduler
 from . import tensor_allocation
 from . import weight_compressor
+from .errors import VelaError
 from .nn_graph import PassPlacement
 from .nn_graph import TensorAllocator
 from .rewrite_graph import verify_graph_health
@@ -208,7 +209,11 @@
 
     if root_sg is not None and (arch.feature_map_storage_mem_area != arch.fast_storage_mem_area):
         if root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0) > arch.sram_size:
-            print("Warning: Sram limit has been exceeded, by the scratch fast tensor")
+            raise VelaError(
+                "Sram limit {} bytes, has been exceeded by the scratch fast tensor {} bytes".format(
+                    arch.sram_size, root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)
+                )
+            )
 
     # Allocate all Cpu constant tensors, this is done last because the Npu-ops
     # have to be serialized into flash and scratch tensors first
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 36bb3c2..cc9278f 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -640,6 +640,16 @@
         res = self.filter_pareto_frontier(res, remove_equally_good_candidates=True)
         return res
 
+    def avoid_for_spilling(self, pred_candidate):
+        """Return True when pred_candidate should be skipped as a predecessor for SRAM spilling."""
+        arch = self.arch
+        if arch.feature_map_storage_mem_area == arch.fast_storage_mem_area:
+            # Feature map storage is the fast storage area, so no pass needs to be avoided
+            return False
+
+        # For SRAM spilling, a pass containing a concat op is avoided as predecessor
+        return any(op.type == "ConcatSliceWrite" for op in pred_candidate.ops)
+
     def search_ifm_streaming_partial(self, ps, block_config):
         if ps.placement != PassPlacement.Npu:
             return ABORT_SEARCH
@@ -664,8 +674,10 @@
                     # and it only has one successor, namely us
                     if pred_candidate.placement == PassPlacement.Npu:
                         if pred_candidate.npu_block_type in self.ifm_stream_npu_blocks:
-                            # and it is on the Npu and fusable - it's a candidate
-                            pred_pass_list.append(pred_candidate)
+                            # and it is on the Npu
+                            if not self.avoid_for_spilling(pred_candidate):
+                                # and fusable - it's a candidate
+                                pred_pass_list.append(pred_candidate)
 
         if not pred_pass_list:
             return ABORT_SEARCH
