MLBEDSW-6645: MLCE: Optimize SRAM usage - When compiling for shared SRAM, the old scheduler has an option that lets it produce a schedule using less SRAM than the new scheduler manages to produce. The old scheduler was able to create more/longer cascades. In order to improve the new scheduler, the following has been implemented: - Take persistent IFMs into account when creating the min schedule. - Choose longer cascades when it is possible to reduce the total SRAM usage compared to using shorter cascades. - Updated calculation for estimated SRAM usage for elementwise ops. Signed-off-by: Johan Alfven <johan.alfven@arm.com> Change-Id: I209bbf2d94425e4f6aacb1d151b3b2aa65c0870b

commit: 255dad78c62c8924a44f78d91a573c0aa719cbde [log] [tgz]
author: Johan Alfvén <johan.alfven@arm.com> Sat Jul 16 18:27:05 2022 +0200
committer: Rickard Bolin <rickard.bolin@arm.com> Wed Aug 17 07:47:09 2022 +0000
tree: 0365d094fc2a19cbf819b30ee6292d0d5124f1a2
parent: 8e1352a00dcc0198ae2cd0d8380ef560bd3a847c [diff] [blame]
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index d01942b..e9f38b4 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py

@@ -941,6 +941,29 @@
 
         return peak_mem_usage
 
+    def build_cascades_for_min_schedule(self, min_schedule: Schedule, max_template: Schedule, memory_limit: int):
+        # Update memory snapshot
+        self.sg.schedule = min_schedule
+        self.update_op_memory_snapshot(min_schedule)
+
+        # Calculate residual memory for Min schedule
+        non_local_mem_usage = {}
+        for sched_op in self.sched_ops:
+            time_index = min_schedule.cost_map[sched_op].time_index
+
+            if self.arch.is_spilling_enabled():
+                # For Dedicated SRAM only the intermediate buffers are in SRAM, hence op_mem_usage is 0
+                op_mem_usage = 0
+            else:
+                # Min schedule only has ifm and ofm in SRAM (no buffered weight tensors)
+                op_mem_usage = sched_op.ifm_size_in_bytes() + sched_op.ofm_size_in_bytes()
+
+            non_local_mem_usage[sched_op] = min_schedule.memory_snapshot[time_index] - op_mem_usage
+
+        # Create cascades for Min schedule
+        cascade_builder = CascadeBuilder(self.sched_ops, self.arch.is_spilling_enabled(), non_local_mem_usage)
+        cascade_builder.build_cascades(min_schedule, max_template, memory_limit)
+
     def optimize_sub_schedule(
         self, cascade_info: CascadeInfo, ref_schedule: Schedule, max_template: Schedule, memory_limit: int
     ) -> Schedule:
@@ -1545,8 +1568,8 @@
             if scheduler_options.optimization_strategy == OptimizationStrategy.Size:
                 initial_sram_limit = scheduler.min_memory_req
 
-            cascade_builder = CascadeBuilder(scheduler.sched_ops, arch.is_spilling_enabled())
-            cascade_builder.build_cascades(min_schedule, max_schedule_template, initial_sram_limit)
+            # Build cascades for Min schedule
+            scheduler.build_cascades_for_min_schedule(min_schedule, max_schedule_template, initial_sram_limit)
             sg.schedule = min_schedule
             scheduler.update_op_memory_snapshot(min_schedule)
commit	255dad78c62c8924a44f78d91a573c0aa719cbde	[log] [tgz]
author	Johan Alfvén <johan.alfven@arm.com>	Sat Jul 16 18:27:05 2022 +0200
committer	Rickard Bolin <rickard.bolin@arm.com>	Wed Aug 17 07:47:09 2022 +0000
tree	0365d094fc2a19cbf819b30ee6292d0d5124f1a2
parent	8e1352a00dcc0198ae2cd0d8380ef560bd3a847c [diff] [blame]