MLBEDSW-4812: Deep speech performance block config update

Deep speech was exhibiting poor performance in its first three
layers due to poor SHRAM utilisation.

 - Given a choice between multiple identical-cost block configs,
   the allocator was choosing the first one it encountered. This
   commit biases the choice towards blocks with a larger IFM
   fetch area to improve SHRAM utilisation.

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I2ff18a13444b8812cb451a606ff692bf290e7d20
diff --git a/ethosu/vela/architecture_allocator.py b/ethosu/vela/architecture_allocator.py
index 86410cf..3c49eb1 100644
--- a/ethosu/vela/architecture_allocator.py
+++ b/ethosu/vela/architecture_allocator.py
@@ -263,6 +263,7 @@
 
     # Block WHC search, loops across the search space looking for best efficiency
     best_cost = math.inf
+    best_coverage = math.inf
     depth = max(arch.ofm_ublock.depth, min(search_space.depth, SplitDepth))
     if depth < ofm_shape.depth:
         depth = round_up(depth, SplitDepth)
@@ -309,12 +310,27 @@
                     if ifm_shape.elements() < ifm_block.elements() * 2:
                         relative_cost = relative_cost / 2
 
-                    if relative_cost < best_cost:
-                        best_cost = relative_cost
-                        config.layout = layout
-                        config.bank_size = arch.shram_bank_size
-                        config.ifm_block = ifm_block
-                        config.ofm_block = Shape4D(1, height, width, depth)
+                    # Choose based on relative minimum cost or larger IFM area (if equal cost)
+                    if relative_cost <= best_cost:
+                        choose_this = False
+                        # Check IFM coverage only when the cost equals best_cost and the OFM block is small
+                        if relative_cost == best_cost:
+                            coverage_shape = Shape4D.min(ifm_shape, ifm_block)
+                            coverage = ifm_shape.elements_wh() / coverage_shape.elements_wh()
+                            # Small (<= 4x4) OFM block constraint found through analysis of networks
+                            if coverage <= best_coverage and (height <= 4 and width <= 4):
+                                best_coverage = coverage
+                                choose_this = True
+                        else:
+                            best_coverage = math.inf
+                            choose_this = True
+
+                        if choose_this:
+                            best_cost = relative_cost
+                            config.layout = layout
+                            config.bank_size = arch.shram_bank_size
+                            config.ifm_block = ifm_block
+                            config.ofm_block = Shape4D(1, height, width, depth)
                 else:
                     wont_fit[(width, height)] = True