vela: Rename --keep-scale-placement CLI

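 - Changed to --cache-bias-scale-tensor
 - The new option takes an explicit True/False value (default: True)
   and has the inverse meaning of the old flag: True moves the scale
   tensors to fast storage, False keeps their existing placement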

Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I285fe253f03ba98eff36dbe996ad3a57e2ee3d99
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 7b1ea21..d17f1e5 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -174,7 +174,7 @@
     # block config, and calc and pack the scales and biases
     weight_compressor.update_pass_weight_and_scale_tensors(nng, arch)
 
-    if not scheduler_options.keep_scale_placement:
+    if scheduler_options.cache_bias_scale_tensor:
         scheduler.move_scales_to_fast_storage(nng, arch)
 
     # LiveRanges for constant tensors for all Npu subgraphs
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index 7347b5a..977eb58 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -65,7 +65,7 @@
         use_ifm_streaming=True,
         pareto_metric=ParetoMetric.BwCycMem,
         use_nhcwb16_between_cascaded_passes=True,
-        keep_scale_placement=False,
+        cache_bias_scale_tensor=True,
     ):
         self.use_cascading = use_cascading
         self.verbose_schedule = verbose_schedule
@@ -73,7 +73,7 @@
         self.use_ifm_streaming = use_ifm_streaming
         self.pareto_metric = pareto_metric
         self.use_nhcwb16_between_cascaded_passes = use_nhcwb16_between_cascaded_passes
-        self.keep_scale_placement = keep_scale_placement
+        self.cache_bias_scale_tensor = cache_bias_scale_tensor
 
     def __str__(self):
         return type(self).__name__ + ": " + str(self.__dict__)
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index b93774d..08ab483 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -245,7 +245,11 @@
         "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
     )
     parser.add_argument(
-        "--keep-scale-placement", action="store_true", help="Keep scale tensors memory placement during scheduling"
+        "--cache-bias-scale-tensor",
+        type=ast.literal_eval,
+        default=True,
+        choices=[True, False],
+        help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)",
     )
     parser.add_argument(
         "--cascading",
@@ -416,7 +420,7 @@
         use_ifm_streaming=args.ifm_streaming,
         pareto_metric=args.pareto_metric,
         use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
-        keep_scale_placement=args.keep_scale_placement,
+        cache_bias_scale_tensor=args.cache_bias_scale_tensor,
     )
 
     model_reader_options = model_reader.ModelReaderOptions()