MLBED-2822 Add CLI option for weight size estimation
Added --weight-estimation-scaling, which enables
additional scaling of weight compression scale estimate.
Signed-off-by: Patrik Gustavsson <patrik.gustavsson@arm.com>
Change-Id: Idcda41257f44901d3a3f345341e07fb1ae8585a9
diff --git a/OPTIONS.md b/OPTIONS.md
index fa060bb..fa5f413 100644
--- a/OPTIONS.md
+++ b/OPTIONS.md
@@ -244,6 +244,18 @@
vela network.tflite --nhcwb16-between-cascaded-passes
```
+### Scaling of weight estimates
+
+Performs an additional scaling of weight compression estimate used by Vela to estimate SRAM usage.
+Increasing this scaling factor makes the estimates more conservative (i.e. assumes a larger weight
+size), which can result in optimisations that use less SRAM, albeit at the cost of performance (inference speed).
+**Type: Float**
+**Default: 1.0**
+
+```bash
+vela network.tflite --weight-estimation-scaling=1.2
+```
+
## Verbose Print Options
All of the options below are disabled by default and enabling them will add
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index 5453f2c..8b968a3 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -184,6 +184,7 @@
global_memory_clock_scale,
max_blockdep,
softmax_support,
+ weight_estimation_scaling,
):
accelerator_config = accelerator_config.lower()
self.vela_config = vela_config
@@ -215,6 +216,7 @@
)
self.max_blockdep = max_blockdep
+ self.weight_estimation_scaling = weight_estimation_scaling
dpu_min_height = accel_config.ofm_ublock.height
dpu_min_width = accel_config.ofm_ublock.width
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index 5e9e38f..94900ad 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -212,7 +212,9 @@
if root_sg is not None and (arch.feature_map_storage_mem_area != arch.fast_storage_mem_area):
if root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0) > arch.sram_size:
raise VelaError(
- "Sram limit {} bytes, has been exceeded by the scratch fast tensor {} bytes".format(
+ "Sram limit {} bytes, has been exceeded by the scratch fast tensor {} bytes. "
+ "Increasing the value of --weight-estimation-scaling may help to resolve the issue. "
+ "See OPTIONS.md for more information.".format(
arch.sram_size, root_sg.memory_used_per_type.get(MemType.Scratch_fast, 0)
)
)
diff --git a/ethosu/vela/scheduler.py b/ethosu/vela/scheduler.py
index f3b3a79..9a8215d 100644
--- a/ethosu/vela/scheduler.py
+++ b/ethosu/vela/scheduler.py
@@ -608,7 +608,10 @@
base_sram_used = 0
for tens in ps.intermediates:
if tens.mem_area == self.mem_area:
- base_sram_used += tens.storage_size()
+ if tens.purpose == TensorPurpose.Weights:
+                        base_sram_used += tens.storage_size(self.arch.weight_estimation_scaling)
+ else:
+ base_sram_used += tens.storage_size()
all_block_configs = self.get_block_configs(ps)
for block_config in all_block_configs:
@@ -718,7 +721,7 @@
)
]
sram_used += ifm_tensor.storage_size_for_sub_purpose(
- TensorSubPurpose.RollingBufferY, rolling_buffer_y, None
+ self.arch, TensorSubPurpose.RollingBufferY, rolling_buffer_y, None
)
all_candidates.extend(self.append_sram_rewrite_list(sram_used, rewrite_list, [strat_opt]))
@@ -779,7 +782,9 @@
for tens in ps.intermediates:
if tens.mem_area == self.mem_area:
if tens.purpose == TensorPurpose.Weights:
- sram_used += tens.storage_size_for_sub_purpose(TensorSubPurpose.DoubleBuffer, block_config[3])
+ sram_used += tens.storage_size_for_sub_purpose(
+ self.arch, TensorSubPurpose.DoubleBuffer, block_config[3]
+ )
rewrite_list.append(
(
SchedulerRewrite.ChangeTensorSubPurpose,
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index d4f6a40..3ad9b25 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -439,20 +439,25 @@
def has_fully_defined_shape(self):
return shape_fully_defined(self.shape)
- def storage_size(self):
- raw_size = self.storage_elements() * self.element_size()
+ def storage_size(self, scale=1.0):
+ raw_size = self.storage_elements() * self.element_size() * scale
if raw_size == 0:
raw_size = 1 # force it to take up space
rounded_size = numeric_util.round_up(numeric_util.round_up_to_int(raw_size), self.alignment)
return rounded_size
- def storage_size_for_sub_purpose(self, sub_purpose, param_a=None, param_b=None):
+ def storage_size_for_sub_purpose(self, arch, sub_purpose, param_a=None, param_b=None):
alt_shape = self.storage_shape_for_sub_purpose(sub_purpose, param_a, param_b)
elems = shape_num_elements(alt_shape)
if elems is None:
return 0
if sub_purpose == TensorSubPurpose.DoubleBuffer:
- raw_size = elems * self.element_size() * self.compression_scale_for_worst_weight_stream
+ raw_size = (
+ elems
+ * self.element_size()
+ * self.compression_scale_for_worst_weight_stream
+ * arch.weight_estimation_scaling
+ )
else:
# Rolling buffers are used for intermediate data in ifm streaming
# These will all use the NHCWB16 format, and need to be aligned to 16 in the C-dimension
diff --git a/ethosu/vela/test/testutil.py b/ethosu/vela/test/testutil.py
index 116afa4..68866fc 100644
--- a/ethosu/vela/test/testutil.py
+++ b/ethosu/vela/test/testutil.py
@@ -38,6 +38,7 @@
global_memory_clock_scale=1.0,
max_blockdep=0,
softmax_support=True,
+ weight_estimation_scaling=1.0,
)
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 97cc873..1908092 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -260,7 +260,12 @@
choices=[True, False],
help="Control if Softmax should be transformed into a set of npu operations (default: %(default)s)",
)
-
+ parser.add_argument(
+ "--weight-estimation-scaling",
+ type=float,
+ default=1.0,
+ help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
+ )
args = parser.parse_args(args=args)
# Read configuration file
@@ -291,6 +296,7 @@
global_memory_clock_scale=args.global_memory_clock_scale,
max_blockdep=args.max_block_dependency,
softmax_support=args.softmax_support,
+ weight_estimation_scaling=args.weight_estimation_scaling,
)
compiler_options = compiler_driver.CompilerOptions(