MLBEDSW-4602: Fix Deepspeech scale & bias reuse issue.
- Deepspeech reuses identical weights and biases throughout
the network. Since biases are now interleaved with weights,
there is a scaling issue when the IFM scales differ between
operations using the same weight and scale tensors.
- This commit keeps weights and scales interleaved on their first
use, but on subsequent uses keeps the scales separate in source
memory (if the IFM scale is different).
Signed-off-by: Tim Hall <tim.hall@arm.com>
Change-Id: I7aae163438160a919cae04e235966e75355a6148
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 4ef7bee..80d0e47 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -267,11 +267,14 @@
return fm
-def create_weights(weight_tensor: Tensor, weight_box: Box, arch: ArchitectureFeatures) -> List[NpuAddressRange]:
+def create_weights(
+ weight_tensor: Tensor, weight_box: Box, scale_tensor: Tensor, arch: ArchitectureFeatures
+) -> List[NpuAddressRange]:
"""Returns address ranges for weights and scales"""
weights = []
biases = []
- region = get_region(weight_tensor.mem_type, arch)
+ shared_region = get_region(weight_tensor.mem_type, arch)
+ scale_region = scale_tensor and get_region(scale_tensor.mem_type, arch)
w_tensor_src = weight_tensor
if weight_tensor.src_tensor:
@@ -300,11 +303,19 @@
# Location of weights in tensor
addr_range = NpuAddressRange(
- region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16)
+ shared_region, int(address + weight_range.weight_offset), round_up(int(weight_range.weight_bytes), 16)
)
weights.append(addr_range)
- # Location of biases in tensor
- addr_range = NpuAddressRange(region, int(address), round_up(int(weight_range.scale_bytes), 16))
+
+ # Location of standalone scales or combined weights tensor scales
+ if scale_tensor:
+ assert scale_tensor.src_tensor is None # Must be standalone
+ scale_range = scale_tensor.encoded_ranges[key]
+ address = scale_tensor.address + scale_range.offset
+ addr_range = NpuAddressRange(scale_region, int(address), round_up(int(scale_range.scale_bytes), 16))
+ else:
+ addr_range = NpuAddressRange(shared_region, int(address), round_up(int(weight_range.scale_bytes), 16))
+
biases.append(addr_range)
return weights, biases
@@ -351,7 +362,7 @@
npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor)
if cmd.weight_tensor is not None:
- npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, arch)
+ npu_op.weights, npu_op.biases = create_weights(cmd.weight_tensor, cmd.weight_box, cmd.scale_tensor, arch)
npu_op.activation = create_npu_activation(op)
npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize)