MLBEDSW-7427 Fix scale calculations for FullyConnected
Fixed scale calculations for FullyConnected to match the TensorFlow Lite reference.
Also removed unused low_precision_scaling.
Change-Id: I4b766febff4a0010acd3de708bb49be458d22bf3
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
diff --git a/ethosu/vela/operation.py b/ethosu/vela/operation.py
index f85cb4b..19b00b3 100644
--- a/ethosu/vela/operation.py
+++ b/ethosu/vela/operation.py
@@ -487,7 +487,6 @@
"read_shapes",
"rounding_mode",
"explicit_scaling",
- "low_precision_scaling",
"write_offset",
"write_shape",
"ifm_resampling_mode",
@@ -525,9 +524,6 @@
self.rounding_mode: Optional[NpuRoundingMode] = None
# Rescale op in TOSA supplies explicit multiplier and shift values
self.explicit_scaling: Optional[ExplicitScaling] = None
- # The Mean operator (implemented as a depthwise convolution) requires scaling
- # to be calculated differently in one case. In that case, this is set to True.
- self.low_precision_scaling = False
# Write offset, for operations that only produce a part of the OFM
self.write_offset: Optional[Shape4D] = None
# The amount of OFM that is produced by the operation (only if write_offset is not None).
@@ -567,7 +563,6 @@
res.write_shape = Shape4D(*self.write_shape) if self.write_shape else None
res.rounding_mode = self.rounding_mode
res.explicit_scaling = self.explicit_scaling
- res.low_precision_scaling = self.low_precision_scaling
res.ifm_resampling_mode = self.ifm_resampling_mode
res.tile_base_offsets_ifm = [_ifm.copy() for _ifm in self.tile_base_offsets_ifm]
res.tile_base_offsets_ofm = self.tile_base_offsets_ofm.copy()
diff --git a/ethosu/vela/weight_compressor.py b/ethosu/vela/weight_compressor.py
index e56cc5e..ab22e94 100644
--- a/ethosu/vela/weight_compressor.py
+++ b/ethosu/vela/weight_compressor.py
@@ -266,17 +266,11 @@
# Convert scales to np.double (from np.float32) to conform to TensorFlow Lite which
# uses double during scaling calculations
- # TensorFlow Lite casts the scales slightly differently for uint8 and int8
+ # TensorFlow Lite casts the scales slightly differently for uint8 and int8; FullyConnected
+ # operators use the same calculation as uint8, whatever the IFM data type
if not rescale_for_faf:
- if ifm_dtype == DataType.uint8:
- # for some cases of the Mean operator, the scale must be calculated differently to match reference
- if first_consumer_op.low_precision_scaling:
- scales = [
- np.double(np.single(ifm_scale) / (np.single(weight_scale) * np.single(ofm_scale)))
- for weight_scale in weight_scales
- ]
- else:
- scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
+ if ifm_dtype == DataType.uint8 or first_consumer_op.type == Op.FullyConnected:
+ scales = [np.double(ifm_scale * weight_scale) / np.double(ofm_scale) for weight_scale in weight_scales]
elif ifm_dtype == DataType.int8 or ifm_dtype == DataType.int16:
scales = [
(np.double(ifm_scale) * np.double(weight_scale)) / np.double(ofm_scale)