MLBEDSW-819: make int16 changes
Enabled reduced-precision scale quantisation for int16 support, to match the reference.
Change-Id: Ib369640241a9a491f2b0bc52d7f6cb025e30344b
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
diff --git a/ethosu/vela/scaling.py b/ethosu/vela/scaling.py
index b255f93..785cddc 100644
--- a/ethosu/vela/scaling.py
+++ b/ethosu/vela/scaling.py
@@ -42,6 +42,15 @@
return significand_q31, shift
+# Reduced precision quantization for int16
+def reduced_quantise_scale(scale):
+    """Return quantise_scale(scale) as (multiplier, shift) with 16 fewer bits, rounded to nearest and saturated."""
+    multiplier, shift = quantise_scale(scale)
+    # Rounding up near the top of the range (presumably q31 - confirm) would give 1 << 15, outside int16: clamp.
+    reduced_multiplier = min(int((multiplier + (1 << 15)) >> 16), (1 << 15) - 1)
+    return reduced_multiplier, shift - 16
+
+
# Calculate global OFM scale for Average Pooling
def quantise_pooling_scale(nr_kernel_elements, rescale_bits=0):
_, k = math.frexp(nr_kernel_elements - 1)