MLBEDSW-4071: Power of two handling 16-bit tanh/sigmoid Added special handling of power-of-two input scales for 16-bit tanh/sigmoid to align with the reference. Change-Id: I87831bcd587623d7db7100e768905355c2c98e9d Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>

commit: c629129f79666eeea1d86a779c8cb245e052672f [log] [tgz]
author: Louis Verhaard <louis.verhaard@arm.com> Fri Mar 19 09:35:48 2021 +0100
committer: patrik.gustavsson <patrik.gustavsson@arm.com> Thu Mar 25 09:50:27 2021 +0000
tree: 89af751d45f53674f64e51c1f6e97d1ab87c1da5
parent: 024c355e51666868616b7ec560c7f87e03fcd398 [diff]
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index a4466c9..65801b8 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py

@@ -17,6 +17,7 @@
 # Register level (low-level) command stream generation for Ethos-U. Takes a list of NPU operations and generates
 # all the register settings. Calculates dependencies between commands and inserts wait operations. And generates a bit
 # stream suitable for interpretation by the Ethos-U processor.
+import math
 from collections import defaultdict
 from enum import Enum
 from enum import IntEnum
@@ -640,12 +641,21 @@
         rescale = 0x3000 * ifm_quant.scale_f32
         if pool_op.ifm.data_type == NpuDataType.INT16:
             # Calculate scale and shift for the output scale of 1/(3*4096)
-            shift = 0
-            max_rescale = np.iinfo(np.int16).max / 2
-            while rescale <= max_rescale and shift <= 30:
-                shift += 1
-                rescale *= 2
-            scale = int(rescale)
+            x_log2 = math.log2(ifm_quant.scale_f32)
+            rounded_log2 = int(round(x_log2))
+            is_power_of_two = abs(x_log2 - rounded_log2) < 0.001
+            shift = rounded_log2 + 12
+            if is_power_of_two and shift in (0, 1):
+                # Special handling if input scale is 1/2048 or 1/4096
+                scale = 3 << shift
+                shift = 0
+            else:
+                shift = 0
+                max_rescale = np.iinfo(np.int16).max / 2
+                while rescale <= max_rescale and shift <= 30:
+                    shift += 1
+                    rescale *= 2
+                scale = int(rescale)
         else:
             rescale_bits = len(bin(round_up_to_int(rescale))) - 2 + 1
             scale, shift = scaling.quantise_pooling_scale(kernel.height * kernel.width, rescale_bits)
commit	c629129f79666eeea1d86a779c8cb245e052672f	[log] [tgz]
author	Louis Verhaard <louis.verhaard@arm.com>	Fri Mar 19 09:35:48 2021 +0100
committer	patrik.gustavsson <patrik.gustavsson@arm.com>	Thu Mar 25 09:50:27 2021 +0000
tree	89af751d45f53674f64e51c1f6e97d1ab87c1da5
parent	024c355e51666868616b7ec560c7f87e03fcd398 [diff]