MLBEDSW-819: make int16 changes

Enabled int16 quantization support to match the reference.

Change-Id: Ib369640241a9a491f2b0bc52d7f6cb025e30344b
Signed-off-by: Fredrik Svedberg <fredrik.svedberg@arm.com>
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index faae2cf..5fa71aa 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -27,7 +27,7 @@
 from .tensor import MemArea, TensorBlockTraversal
 from .operation import NpuBlockType
 from .numeric_util import quantise_float32, round_up, round_away_zero, round_up_to_int, clamp_sigmoid, clamp_tanh
-from .data_type import BaseType
+from .data_type import BaseType, DataType
 import numpy as np
 from .shared_buffer_allocation import SharedBufferAllocation
 from .architecture_features import SharedBufferArea, SHRAMElements, ArchitectureFeatures
@@ -615,6 +615,9 @@
 
                 else:  # Convolution
                     assert cmd.weight_tensor.block_traversal != TensorBlockTraversal.Default
+                    # Reduced precision quantization and natural rounding used for int16
+                    if cmd.ifm_tensor.dtype == DataType.int16:
+                        rounding_mode = rounding.NATURAL
                     emit.cmd0_with_param(cmd0.NPU_SET_KERNEL_HEIGHT_M1, cmd.weight_tensor.shape[0] - 1)
                     emit.cmd0_with_param(cmd0.NPU_SET_KERNEL_WIDTH_M1, cmd.weight_tensor.shape[1] - 1)
                     if cmd.weight_tensor.block_traversal == TensorBlockTraversal.PartKernelFirst: