Save Int16/UInt16 test outputs to native dtypes

* Int16/UInt16 reference outputs were previously saved as INT32
* Save them in their native dtypes and update the other affected code

Signed-off-by: Jerry Ge <jerry.ge@arm.com>
Change-Id: I0c3b7fba096a8cb1ddabef20ad13498b8f46d36f
diff --git a/reference_model/src/tensor.cc b/reference_model/src/tensor.cc
index 27f21f3..16020cf 100644
--- a/reference_model/src/tensor.cc
+++ b/reference_model/src/tensor.cc
@@ -353,6 +353,8 @@
     half_float::half* f16databuf    = nullptr;
     uint8_t* ui8databuf             = nullptr;
     int8_t* i8databuf               = nullptr;
+    int16_t* i16databuf             = nullptr;
+    uint16_t* ui16databuf           = nullptr;
     int32_t* i32databuf             = nullptr;
     int64_t* i64databuf             = nullptr;
     bool* bdatabuf                  = nullptr;
@@ -444,19 +446,32 @@
             free(i8databuf);
             break;
         case TOSA_REF_TYPE_INT16:
-        case TOSA_REF_TYPE_UINT16:
-            i32databuf = (int32_t*)calloc(sizeof(int32_t), elements);
-            ASSERT_MEM(i32databuf);
+            i16databuf = (int16_t*)calloc(sizeof(int16_t), elements);
+            ASSERT_MEM(i16databuf);
 
-            if (getTensorValueInt32(elements, i32databuf))
+            if (getTensorValueInt16(elements, i16databuf))
             {
-                free(i32databuf);
+                free(i16databuf);
                 return 1;
             }
 
-            nperror = NumpyUtilities::writeToNpyFile(filename, shape, i32databuf);
+            nperror = NumpyUtilities::writeToNpyFile(filename, shape, i16databuf);
 
-            free(i32databuf);
+            free(i16databuf);
+            break;
+        case TOSA_REF_TYPE_UINT16:
+            ui16databuf = (uint16_t*)calloc(sizeof(uint16_t), elements);
+            ASSERT_MEM(ui16databuf);
+
+            if (getTensorValueUInt16(elements, ui16databuf))
+            {
+                free(ui16databuf);
+                return 1;
+            }
+
+            nperror = NumpyUtilities::writeToNpyFile(filename, shape, ui16databuf);
+
+            free(ui16databuf);
             break;
         case TOSA_REF_TYPE_INT48:
         case TOSA_REF_TYPE_SHAPE:
@@ -761,6 +776,31 @@
     return 0;
 }
 
+int TosaReference::Tensor::readfromVector(const ArrayProxy<uint16_t> vals)
+{
+    uint32_t elements = getElementCount();
+    switch (getDtype())
+    {
+        case TOSA_REF_TYPE_INT16:
+        case TOSA_REF_TYPE_UINT16:
+            if (vals.size() != elements)
+            {
+                WARNING("The input size (%ld) doesn't match the number of elements (%d) assigned to the tensor.",
+                        vals.size(), elements);
+                return -1;
+            }
+
+            setTensorValueUInt16(elements, vals.data());
+            break;
+        default:
+            WARNING("The input type doesn't match the data type assigned to the tensor (%s).",
+                    EnumNameTOSAREFTYPE(getDtype()));
+            return -2;
+    }
+    setIsValid();
+    return 0;
+}
+
 int TosaReference::Tensor::readfromVector(const ArrayProxy<int16_t> vals)
 {
     uint32_t elements = getElementCount();
@@ -985,6 +1025,31 @@
     return 0;
 }
 
+int TosaReference::Tensor::writeToVector(ArrayProxy<uint16_t> vals)
+{
+    uint32_t elements = getElementCount();
+
+    switch (getDtype())
+    {
+        case TOSA_REF_TYPE_INT16:
+        case TOSA_REF_TYPE_UINT16:
+            if (vals.size() != elements)
+            {
+                WARNING("The output size (%ld) doesn't match the number of elements (%d) assigned to the tensor.",
+                        vals.size(), elements);
+                return -1;
+            }
+
+            getTensorValueUInt16(elements, vals.data());
+            break;
+        default:
+            WARNING("The output type doesn't match the data type assigned to the tensor (%s).",
+                    EnumNameTOSAREFTYPE(getDtype()));
+            return -2;
+    }
+    return 0;
+}
+
 int TosaReference::Tensor::writeToVector(ArrayProxy<int16_t> vals)
 {
     uint32_t elements = getElementCount();
@@ -1841,9 +1906,161 @@
 }
 
 template <class T>
+int TosaReference::TensorTemplate<T>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals)
+{
+    FATAL_ERROR("TensorTemplate<T>::setTensorValueUInt16 should not be called.  "
+                "Implement template specialization version.");
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor0<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals)
+{
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    (*tensor)(0) = static_cast<int32_t>(vals[0]);
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor1<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        (*tensor)(i0) = static_cast<int32_t>(vals[idx++]);
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor2<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            (*tensor)(i0, i1) = static_cast<int32_t>(vals[idx++]);
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor3<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                (*tensor)(i0, i1, i2) = static_cast<int32_t>(vals[idx++]);
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor4<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    (*tensor)(i0, i1, i2, i3) = static_cast<int32_t>(vals[idx++]);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor5<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        (*tensor)(i0, i1, i2, i3, i4) = static_cast<int32_t>(vals[idx++]);
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor6<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals)
+{
+    uint32_t idx = 0;
+
+    ASSERT_MSG(bufLen == getElementCount(), "Total elements must match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        for (int i5 = 0; i5 < shape[5]; i5++)
+                        {
+                            (*tensor)(i0, i1, i2, i3, i4, i5) = static_cast<int32_t>(vals[idx++]);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+template <class T>
 int TosaReference::TensorTemplate<T>::setTensorValueInt16(const size_t bufLen, const int16_t* vals)
 {
-    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt32 should not be called.  "
+    FATAL_ERROR("TensorTemplate<T>::setTensorValueInt16 should not be called.  "
                 "Implement template specialization version.");
     return 0;
 }
@@ -3211,9 +3428,199 @@
 }
 
 template <class T>
+int TosaReference::TensorTemplate<T>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const
+{
+    FATAL_ERROR("TensorTemplate<T>::getTensorValueUInt16 should not be called.  "
+                "Implement template specialization version.");
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor0<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const
+{
+    int totalVals = 1;
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    vals[0] = (*tensor)(0);
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor1<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        vals[idx++] = (*tensor)(i0);
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor2<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            vals[idx++] = (*tensor)(i0, i1);
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor3<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                vals[idx++] = (*tensor)(i0, i1, i2);
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor4<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    vals[idx++] = (*tensor)(i0, i1, i2, i3);
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor5<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        vals[idx++] = (*tensor)(i0, i1, i2, i3, i4);
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+template <>
+int TosaReference::Tensor6<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const
+{
+    uint32_t idx  = 0;
+    int totalVals = 1;
+
+    for (size_t i = 0; i < shape.size(); i++)
+    {
+        totalVals *= shape[i];
+    }
+
+    ASSERT_MSG((size_t)totalVals == bufLen, "Output buffer and tensor size do not match");
+
+    for (int i0 = 0; i0 < shape[0]; i0++)
+    {
+        for (int i1 = 0; i1 < shape[1]; i1++)
+        {
+            for (int i2 = 0; i2 < shape[2]; i2++)
+            {
+                for (int i3 = 0; i3 < shape[3]; i3++)
+                {
+                    for (int i4 = 0; i4 < shape[4]; i4++)
+                    {
+                        for (int i5 = 0; i5 < shape[5]; i5++)
+                        {
+                            vals[idx++] = (*tensor)(i0, i1, i2, i3, i4, i5);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+template <class T>
 int TosaReference::TensorTemplate<T>::getTensorValueInt16(const size_t bufLen, int16_t* vals) const
 {
-    FATAL_ERROR("TensorTemplate<T>::getTensorValueInt32 should not be called.  "
+    FATAL_ERROR("TensorTemplate<T>::getTensorValueInt16 should not be called.  "
                 "Implement template specialization version.");
     return 0;
 }
diff --git a/reference_model/src/tensor.h b/reference_model/src/tensor.h
index 1659a2f..26c6aa7 100644
--- a/reference_model/src/tensor.h
+++ b/reference_model/src/tensor.h
@@ -239,22 +239,24 @@
     virtual int dumpTensorParams(FILE* out) const;
     virtual int dumpTensorParams(std::ostream& out) const;
 
-    virtual int setTensorValueDouble(const size_t bufLen, const double* vals) = 0;
-    virtual int setTensorValueFloat(const size_t bufLen, const float* vals)   = 0;
-    virtual int setTensorValueUInt8(const size_t bufLen, const uint8_t* vals) = 0;
-    virtual int setTensorValueInt8(const size_t bufLen, const int8_t* vals)   = 0;
-    virtual int setTensorValueInt16(const size_t bufLen, const int16_t* vals) = 0;
-    virtual int setTensorValueInt32(const size_t bufLen, const int32_t* vals) = 0;
-    virtual int setTensorValueInt64(const size_t bufLen, const int64_t* vals) = 0;
-    virtual int setTensorValueBool(const size_t bufLen, const bool* vals)     = 0;
-    virtual int getTensorValueDouble(const size_t bufLen, double* fbuf) const = 0;
-    virtual int getTensorValueFloat(const size_t bufLen, float* fbuf) const   = 0;
-    virtual int getTensorValueUInt8(const size_t bufLen, uint8_t* ibuf) const = 0;
-    virtual int getTensorValueInt8(const size_t bufLen, int8_t* ibuf) const   = 0;
-    virtual int getTensorValueInt16(const size_t bufLen, int16_t* ibuf) const = 0;
-    virtual int getTensorValueInt32(const size_t bufLen, int32_t* ibuf) const = 0;
-    virtual int getTensorValueInt64(const size_t bufLen, int64_t* ibuf) const = 0;
-    virtual int getTensorValueBool(const size_t bufLen, bool* ibuf) const     = 0;
+    virtual int setTensorValueDouble(const size_t bufLen, const double* vals)   = 0;
+    virtual int setTensorValueFloat(const size_t bufLen, const float* vals)     = 0;
+    virtual int setTensorValueUInt8(const size_t bufLen, const uint8_t* vals)   = 0;
+    virtual int setTensorValueInt8(const size_t bufLen, const int8_t* vals)     = 0;
+    virtual int setTensorValueUInt16(const size_t bufLen, const uint16_t* vals) = 0;
+    virtual int setTensorValueInt16(const size_t bufLen, const int16_t* vals)   = 0;
+    virtual int setTensorValueInt32(const size_t bufLen, const int32_t* vals)   = 0;
+    virtual int setTensorValueInt64(const size_t bufLen, const int64_t* vals)   = 0;
+    virtual int setTensorValueBool(const size_t bufLen, const bool* vals)       = 0;
+    virtual int getTensorValueDouble(const size_t bufLen, double* fbuf) const   = 0;
+    virtual int getTensorValueFloat(const size_t bufLen, float* fbuf) const     = 0;
+    virtual int getTensorValueUInt8(const size_t bufLen, uint8_t* ibuf) const   = 0;
+    virtual int getTensorValueInt8(const size_t bufLen, int8_t* ibuf) const     = 0;
+    virtual int getTensorValueUInt16(const size_t bufLen, uint16_t* ibuf) const = 0;
+    virtual int getTensorValueInt16(const size_t bufLen, int16_t* ibuf) const   = 0;
+    virtual int getTensorValueInt32(const size_t bufLen, int32_t* ibuf) const   = 0;
+    virtual int getTensorValueInt64(const size_t bufLen, int64_t* ibuf) const   = 0;
+    virtual int getTensorValueBool(const size_t bufLen, bool* ibuf) const       = 0;
 
     virtual int readFromNpyFile(const char* filename);
     virtual int writeToNpyFile(const char* filename) const;
@@ -264,6 +266,7 @@
     virtual int readfromVector(const ArrayProxy<float> vals);
     virtual int readfromVector(const ArrayProxy<half_float::half> vals);
     virtual int readfromVector(const ArrayProxy<int8_t> vals);
+    virtual int readfromVector(const ArrayProxy<uint16_t> vals);
     virtual int readfromVector(const ArrayProxy<int16_t> vals);
     virtual int readfromVector(const ArrayProxy<int32_t> vals);
     virtual int readfromVector(const ArrayProxy<int64_t> vals);
@@ -273,6 +276,7 @@
     virtual int writeToVector(ArrayProxy<float> vals);
     virtual int writeToVector(ArrayProxy<half_float::half> vals);
     virtual int writeToVector(ArrayProxy<int8_t> vals);
+    virtual int writeToVector(ArrayProxy<uint16_t> vals);
     virtual int writeToVector(ArrayProxy<int16_t> vals);
     virtual int writeToVector(ArrayProxy<int32_t> vals);
     virtual int writeToVector(ArrayProxy<int64_t> vals);
@@ -369,6 +373,7 @@
     virtual int setTensorValueFloat(const size_t bufLen, const float* vals);
     virtual int setTensorValueUInt8(const size_t bufLen, const uint8_t* vals);
     virtual int setTensorValueInt8(const size_t bufLen, const int8_t* vals);
+    virtual int setTensorValueUInt16(const size_t bufLen, const uint16_t* vals);
     virtual int setTensorValueInt16(const size_t bufLen, const int16_t* vals);
     virtual int setTensorValueInt32(const size_t bufLen, const int32_t* vals);
     virtual int setTensorValueInt64(const size_t bufLen, const int64_t* vals);
@@ -378,6 +383,7 @@
     virtual int getTensorValueFloat(const size_t bufLen, float* fbuf) const;
     virtual int getTensorValueUInt8(const size_t bufLen, uint8_t* ibuf) const;
     virtual int getTensorValueInt8(const size_t bufLen, int8_t* ibuf) const;
+    virtual int getTensorValueUInt16(const size_t bufLen, uint16_t* ibuf) const;
     virtual int getTensorValueInt16(const size_t bufLen, int16_t* ibuf) const;
     virtual int getTensorValueInt32(const size_t bufLen, int32_t* ibuf) const;
     virtual int getTensorValueInt64(const size_t bufLen, int64_t* ibuf) const;
@@ -572,6 +578,21 @@
 int Tensor6<int32_t>::setTensorValueInt8(const size_t bufLen, const int8_t* vals);
 
 template <>
+int Tensor0<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals);
+template <>
+int Tensor1<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals);
+template <>
+int Tensor2<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals);
+template <>
+int Tensor3<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals);
+template <>
+int Tensor4<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals);
+template <>
+int Tensor5<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals);
+template <>
+int Tensor6<int32_t>::setTensorValueUInt16(const size_t bufLen, const uint16_t* vals);
+
+template <>
 int Tensor0<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals);
 template <>
 int Tensor1<int32_t>::setTensorValueInt16(const size_t bufLen, const int16_t* vals);
@@ -632,6 +653,21 @@
 int Tensor6<int32_t>::getTensorValueInt8(const size_t bufLen, int8_t* vals) const;
 
 template <>
+int Tensor0<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const;
+template <>
+int Tensor1<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const;
+template <>
+int Tensor2<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const;
+template <>
+int Tensor3<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const;
+template <>
+int Tensor4<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const;
+template <>
+int Tensor5<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const;
+template <>
+int Tensor6<int32_t>::getTensorValueUInt16(const size_t bufLen, uint16_t* vals) const;
+
+template <>
 int Tensor0<int32_t>::getTensorValueInt16(const size_t bufLen, int16_t* vals) const;
 template <>
 int Tensor1<int32_t>::getTensorValueInt16(const size_t bufLen, int16_t* vals) const;
diff --git a/verif/frameworks/tosa_verif_framework_compiler_runner.py b/verif/frameworks/tosa_verif_framework_compiler_runner.py
index ce9b253..56daa51 100755
--- a/verif/frameworks/tosa_verif_framework_compiler_runner.py
+++ b/verif/frameworks/tosa_verif_framework_compiler_runner.py
@@ -695,7 +695,11 @@
         tf_result = tf_result.astype(np.int8)
     elif tf_result.dtype == np.uint8:
         tf_result = tf_result.astype(np.uint8)
-    elif tf_result.dtype == np.int16 or tf_result.dtype == np.int64:
+    elif tf_result.dtype == np.int16:
+        tf_result = tf_result.astype(np.int16)
+    elif tf_result.dtype == np.uint16:
+        tf_result = tf_result.astype(np.uint16)
+    elif tf_result.dtype == np.int64:
         tf_result = tf_result.astype(np.int32)
 
     # For now, search for the first output from ref_model
diff --git a/verif/generator/tosa_test_gen.py b/verif/generator/tosa_test_gen.py
index bc931dc..8440853 100644
--- a/verif/generator/tosa_test_gen.py
+++ b/verif/generator/tosa_test_gen.py
@@ -195,6 +195,10 @@
             return np.int8(self.rng.integers(low=low, high=high, size=shape))
         elif dtype == DType.UINT8:
             return np.uint8(self.rng.integers(low=low, high=high, size=shape))
+        elif dtype == DType.INT16:
+            return np.int16(self.rng.integers(low=low, high=high, size=shape))
+        elif dtype == DType.UINT16:
+            return np.uint16(self.rng.integers(low=low, high=high, size=shape))
         elif dtype in (DType.INT48, DType.SHAPE):
             return np.int64(self.rng.integers(low=low, high=high, size=shape))
         elif dtype in (