COMPMID-436, COMPMID-437 - Port NEConvolutionLayer & NEFullyConnectedLayer to support 16 bit fixed point

Change-Id: I69edf2dac242f941bac95c8479d921e7be6abca7
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79725
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 128fb8e..1cf630a 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -46,7 +46,7 @@
 #ifdef ARM_COMPUTE_ENABLE_FP16
 const float tolerance_f16 = 0.01f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 #endif                             /* ARM_COMPUTE_ENABLE_FP16 */
-const float tolerance_qs8 = 3.0f;  /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
+const float tolerance_q = 1.0f;    /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
 
 Tensor compute_convolution_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
                                  const PadStrideInfo &conv_info, int fixed_point_position)
@@ -101,7 +101,7 @@
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
 BOOST_DATA_TEST_CASE(Configuration,
-                     AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8 }),
+                     AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8, DataType::QS16 }),
                      conv_set, dt)
 {
     // Set fixed point position data type allowed
@@ -188,7 +188,7 @@
 BOOST_AUTO_TEST_SUITE(Quantized)
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
 BOOST_DATA_TEST_CASE(SmallConvolutionLayer,
-                     SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(4, 7),
+                     SmallConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(4, 7),
                      conv_set, dt, fixed_point_position)
 {
     // Compute function
@@ -198,12 +198,12 @@
     RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, fixed_point_position);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_qs8);
+    validate(NEAccessor(dst), ref_dst, tolerance_q);
 }
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
 BOOST_DATA_TEST_CASE(LargeConvolutionLayer,
-                     AlexNetConvolutionLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(4, 7),
+                     AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(4, 7),
                      conv_set, dt, fixed_point_position)
 {
     // Compute function
@@ -213,7 +213,7 @@
     RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, fixed_point_position);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_qs8);
+    validate(NEAccessor(dst), ref_dst, tolerance_q);
 }
 BOOST_AUTO_TEST_SUITE_END()
 
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index ae0d94a..87e0071 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -44,7 +44,7 @@
 namespace
 {
 const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-const float tolerance_qs8 = 1.0f;   /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
+const float tolerance_q   = 1.0f;   /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
 
 Tensor compute_fully_connected_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
                                      bool transpose_weights, int fixed_point_position)
@@ -109,7 +109,7 @@
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
 BOOST_DATA_TEST_CASE(Configuration,
-                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8 }),
+                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8, DataType::QS16 }),
                      fc_set, dt)
 {
     // Set fixed point position data type allowed
@@ -188,7 +188,7 @@
 BOOST_AUTO_TEST_SUITE(Quantized)
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
 BOOST_DATA_TEST_CASE(RunSmall,
-                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8 }) * boost::unit_test::data::xrange(4, 7),
+                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(4, 7),
                      fc_set, dt, fixed_point_position)
 {
     // Compute function
@@ -198,12 +198,12 @@
     RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, fixed_point_position);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_qs8);
+    validate(NEAccessor(dst), ref_dst, tolerance_q);
 }
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
 BOOST_DATA_TEST_CASE(RunLarge,
-                     LargeFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8 }) * boost::unit_test::data::xrange(4, 7),
+                     LargeFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(4, 7),
                      fc_set, dt, fixed_point_position)
 {
     // Compute function
@@ -213,7 +213,7 @@
     RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, fixed_point_position);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_qs8);
+    validate(NEAccessor(dst), ref_dst, tolerance_q);
 }
 BOOST_AUTO_TEST_SUITE_END()
 
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 488ffa9..0502f53 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -158,7 +158,7 @@
     *out = res.raw();
 }
 
-template <typename T>
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
 void vector_matrix_multiply(const T *in, const T *weights, const T *bias, T *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
 {
     for(int x = 0; x < cols_weights; ++x)
@@ -172,11 +172,12 @@
     }
 }
 
-template <>
-void vector_matrix_multiply(const int8_t *in, const int8_t *weights, const int8_t *bias, int8_t *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
+// Vector matrix multiply for fixed point type
+template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
+void vector_matrix_multiply(const T *in, const T *weights, const T *bias, T *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
 {
     using namespace fixed_point_arithmetic;
-    using promoted_type = typename fixed_point_arithmetic::traits::promote<int8_t>::type;
+    using promoted_type = typename fixed_point_arithmetic::traits::promote<T>::type;
 
     for(int x = 0; x < cols_weights; ++x)
     {
@@ -192,10 +193,10 @@
         }
 
         // Get the bias
-        const fixed_point<int8_t> b(bias[x], fixed_point_position, true);
+        const fixed_point<T> b(bias[x], fixed_point_position, true);
 
         // Convert back and accumulate the bias
-        fixed_point<int8_t> res(acc);
+        fixed_point<T> res(acc);
         res = res + b;
 
         // Store the result