COMPMID-436, COMPMID-437 - Port NEConvolutionLayer & NEFullyConnectedLayer to support 16 bit fixed point

Change-Id: I69edf2dac242f941bac95c8479d921e7be6abca7
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79725
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>

commit: 2bbd96457e3740fd9df5556607514b5e80a25720 [log] [tgz]
author: Gian Marco Iodice <gianmarco.iodice@arm.com> Tue Jul 04 16:46:32 2017 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> Mon Sep 17 14:15:39 2018 +0100
tree: 679935dd849bdac044769dfff67516962493dd51
parent: 8a383694445dfebb84732b19d5b3299961e8ffe3 [diff]
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 128fb8e..1cf630a 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp

@@ -46,7 +46,7 @@
 #ifdef ARM_COMPUTE_ENABLE_FP16
 const float tolerance_f16 = 0.01f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 #endif                             /* ARM_COMPUTE_ENABLE_FP16 */
-const float tolerance_qs8 = 3.0f;  /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
+const float tolerance_q = 1.0f;    /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
 
 Tensor compute_convolution_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
                                  const PadStrideInfo &conv_info, int fixed_point_position)
@@ -101,7 +101,7 @@
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
 BOOST_DATA_TEST_CASE(Configuration,
-                     AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8 }),
+                     AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8, DataType::QS16 }),
                      conv_set, dt)
 {
     // Set fixed point position data type allowed
@@ -188,7 +188,7 @@
 BOOST_AUTO_TEST_SUITE(Quantized)
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
 BOOST_DATA_TEST_CASE(SmallConvolutionLayer,
-                     SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(4, 7),
+                     SmallConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(4, 7),
                      conv_set, dt, fixed_point_position)
 {
     // Compute function
@@ -198,12 +198,12 @@
     RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, fixed_point_position);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_qs8);
+    validate(NEAccessor(dst), ref_dst, tolerance_q);
 }
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
 BOOST_DATA_TEST_CASE(LargeConvolutionLayer,
-                     AlexNetConvolutionLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(4, 7),
+                     AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(4, 7),
                      conv_set, dt, fixed_point_position)
 {
     // Compute function
@@ -213,7 +213,7 @@
     RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, fixed_point_position);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_qs8);
+    validate(NEAccessor(dst), ref_dst, tolerance_q);
 }
 BOOST_AUTO_TEST_SUITE_END()
 

diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index ae0d94a..87e0071 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp

@@ -44,7 +44,7 @@
 namespace
 {
 const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-const float tolerance_qs8 = 1.0f;   /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
+const float tolerance_q   = 1.0f;   /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
 
 Tensor compute_fully_connected_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
                                      bool transpose_weights, int fixed_point_position)
@@ -109,7 +109,7 @@
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
 BOOST_DATA_TEST_CASE(Configuration,
-                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8 }),
+                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8, DataType::QS16 }),
                      fc_set, dt)
 {
     // Set fixed point position data type allowed
@@ -188,7 +188,7 @@
 BOOST_AUTO_TEST_SUITE(Quantized)
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
 BOOST_DATA_TEST_CASE(RunSmall,
-                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8 }) * boost::unit_test::data::xrange(4, 7),
+                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(4, 7),
                      fc_set, dt, fixed_point_position)
 {
     // Compute function
@@ -198,12 +198,12 @@
     RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, fixed_point_position);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_qs8);
+    validate(NEAccessor(dst), ref_dst, tolerance_q);
 }
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
 BOOST_DATA_TEST_CASE(RunLarge,
-                     LargeFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8 }) * boost::unit_test::data::xrange(4, 7),
+                     LargeFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8, DataType::QS16 }) * boost::unit_test::data::xrange(4, 7),
                      fc_set, dt, fixed_point_position)
 {
     // Compute function
@@ -213,7 +213,7 @@
     RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, fixed_point_position);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, tolerance_qs8);
+    validate(NEAccessor(dst), ref_dst, tolerance_q);
 }
 BOOST_AUTO_TEST_SUITE_END()
 

diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 488ffa9..0502f53 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h

@@ -158,7 +158,7 @@
     *out = res.raw();
 }
 
-template <typename T>
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
 void vector_matrix_multiply(const T *in, const T *weights, const T *bias, T *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
 {
     for(int x = 0; x < cols_weights; ++x)
@@ -172,11 +172,12 @@
     }
 }
 
-template <>
-void vector_matrix_multiply(const int8_t *in, const int8_t *weights, const int8_t *bias, int8_t *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
+// Vector matrix multiply for fixed point type
+template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
+void vector_matrix_multiply(const T *in, const T *weights, const T *bias, T *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
 {
     using namespace fixed_point_arithmetic;
-    using promoted_type = typename fixed_point_arithmetic::traits::promote<int8_t>::type;
+    using promoted_type = typename fixed_point_arithmetic::traits::promote<T>::type;
 
     for(int x = 0; x < cols_weights; ++x)
     {
@@ -192,10 +193,10 @@
         }
 
         // Get the bias
-        const fixed_point<int8_t> b(bias[x], fixed_point_position, true);
+        const fixed_point<T> b(bias[x], fixed_point_position, true);
 
         // Convert back and accumulate the bias
-        fixed_point<int8_t> res(acc);
+        fixed_point<T> res(acc);
         res = res + b;
 
         // Store the result
commit	2bbd96457e3740fd9df5556607514b5e80a25720	[log] [tgz]
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	Tue Jul 04 16:46:32 2017 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	Mon Sep 17 14:15:39 2018 +0100
tree	679935dd849bdac044769dfff67516962493dd51
parent	8a383694445dfebb84732b19d5b3299961e8ffe3 [diff]