COMPMID-661: Add avgpool-uint8 support. Optimize avgpool-fp32 for Bifrost. (#13)

Change-Id: I32ba6afbac6694ffa053dd16f03a1b3d14627a19
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/94857
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
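
Background for reviewers: QASYMM8 stores each element as an unsigned 8-bit
integer plus a per-tensor (scale, offset) pair, with real_value = scale * (q - offset).
A minimal standalone sketch of the mapping the new tests rely on (the helper
names here are hypothetical and the round-to-nearest policy is an assumption;
the patch itself goes through the library's QuantizationInfo::quantize() and
QuantizationInfo::dequantize()):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Affine (asymmetric) 8-bit quantization: real = scale * (q - offset).
    uint8_t quantize_qasymm8(float value, float scale, int offset)
    {
        const int q = static_cast<int>(std::round(value / scale)) + offset;
        return static_cast<uint8_t>(std::min(std::max(q, 0), 255)); // saturate to [0, 255]
    }

    float dequantize_qasymm8(uint8_t value, float scale, int offset)
    {
        return scale * (static_cast<int>(value) - offset);
    }

With QuantizationInfo(2.f / 255, 127), as used by the RunSmall QASYMM8 test
below, the representable range is roughly [-1, 1], matching the uniform
[-1, 1] fill used for the floating-point tests.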
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index 7038f2c..b3d5612 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -43,19 +43,26 @@
 {
 namespace
 {
-/** Input data set for float data types */
+/** Input data set for floating-point data types */
 const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })),
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { true, false }));
 
-/** Input data set for quantized data types */
+/** Input data set for fixed-point data types */
 const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { true, false }));
-constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
-constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for float types */
-constexpr AbsoluteTolerance<float> tolerance_qs8(3);      /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
-constexpr AbsoluteTolerance<float> tolerance_qs16(6);     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+
+/** Input data set for asymmetric quantized data type */
+const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
+                                                        framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                                framework::dataset::make("ExcludePadding", { true, false }));
+
+constexpr AbsoluteTolerance<float>   tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
+constexpr AbsoluteTolerance<float>   tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
+constexpr AbsoluteTolerance<float>   tolerance_qs16(6);     /**< Tolerance value for comparing reference's output against implementation's output for 16-bit fixed-point type */
+constexpr AbsoluteTolerance<float>   tolerance_qs8(3);      /**< Tolerance value for comparing reference's output against implementation's output for 8-bit fixed-point type */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);  /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
 } // namespace
 
 TEST_SUITE(CL)
@@ -64,12 +71,13 @@
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Mismatching data type
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),     // Mismatching fixed point position
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11),
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0), // Invalid parameters
                                                      }),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
                                                        TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
@@ -77,6 +85,7 @@
                                                        TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::QS16, 11),
                                                        TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0),
                                                        TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0),
                                                      })),
                framework::dataset::make("PoolInfo",  { PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
                                                        PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
@@ -84,8 +93,9 @@
                                                        PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
                                                        PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 2, 0)),
                                                        PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 0, 2)),
+                                                       PoolingLayerInfo(PoolingType::L2, 3, PadStrideInfo(1, 1, 0, 0)),
                                                       })),
-               framework::dataset::make("Expected", { true, false, true, false, true, true})),
+               framework::dataset::make("Expected", { true, false, true, false, true, true, true })),
                input_info, output_info, pool_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLPoolingLayer::validate(&input_info, &output_info, pool_info)) == expected, framework::LogLevel::ERRORS);
@@ -131,7 +141,7 @@
 template <typename T>
 using CLPoolingLayerFixedPointFixture = PoolingLayerValidationFixedPointFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
                                                                                                                        framework::dataset::make("DataType", DataType::QS8))),
@@ -167,6 +177,31 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+TEST_SUITE(Quantized)
+
+template <typename T>
+using CLPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8,
+                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127),
+                                                                                                                       QuantizationInfo(7.f / 255, 123)
+                                                                                                                                                            })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
+                                                                                                                   framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                                   framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/CPP/PoolingLayer.cpp b/tests/validation/CPP/PoolingLayer.cpp
index 4f755ce..90a48e0 100644
--- a/tests/validation/CPP/PoolingLayer.cpp
+++ b/tests/validation/CPP/PoolingLayer.cpp
@@ -25,6 +25,7 @@
 
 #include "arm_compute/core/Types.h"
 #include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
 
 namespace arm_compute
 {
@@ -277,6 +278,15 @@
     return dst;
 }
 
+template <>
+SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, PoolingLayerInfo info)
+{
+    SimpleTensor<float>   src_tmp = convert_from_asymmetric(src);
+    SimpleTensor<float>   dst_tmp = pooling_layer<float>(src_tmp, info);
+    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, src.quantization_info());
+    return dst;
+}
+
 template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, PoolingLayerInfo info);
 template SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, PoolingLayerInfo info);
 template SimpleTensor<qint8_t> pooling_layer(const SimpleTensor<qint8_t> &src, PoolingLayerInfo info);
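
The uint8_t specialization above deliberately avoids a dedicated integer
reference implementation: it dequantizes to float, reuses the float reference,
and requantizes with the source tensor's QuantizationInfo. A toy roundtrip
under the same assumptions as the sketch in the commit message (hypothetical
standalone code, round-to-nearest) shows the pattern on a single 2x2 average
pooling window:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const float scale  = 2.f / 255; // QuantizationInfo(2.f / 255, 127) from the CL test
        const int   offset = 127;

        // One 2x2 average pooling window, stored as QASYMM8.
        const uint8_t window[4] = { 130, 140, 150, 160 };

        // Reference path: dequantize, pool in float, requantize.
        float sum = 0.f;
        for(uint8_t q : window)
        {
            sum += scale * (static_cast<int>(q) - offset); // dequantize
        }
        const float avg = sum / 4.f;

        int q = static_cast<int>(std::round(avg / scale)) + offset; // requantize
        q     = std::min(std::max(q, 0), 255);                      // saturate

        std::printf("float avg = %f -> quantized %d\n", avg, q); // prints 145
        return 0;
    }

Any disagreement between this path and the device kernel comes down to where
the final rounding happens, which is at most one quantized step; hence the
tolerance_qasymm8(1) used by the CL tests.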
diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp
index 23ad62a..3ef5fc1 100644
--- a/tests/validation/Helpers.cpp
+++ b/tests/validation/Helpers.cpp
@@ -112,6 +112,27 @@
 
     return params;
 }
+
+SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src)
+{
+    const QuantizationInfo &quantization_info = src.quantization_info();
+    SimpleTensor<float>     dst{ src.shape(), DataType::F32, 1, 0 };
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        dst[i] = quantization_info.dequantize(src[i]);
+    }
+    return dst;
+}
+
+SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
+{
+    SimpleTensor<uint8_t> dst{ src.shape(), DataType::QASYMM8, 1, 0, quantization_info };
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        dst[i] = quantization_info.quantize(src[i]);
+    }
+    return dst;
+}
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index eecf976..6b1c4b9 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -201,6 +201,23 @@
 
     return bounds;
 }
+
+/** Convert a quantized simple tensor into a float tensor using its quantization information.
+ *
+ * @param[in] src Quantized tensor.
+ *
+ * @return Float tensor.
+ */
+SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src);
+
+/** Convert a float simple tensor into a quantized one using the specified quantization information.
+ *
+ * @param[in] src               Float tensor.
+ * @param[in] quantization_info Quantization information.
+ *
+ * @return Quantized tensor.
+ */
+SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info);
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h
index 09b9e0e..d6190e2 100644
--- a/tests/validation/fixtures/PoolingLayerFixture.h
+++ b/tests/validation/fixtures/PoolingLayerFixture.h
@@ -43,28 +43,34 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class PoolingLayerValidationFixedPointFixture : public framework::Fixture
+class PoolingLayerValidationGenericFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, int fractional_bits)
+    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding,
+               DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
     {
-        _fractional_bits = fractional_bits;
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
         PoolingLayerInfo info(pool_type, pool_size, pad_stride_info, exclude_padding);
 
-        _target    = compute_target(shape, info, data_type, fractional_bits);
-        _reference = compute_reference(shape, info, data_type, fractional_bits);
+        _target    = compute_target(shape, info, data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(shape, info, data_type, fractional_bits, quantization_info);
     }
 
 protected:
     template <typename U>
     void fill(U &&tensor)
     {
-        if(_fractional_bits == 0)
+        if(!is_data_type_quantized(tensor.data_type()))
         {
             std::uniform_real_distribution<> distribution(-1.f, 1.f);
             library->fill(tensor, distribution, 0);
         }
+        else if(is_data_type_quantized_asymmetric(tensor.data_type()))
+        {
+            library->fill_tensor_uniform(tensor, 0);
+        }
         else
         {
             const int                       one_fixed = 1 << _fractional_bits;
@@ -73,10 +79,11 @@
         }
     }
 
-    TensorType compute_target(const TensorShape &shape, PoolingLayerInfo info, DataType data_type, int fixed_point_position = 0)
+    TensorType compute_target(const TensorShape &shape, PoolingLayerInfo info,
+                              DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
+        TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, quantization_info);
         TensorType dst;
 
         // Create and configure function
@@ -102,10 +109,11 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, PoolingLayerInfo info, DataType data_type, int fixed_point_position = 0)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, PoolingLayerInfo info,
+                                      DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create reference
-        SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position, quantization_info };
 
         // Fill reference
         fill(src);
@@ -113,30 +121,56 @@
         return reference::pooling_layer<T>(src, info);
     }
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-    int             _fractional_bits{};
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    int              _fractional_bits{};
+    QuantizationInfo _quantization_info{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class PoolingLayerValidationFixture : public PoolingLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+class PoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
     void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type)
     {
-        PoolingLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding, data_type, 0);
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding,
+                                                                                               data_type, 0, QuantizationInfo());
     }
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+class PoolingLayerValidationFixedPointFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, int fractional_bits)
+    {
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding,
+                                                                                               data_type, fractional_bits, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class PoolingLayerValidationQuantizedFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, QuantizationInfo quantization_info)
+    {
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding,
+                                                                                               data_type, 0, quantization_info);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
     void setup(TensorShape shape, PoolingType pool_type, DataType data_type)
     {
-        PoolingLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, pool_type, shape.x(), PadStrideInfo(1, 1, 0, 0), true, data_type, 0);
+        PoolingLayerValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, pool_type, shape.x(), PadStrideInfo(1, 1, 0, 0), true, data_type);
     }
 };
 } // namespace validation
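
Closing note on the fixture refactor: the old two-level hierarchy is replaced
by a single PoolingLayerValidationGenericFixture carrying both fractional_bits
and quantization_info, with the public fixtures reduced to thin forwarding
wrappers. Wiring the quantized path into another backend then only takes an
alias and a test case, mirroring the CL suite above; a sketch for a
hypothetical backend (MyTensor, MyAccessor and MyPoolingLayer are
placeholders):

    template <typename T>
    using MyPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<MyTensor, MyAccessor, MyPoolingLayer, T>;

    TEST_SUITE(Quantized)
    TEST_SUITE(QASYMM8)
    FIXTURE_DATA_TEST_CASE(RunSmall, MyPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
                           combine(combine(datasets::SmallShapes(),
                                           combine(PoolingLayerDatasetQASYMM8, framework::dataset::make("DataType", DataType::QASYMM8))),
                                   framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
    {
        // Validate output against the dequantize -> float-pool -> requantize reference
        validate(MyAccessor(_target), _reference, tolerance_qasymm8);
    }
    TEST_SUITE_END()
    TEST_SUITE_END()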