COMPMID-3502: Add support for different input/output quantization for ReduceMean

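ReduceMean on both the CL and NEON backends previously required the
input and output tensors to share the same QuantizationInfo. When the
input is quantized and the two infos differ, the function now
dequantizes the input to F32, performs the mean reduction in float and
requantizes the result with the output's QuantizationInfo. Validation,
the test reference, fixtures and datasets are updated accordingly.

A minimal usage sketch (non-authoritative; the shapes, reduced axis and
quantization parameters are illustrative, mirroring the new Requant
test data):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEReduceMean.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void reduce_mean_requant_sketch()
    {
        Tensor input, output;
        input.allocator()->init(TensorInfo(TensorShape(8U, 16U, 4U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255, 5)));
        // Different scale/offset on the output: previously rejected by
        // validate(), now handled via dequantize -> mean -> requantize.
        output.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 200, 16)));

        NEReduceMean reduce_mean;
        reduce_mean.configure(&input, Coordinates(2), /* keep_dims */ false, &output);

        input.allocator()->allocate();
        output.allocator()->allocate();
        // ... fill input ...
        reduce_mean.run();
    }
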
Change-Id: If9a5c6ee3902a7381f4117e473adbddf006f3347
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3731
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 0be4caf..f2f5a30 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -47,13 +47,13 @@
  *
  * @return the calculated shape
  */
-inline TensorShape calculate_reduce_mean_shape(ITensor *input, const Coordinates &reduction_axis, bool keep_dims)
+inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims)
 {
     const int   reduction_ops = reduction_axis.num_dimensions();
     Coordinates axis_local    = reduction_axis;
-    const int   input_dims    = input->info()->num_dimensions();
+    const int   input_dims    = input->num_dimensions();
     convert_negative_axis(axis_local, input_dims);
-    TensorShape out_shape = input->info()->tensor_shape();
+    TensorShape out_shape = input->tensor_shape();
     // Configure reshape layer if we want to drop the dimensions
     if(!keep_dims)
     {
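
With calculate_reduce_mean_shape() now taking an ITensorInfo, callers that
only hold tensor metadata (such as the fixture change further below) can use
it directly. A minimal sketch, assuming a 4D QASYMM8 input; the function
name and values are illustrative only:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/utils/misc/ShapeCalculator.h"

    using namespace arm_compute;

    TensorShape reduce_mean_out_shape()
    {
        TensorInfo info(TensorShape(8U, 16U, 4U), 1, DataType::QASYMM8);
        // Reduce axis 2 and drop it (keep_dims == false) -> (8, 16)
        return misc::shape_calculator::calculate_reduce_mean_shape(&info, Coordinates(2), false);
    }
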
diff --git a/arm_compute/runtime/CL/functions/CLReduceMean.h b/arm_compute/runtime/CL/functions/CLReduceMean.h
index 88ead9d..c37ee8c 100644
--- a/arm_compute/runtime/CL/functions/CLReduceMean.h
+++ b/arm_compute/runtime/CL/functions/CLReduceMean.h
@@ -25,7 +25,9 @@
 #define ARM_COMPUTE_CL_REDUCE_MEAN_H
 
 #include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
+#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
 #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
 #include "arm_compute/runtime/IMemoryManager.h"
@@ -82,8 +84,13 @@
     std::vector<CLReductionOperation> _reduction_kernels;
     std::vector<CLTensor>             _reduced_outs;
     CLReshapeLayer                    _reshape;
+    CLDequantizationLayer             _dequant;
+    CLQuantizationLayer               _requant;
     int                               _reduction_ops;
     bool                              _keep_dims;
+    bool                              _do_requant;
+    CLTensor                          _input_no_quant;
+    CLTensor                          _output_no_quant;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CL_REDUCE_MEAN_H */
diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h
index a1b6e34..eee3f7f 100644
--- a/arm_compute/runtime/NEON/functions/NEReduceMean.h
+++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h
@@ -29,6 +29,8 @@
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
 #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
 #include "arm_compute/runtime/Tensor.h"
@@ -71,8 +73,13 @@
     std::vector<NEReductionOperation> _reduction_kernels;
     std::vector<Tensor>               _reduced_outs;
     NEReshapeLayer                    _reshape;
+    NEDequantizationLayer             _dequant;
+    NEQuantizationLayer               _requant;
     int                               _reduction_ops;
     bool                              _keep_dims;
+    bool                              _do_requant;
+    Tensor                            _input_no_quant;
+    Tensor                            _output_no_quant;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEON_REDUCE_MEAN_H */
diff --git a/src/runtime/CL/functions/CLReduceMean.cpp b/src/runtime/CL/functions/CLReduceMean.cpp
index c8eb542..0e2ede7 100644
--- a/src/runtime/CL/functions/CLReduceMean.cpp
+++ b/src/runtime/CL/functions/CLReduceMean.cpp
@@ -83,15 +83,25 @@
         }
         const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
+        const bool requant = is_data_type_quantized(input->data_type()) && input->quantization_info() != output->quantization_info();
+        if(requant)
+        {
+            TensorInfo input_no_quant(input->clone()->set_data_type(DataType::F32));
+            ARM_COMPUTE_RETURN_ON_ERROR(CLDequantizationLayer::validate(input, &input_no_quant));
+            TensorInfo output_no_quant(output->clone()->set_data_type(DataType::F32));
+            ARM_COMPUTE_RETURN_ON_ERROR(CLQuantizationLayer::validate(&output_no_quant, output));
+        }
     }
     return Status{};
 }
 }
+
 CLReduceMean::CLReduceMean(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), _reduction_ops(), _keep_dims()
+    : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), _dequant(), _requant(), _reduction_ops(), _keep_dims(), _do_requant(), _input_no_quant(),
+      _output_no_quant()
 {
 }
+
 void CLReduceMean::configure(ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input, reduction_axis, keep_dims, output);
@@ -102,33 +112,49 @@
     // Perform validate step
     ARM_COMPUTE_ERROR_THROW_ON(CLReduceMean::validate(input->info(), reduction_axis, keep_dims, output->info()));
     // Output auto initialization if not yet initialized
-    const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_reduce_mean_shape(input, reduction_axis, keep_dims);
+    const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_reduce_mean_shape(input->info(), reduction_axis, keep_dims);
     auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
 
+    _do_requant    = is_data_type_quantized(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info();
     _reduction_ops = reduction_axis.num_dimensions();
     _reduction_kernels.resize(_reduction_ops);
     _reduced_outs.resize(_reduction_ops - (keep_dims ? 1 : 0));
     _keep_dims = keep_dims;
 
+    ICLTensor *tmp_input  = input;
+    ICLTensor *tmp_output = output;
+    if(_do_requant)
+    {
+        _memory_group.manage(&_input_no_quant);
+        _memory_group.manage(&_output_no_quant);
+        TensorInfo output_no_quant_info = input->info()->clone()->set_tensor_shape(output_shape);
+        output_no_quant_info.set_data_type(DataType::F32);
+        auto_init_if_empty(*_output_no_quant.info(), output_no_quant_info);
+        auto_init_if_empty(*_input_no_quant.info(), input->info()->clone()->set_data_type(DataType::F32));
+        _dequant.configure(compile_context, input, &_input_no_quant);
+        tmp_input  = &_input_no_quant;
+        tmp_output = &_output_no_quant;
+    }
+
     Coordinates axis_local = reduction_axis;
-    const int   input_dims = input->info()->num_dimensions();
+    const int   input_dims = tmp_input->info()->num_dimensions();
 
     convert_negative_axis(axis_local, input_dims);
 
     // Perform reduction for every axis
     for(int i = 0; i < _reduction_ops; ++i)
     {
-        TensorShape out_shape = i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+        TensorShape out_shape = i == 0 ? tmp_input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
         out_shape.set(axis_local[i], 1);
-        auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
+        auto in = (i == 0) ? tmp_input : (&_reduced_outs[i - 1]);
 
         if(i == _reduction_ops - 1 && keep_dims)
         {
-            _reduction_kernels[i].configure(compile_context, in, output, axis_local[i], ReductionOperation::MEAN_SUM);
+            _reduction_kernels[i].configure(compile_context, in, tmp_output, axis_local[i], ReductionOperation::MEAN_SUM);
         }
         else
         {
-            _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), input->info()->data_type(), input->info()->quantization_info()));
+            _reduced_outs[i].allocator()->init(TensorInfo(out_shape, tmp_input->info()->num_channels(), tmp_input->info()->data_type(), tmp_input->info()->quantization_info()));
             _memory_group.manage(&_reduced_outs[i]);
             _reduction_kernels[i].configure(compile_context, in, &_reduced_outs[i], axis_local[i], ReductionOperation::MEAN_SUM);
         }
@@ -141,9 +167,9 @@
     }
 
     // Configure reshape layer if we want to drop the dimensions
-    if(!keep_dims)
+    if(!_keep_dims)
     {
-        TensorShape out_shape = input->info()->tensor_shape();
+        TensorShape out_shape = tmp_input->info()->tensor_shape();
 
         // We have to sort the reduction axis vectors in order for remove_dimension
         // to work properly
@@ -152,8 +178,14 @@
         {
             out_shape.remove_dimension(axis_local[i] - i);
         }
-        auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape));
-        _reshape.configure(compile_context, &_reduced_outs[_reduction_ops - 1], output);
+        auto_init_if_empty(*tmp_output->info(), tmp_input->info()->clone()->set_tensor_shape(out_shape));
+        _reshape.configure(compile_context, &_reduced_outs[_reduction_ops - 1], tmp_output);
+    }
+    if(_do_requant)
+    {
+        _requant.configure(compile_context, &_output_no_quant, output);
+        _input_no_quant.allocator()->allocate();
+        _output_no_quant.allocator()->allocate();
     }
 }
 
@@ -166,14 +198,21 @@
 {
     MemoryGroupResourceScope scope_mg(_memory_group);
 
+    if(_do_requant)
+    {
+        _dequant.run();
+    }
     for(auto &kernel : _reduction_kernels)
     {
         kernel.run();
     }
-
     if(!_keep_dims)
     {
         _reshape.run();
     }
+    if(_do_requant)
+    {
+        _requant.run();
+    }
 }
 } // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEReduceMean.cpp b/src/runtime/NEON/functions/NEReduceMean.cpp
index 079c7c6..021f7b5 100644
--- a/src/runtime/NEON/functions/NEReduceMean.cpp
+++ b/src/runtime/NEON/functions/NEReduceMean.cpp
@@ -33,13 +33,6 @@
 {
 namespace
 {
-} // namespace
-
-NEReduceMean::NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), _reduction_ops(), _keep_dims()
-{
-}
-
 Status validate_config(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output)
 {
     ARM_COMPUTE_UNUSED(keep_dims);
@@ -89,10 +82,24 @@
         }
         const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
+        const bool requant = is_data_type_quantized(input->data_type()) && input->quantization_info() != output->quantization_info();
+        if(requant)
+        {
+            TensorInfo input_no_quant(input->clone()->set_data_type(DataType::F32));
+            ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(input, &input_no_quant));
+            TensorInfo output_no_quant(output->clone()->set_data_type(DataType::F32));
+            ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayer::validate(&output_no_quant, output));
+        }
     }
     return Status{};
 }
+} // namespace
+
+NEReduceMean::NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(), _dequant(), _requant(), _reduction_ops(), _keep_dims(), _do_requant(), _input_no_quant(),
+      _output_no_quant()
+{
+}
 
 Status NEReduceMean::validate(const ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims, const ITensorInfo *output)
 {
@@ -104,33 +111,49 @@
     // Perform validate step
     ARM_COMPUTE_ERROR_THROW_ON(NEReduceMean::validate(input->info(), reduction_axis, keep_dims, output->info()));
     // Output auto initialization if not yet initialized
-    const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_reduce_mean_shape(input, reduction_axis, keep_dims);
+    const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_reduce_mean_shape(input->info(), reduction_axis, keep_dims);
     auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
 
+    _do_requant    = is_data_type_quantized(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info();
     _reduction_ops = reduction_axis.num_dimensions();
     _reduction_kernels.resize(_reduction_ops);
     _reduced_outs.resize(_reduction_ops - (keep_dims ? 1 : 0));
     _keep_dims = keep_dims;
 
+    ITensor *tmp_input  = input;
+    ITensor *tmp_output = output;
+    if(_do_requant)
+    {
+        _memory_group.manage(&_input_no_quant);
+        _memory_group.manage(&_output_no_quant);
+        TensorInfo output_no_quant_info = input->info()->clone()->set_tensor_shape(output_shape);
+        output_no_quant_info.set_data_type(DataType::F32);
+        auto_init_if_empty(*_output_no_quant.info(), output_no_quant_info);
+        auto_init_if_empty(*_input_no_quant.info(), input->info()->clone()->set_data_type(DataType::F32));
+        _dequant.configure(input, &_input_no_quant);
+        tmp_input  = &_input_no_quant;
+        tmp_output = &_output_no_quant;
+    }
+
     Coordinates axis_local = reduction_axis;
-    const int   input_dims = input->info()->num_dimensions();
+    const int   input_dims = tmp_input->info()->num_dimensions();
 
     convert_negative_axis(axis_local, input_dims);
 
     // Perform reduction for every axis
     for(int i = 0; i < _reduction_ops; ++i)
     {
-        TensorShape out_shape = i == 0 ? input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
+        TensorShape out_shape = i == 0 ? tmp_input->info()->tensor_shape() : (&_reduced_outs[i - 1])->info()->tensor_shape();
         out_shape.set(axis_local[i], 1);
-        auto in = (i == 0) ? input : (&_reduced_outs[i - 1]);
+        auto in = (i == 0) ? tmp_input : (&_reduced_outs[i - 1]);
 
         if(i == _reduction_ops - 1 && keep_dims)
         {
-            _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::MEAN_SUM);
+            _reduction_kernels[i].configure(in, tmp_output, axis_local[i], ReductionOperation::MEAN_SUM);
         }
         else
         {
-            _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(), input->info()->data_type(), input->info()->quantization_info()));
+            _reduced_outs[i].allocator()->init(TensorInfo(out_shape, tmp_input->info()->num_channels(), tmp_input->info()->data_type(), tmp_input->info()->quantization_info()));
             _memory_group.manage(&_reduced_outs[i]);
             _reduction_kernels[i].configure(in, &_reduced_outs[i], axis_local[i], ReductionOperation::MEAN_SUM);
         }
@@ -145,7 +168,7 @@
     // Configure reshape layer if we want to drop the dimensions
     if(!keep_dims)
     {
-        TensorShape out_shape = input->info()->tensor_shape();
+        TensorShape out_shape = tmp_input->info()->tensor_shape();
         // We have to sort the reduction axis vectors in order for remove_dimension
         // to work properly
         std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops);
@@ -153,22 +176,35 @@
         {
             out_shape.remove_dimension(axis_local[i] - i);
         }
-        auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape));
-        _reshape.configure(&_reduced_outs[_reduction_ops - 1], output);
+        auto_init_if_empty(*tmp_output->info(), tmp_input->info()->clone()->set_tensor_shape(out_shape));
+        _reshape.configure(&_reduced_outs[_reduction_ops - 1], tmp_output);
+    }
+    if(_do_requant)
+    {
+        _requant.configure(&_output_no_quant, output);
+        _input_no_quant.allocator()->allocate();
+        _output_no_quant.allocator()->allocate();
     }
 }
 
 void NEReduceMean::run()
 {
     MemoryGroupResourceScope scope_mg(_memory_group);
+    if(_do_requant)
+    {
+        _dequant.run();
+    }
     for(auto &kernel : _reduction_kernels)
     {
         kernel.run();
     }
-
     if(!_keep_dims)
     {
         _reshape.run();
     }
+    if(_do_requant)
+    {
+        _requant.run();
+    }
 }
 } // namespace arm_compute
diff --git a/tests/validation/CL/ReduceMean.cpp b/tests/validation/CL/ReduceMean.cpp
index cb1e38e..1dc6c61 100644
--- a/tests/validation/CL/ReduceMean.cpp
+++ b/tests/validation/CL/ReduceMean.cpp
@@ -133,16 +133,33 @@
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        CLReduceMeanQuantizedFixture<uint8_t>,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 5) })))
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 255, 5) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 255, 5) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 
+TEST_SUITE(Requant)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLReduceMeanQuantizedFixture<uint8_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), axis_drop),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 255, 5) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 200, 16) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Requant
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLReduceMeanQuantizedFixture<uint8_t>,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 5) })))
+                       combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 255, 5) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 255, 5) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -153,16 +170,33 @@
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        CLReduceMeanQuantizedFixture<int8_t>,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 102, 2) })))
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 102, 2) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 102, 2) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 
+TEST_SUITE(Requant)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLReduceMeanQuantizedFixture<int8_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), axis_drop),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 102, 2) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 113, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Requant
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLReduceMeanQuantizedFixture<int8_t>,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 102, 2) })))
+                       combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 102, 2) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 102, 2) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
diff --git a/tests/validation/NEON/ReduceMean.cpp b/tests/validation/NEON/ReduceMean.cpp
index 23229a0..e5a5a17 100644
--- a/tests/validation/NEON/ReduceMean.cpp
+++ b/tests/validation/NEON/ReduceMean.cpp
@@ -160,16 +160,33 @@
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        NEReduceMeanQuantizedFixture<uint8_t>,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 5) })))
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 255, 5) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 255, 5) })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_u8);
 }
 
+TEST_SUITE(Requant)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NEReduceMeanQuantizedFixture<uint8_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), axis_drop),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 255, 5) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 200, 16) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_u8);
+}
+TEST_SUITE_END() // Requant
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        NEReduceMeanQuantizedFixture<uint8_t>,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 5) })))
+                       combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 255, 5) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 255, 5) })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_u8);
@@ -180,15 +197,32 @@
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        NEReduceMeanQuantizedFixture<int8_t>,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 127, -10), QuantizationInfo(1.f / 250, -20) })))
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 127, -10), QuantizationInfo(1.f / 250, -20) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 127, -10) })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_s8);
 }
+TEST_SUITE(Requant)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NEReduceMeanQuantizedFixture<int8_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), axis_drop),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 102, 2) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 113, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_s8);
+}
+TEST_SUITE_END() // Requant
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        NEReduceMeanQuantizedFixture<int8_t>,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 127, 0) })))
+                       combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)),
+                                       framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.f / 127, -10) })),
+                               framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.f / 127, -10) })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_s8);
diff --git a/tests/validation/fixtures/ReduceMeanFixture.h b/tests/validation/fixtures/ReduceMeanFixture.h
index d102921..7288761 100644
--- a/tests/validation/fixtures/ReduceMeanFixture.h
+++ b/tests/validation/fixtures/ReduceMeanFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/Globals.h"
@@ -47,10 +48,10 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info)
+    void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output)
     {
-        _target    = compute_target(shape, data_type, axis, keep_dims, quantization_info);
-        _reference = compute_reference(shape, data_type, axis, keep_dims, quantization_info);
+        _target    = compute_target(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output);
+        _reference = compute_reference(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output);
     }
 
 protected:
@@ -71,11 +72,12 @@
         }
     }
 
-    TensorType compute_target(TensorShape &src_shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info)
+    TensorType compute_target(TensorShape &src_shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(src_shape, data_type, 1, quantization_info);
-        TensorType dst;
+        TensorType  src       = create_tensor<TensorType>(src_shape, data_type, 1, quantization_info_input);
+        TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_reduce_mean_shape(src.info(), axis, keep_dims);
+        TensorType  dst       = create_tensor<TensorType>(dst_shape, data_type, 1, quantization_info_output);
 
         // Create and configure function
         FunctionType reduction_mean;
@@ -100,10 +102,10 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(TensorShape &src_shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info)
+    SimpleTensor<T> compute_reference(TensorShape &src_shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output)
     {
         // Create reference
-        SimpleTensor<T> src{ src_shape, data_type, 1, quantization_info };
+        SimpleTensor<T> src{ src_shape, data_type, 1, quantization_info_input };
 
         // Fill reference
         fill(src);
@@ -113,7 +115,7 @@
         {
             TensorShape output_shape = i == 0 ? src_shape : out.shape();
             output_shape.set(axis[i], 1);
-            out = reference::reduction_operation<T, T>(i == 0 ? src : out, output_shape, axis[i], ReductionOperation::MEAN_SUM);
+            out = reference::reduction_operation<T, T>(i == 0 ? src : out, output_shape, axis[i], ReductionOperation::MEAN_SUM, quantization_info_output);
         }
 
         if(!keep_dims)
@@ -139,9 +141,9 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info = QuantizationInfo())
+    void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output)
     {
-        ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, keep_dims, quantization_info);
+        ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output);
     }
 };
 
@@ -152,7 +154,7 @@
     template <typename...>
     void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims)
     {
-        ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, keep_dims, QuantizationInfo());
+        ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, keep_dims, QuantizationInfo(), QuantizationInfo());
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h
index 3fb8544..646518d 100644
--- a/tests/validation/fixtures/ReductionOperationFixture.h
+++ b/tests/validation/fixtures/ReductionOperationFixture.h
@@ -126,7 +126,7 @@
         // Fill reference
         fill(src);
 
-        return reference::reduction_operation<T, T>(src, dst_shape, axis, op);
+        return reference::reduction_operation<T, T>(src, dst_shape, axis, op, quantization_info);
     }
 
     TensorType      _target{};
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index 5bdd4f7..ffb79f8 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -269,18 +269,19 @@
 }
 
 template <typename T, typename OT>
-SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
 {
+    ARM_COMPUTE_UNUSED(quantization_info_output);
     return compute_reduction_operation<T, OT>(src, dst_shape, axis, op);
 }
 
 template <>
-SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
 {
     if(src.data_type() == DataType::QASYMM8)
     {
         // If the operation is MEAN_SUM, we can directly use the uint8 implementation without taking into account scale and offset
-        if(op == ReductionOperation::MEAN_SUM)
+        if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
         {
             return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op);
         }
@@ -288,7 +289,7 @@
         {
             SimpleTensor<float> src_f = convert_from_asymmetric(src);
             SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
-            return convert_to_asymmetric<uint8_t>(dst_f, src.quantization_info());
+            return convert_to_asymmetric<uint8_t>(dst_f, quantization_info_output);
         }
     }
     else
@@ -298,12 +299,12 @@
 }
 
 template <>
-SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
 {
     if(src.data_type() == DataType::QASYMM8_SIGNED)
     {
         // If the operation is MEAN_SUM, we can directly use the int8 implementation without taking into account scale and offset
-        if(op == ReductionOperation::MEAN_SUM)
+        if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
         {
             return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op);
         }
@@ -311,7 +312,7 @@
         {
             SimpleTensor<float> src_f = convert_from_asymmetric(src);
             SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
-            return convert_to_asymmetric<int8_t>(dst_f, src.quantization_info());
+            return convert_to_asymmetric<int8_t>(dst_f, quantization_info_output);
         }
     }
     else
@@ -320,14 +321,21 @@
     }
 }
 
-template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+                                                 QuantizationInfo quantization_info_output = QuantizationInfo());
+template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+                                                QuantizationInfo quantization_info_output = QuantizationInfo());
 
-template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
-template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+                                                   QuantizationInfo quantization_info_output = QuantizationInfo());
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+                                                   QuantizationInfo quantization_info_output = QuantizationInfo());
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+                                                   QuantizationInfo quantization_info_output = QuantizationInfo());
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+                                                   QuantizationInfo quantization_info_output = QuantizationInfo());
+template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+                                                   QuantizationInfo quantization_info_output = QuantizationInfo());
 
 } // namespace reference
 } // namespace validation
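
When the input and output QuantizationInfo differ, the reference above now
requantizes with the output's parameters instead of the input's. A scalar
sketch of these semantics with illustrative values (taken from the uint8
Requant test data), not library code:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const float   in_scale  = 1.f / 255, out_scale  = 1.f / 200;
        const int     in_offset = 5,         out_offset = 16;
        const uint8_t q_in[4]   = { 40, 50, 60, 70 };

        float acc = 0.f;
        for(uint8_t q : q_in)
        {
            acc += in_scale * (float(q) - float(in_offset)); // dequantize
        }
        const float mean  = acc / 4.f;                                      // reduce in F32
        const int   q_out = int(std::round(mean / out_scale)) + out_offset; // requantize
        std::printf("%d\n", q_out); // quantized mean under the output QuantizationInfo
        return 0;
    }
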
diff --git a/tests/validation/reference/ReductionOperation.h b/tests/validation/reference/ReductionOperation.h
index 56d37e4..9c9e721 100644
--- a/tests/validation/reference/ReductionOperation.h
+++ b/tests/validation/reference/ReductionOperation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,8 @@
 namespace reference
 {
 template <typename T, typename OT>
-SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+                                     QuantizationInfo quantization_info_output = QuantizationInfo());
 } // namespace reference
 } // namespace validation
 } // namespace test