COMPMID-2609: Enable quantization with multiplier greater than 1 on OpenCL

Change-Id: I050f1f84e214e61f7cbb0197a672b68a4940edae
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2158
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 9eb6c6d..5ee5d84 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -292,7 +292,7 @@
 {
     QuantizationInfo(0.5f, 10),
     QuantizationInfo(0.3f, 3),
-    QuantizationInfo(1.f, 10),
+    QuantizationInfo(1.1f, 10),
 });
 TEST_SUITE(QASYMM8)
 
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index ac8e170..090db1c 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -123,19 +123,19 @@
 // *INDENT-ON*
 
 template <typename T>
-using CLDeconvolutionLayerFixture4x4      = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 4, 4>;
+using CLDeconvolutionLayerFixture4x4 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 4, 4>;
 
 template <typename T>
-using CLDeconvolutionLayerFixture3x3      = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
+using CLDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
 
 template <typename T>
 using CLDeconvolutionLayerAsymmFixture3x3 = DeconvolutionValidationAsymmFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
 
 template <typename T>
-using CLDeconvolutionLayerFixture2x2      = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 2, 2>;
+using CLDeconvolutionLayerFixture2x2 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 2, 2>;
 
 template <typename T>
-using CLDeconvolutionLayerFixture1x1      = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
+using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
@@ -286,8 +286,8 @@
                        framework::dataset::make("DataType",
                                                 DataType::QASYMM8)),
                        data_layouts_dataset),
-                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
-                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
+                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f, 0) })),
+                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(3.f, 0) })),
                        add_bias_dataset))
 {
     // Validate output
@@ -297,8 +297,8 @@
                        framework::dataset::make("DataType",
                                                 DataType::QASYMM8)),
                        data_layouts_dataset),
-                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
-                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
+                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f, 0) })),
+                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(3.f, 0) })),
                        add_bias_dataset))
 {
     // Validate output
@@ -310,8 +310,8 @@
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture2x2<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(data2x2_precommit,
                        framework::dataset::make("DataType", DataType::QASYMM8)),
                        data_layouts_dataset),
-                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
-                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
+                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f, 0) })),
+                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(3.f, 0) })),
                        add_bias_dataset))
 {
     // Validate output
@@ -323,8 +323,8 @@
 FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, framework::dataset::make("DataType",
                                                                                                                        DataType::QASYMM8)),
                                                                                                                        data_layouts_dataset),
-                                                                                                                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
-                                                                                                                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
+                                                                                                                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f, 0) })),
+                                                                                                                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(3.f, 0) })),
                                                                                                                        add_bias_dataset))
 {
     // Validate output
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 5d8fd20..e2cdf54 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -563,7 +563,7 @@
                        combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
                                                                        depth_multipliers),
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                ActivationFunctionsDataset))
@@ -574,7 +574,7 @@
                        combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
                                                                        large_depth_multipliers),
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                ActivationFunctionsDataset))
@@ -586,7 +586,7 @@
                        combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
                                                                        depth_multipliers),
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                ActivationFunctionsDataset))
@@ -597,7 +597,7 @@
                        combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
                                                                        large_depth_multipliers),
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(1.3f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                ActivationFunctionsDataset))
@@ -611,7 +611,7 @@
                        combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
                                                                        depth_multipliers),
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                ActivationFunctionsDataset))
@@ -622,7 +622,7 @@
                        combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                                                                        large_depth_multipliers),
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                ActivationFunctionsDataset))
@@ -634,7 +634,7 @@
                        combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
                                                                        depth_multipliers),
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                ActivationFunctionsDataset))
@@ -645,7 +645,7 @@
                        combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
                                                                        large_depth_multipliers),
                                                                framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
                                                framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                ActivationFunctionsDataset))
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index 5007738..15b6c83 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -251,7 +251,7 @@
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType",
                        DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10) })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
                        QuantizedActivationFunctionsDataset))
 {
     // Validate output
@@ -260,7 +260,7 @@
 FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit_9x9,
                        framework::dataset::make("DataType",
                                                 DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10) })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
                        QuantizedActivationFunctionsDataset))
 {
     // Validate output
@@ -268,7 +268,7 @@
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly, framework::dataset::make("DataType",
                        DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10) })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
                        QuantizedActivationFunctionsDataset))
 {
     // Validate output
@@ -276,7 +276,7 @@
 }
 FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_9x9, framework::dataset::make("DataType",
                        DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10) })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
                        QuantizedActivationFunctionsDataset))
 {
     // Validate output
@@ -288,7 +288,7 @@
 TEST_SUITE(QASYMM8_CustomDataset)
 FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::DirectConvolutionLayerDataset(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) })),
                        QuantizedActivationFunctionsDataset))
 {
     // Validate output
diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp
index af45be2..091d941 100644
--- a/tests/validation/CL/FullyConnectedLayer.cpp
+++ b/tests/validation/CL/FullyConnectedLayer.cpp
@@ -60,6 +60,12 @@
 });
 
 const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+
+const auto QuantizationData = framework::dataset::make("QuantizationInfo",
+{
+    QuantizationInfo(1.f / 255.f, 10),
+    QuantizationInfo(1.1f, 10),
+});
 } // namespace
 
 TEST_SUITE(CL)
@@ -204,20 +210,14 @@
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(
-                           combine(datasets::SmallFullyConnectedLayerDataset(),
-                                   FullyConnectedParameters),
-                           framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(
-                           combine(datasets::LargeFullyConnectedLayerDataset(),
-                                   FullyConnectedParameters),
-                           framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 256.f, 10) })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h
index 23918a4..6ac5df9 100644
--- a/tests/validation/reference/Convolution3d.h
+++ b/tests/validation/reference/Convolution3d.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__
-#define __ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__
+#ifndef ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H
+#define ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H
 
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "tests/validation/Helpers.h"
@@ -133,7 +133,7 @@
     int         output_multiplier = 0;
     int         output_shift      = 0;
     const float multiplier        = input_scale * weights_scale / output_scale;
-    arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+    arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
     const int half_width_weights_start  = width_weights / 2;
     const int half_width_weights_end    = ((width_weights % 2) == 0) ? (half_width_weights_start - 1) : half_width_weights_start;
@@ -171,9 +171,8 @@
     // Accumulate the bias
     acc += (*b_ptr);
 
-    acc = validation::asymm_rounding_divide_by_pow2(validation::asymm_int_mult(acc, output_multiplier), output_shift);
-    acc += output_offset;
-    acc = utility::clamp<int32_t>(acc, 0, 255);
+    // Quantize down
+    acc = validation::quantize_down_scale_by_fixedpoint(acc, output_multiplier, output_shift, output_offset, 0, 255);
 
     // Store the result
     *out_ptr = acc;
@@ -182,4 +181,4 @@
 } // namespace convolution_3d
 } // namespace test
 } // namespace arm_compute
-#endif /*__ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H__ */
+#endif /* ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H */
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index 608093d..0c7e92b 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -197,7 +197,7 @@
                 int         output_shift      = 0;
                 const float weights_scale     = (is_quantized_per_channel) ? weights_scale_vec[out_z] : weights_scale_vec[0];
                 const float multiplier        = input_scale * weights_scale / output_scale;
-                arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+                arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
                 for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
                 {
@@ -220,9 +220,8 @@
                             }
                         }
                         val += bias_val;
-                        val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
-                        val += output_offset;
-                        val = utility::clamp<int32_t>(val, 0, 255);
+                        // Quantize down
+                        val = quantize_down_scale_by_fixedpoint(val, output_multiplier, output_shift, output_offset, 0, 255);
 
                         // Store the result
                         dst[out_pos++] = val;
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index cd84b9c..261c645 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -81,7 +81,7 @@
     int         output_multiplier = 0;
     int         output_shift      = 0;
     const float multiplier        = input_scale * weights_scale / output_scale;
-    arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+    arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
     for(int y = 0; y < rows_weights; ++y)
     {
@@ -96,9 +96,8 @@
         // Accumulate the bias
         acc += bias_ptr[y];
 
-        acc = asymm_rounding_divide_by_pow2(asymm_int_mult(acc, output_multiplier), output_shift);
-        acc += output_offset;
-        acc = utility::clamp<int32_t>(acc, 0, 255);
+        // Quantize down
+        acc = quantize_down_scale_by_fixedpoint(acc, output_multiplier, output_shift, output_offset, 0, 255);
 
         // Store the result
         dst_ptr[y] = static_cast<T>(acc);
diff --git a/tests/validation/reference/UtilsQuantizedAsymm.h b/tests/validation/reference/UtilsQuantizedAsymm.h
index b7b69d5..444696c 100644
--- a/tests/validation/reference/UtilsQuantizedAsymm.h
+++ b/tests/validation/reference/UtilsQuantizedAsymm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H__
-#define __ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H__
+#ifndef ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H
+#define ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H
 
 #include <cstdint>
 
@@ -51,7 +51,25 @@
     int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
     return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
 }
+
+/** Quantize down the input value in range [min, max]. */
+inline int32_t quantize_down_scale_by_fixedpoint(int32_t val, int32_t result_mult_int, int32_t result_shift,
+                                                 int32_t result_offset_after_shift, int32_t min, int32_t max)
+{
+    int32_t res = 0;
+    if(result_shift < 0)
+    {
+        res = asymm_int_mult(val * (1 << (-result_shift)), result_mult_int);
+    }
+    else
+    {
+        res = asymm_rounding_divide_by_pow2(asymm_int_mult(val, result_mult_int), result_shift);
+    }
+    res += result_offset_after_shift;
+    res = utility::clamp<int32_t>(res, min, max);
+    return res;
+}
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H__ */
+#endif /* ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H */