COMPMID-2608: Enable quantization with multiplier greater than 1 on NEON
Change-Id: Ib2b0c9ac88fc2b645f478c9981f71ee28f2c77fd
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2425
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index ca4fe73..ddcc71f 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -60,16 +60,6 @@
const bool is_quantized = (!is_data_type_quantized_per_channel(weights->data_type())) && is_data_type_quantized_asymmetric(input->data_type());
- if(is_quantized)
- {
- const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
- const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
-
- float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
- ARM_COMPUTE_UNUSED(multiplier);
- ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
- }
if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
{
TensorInfo accumulator = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
@@ -205,7 +195,7 @@
float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
int32_t output_multiplier;
int32_t output_shift;
- quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+ quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
_output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);
_accumulator.allocator()->allocate();
}
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 322bb2c..6553884 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -31,8 +31,8 @@
#include <cmath>
#include <tuple>
-using namespace arm_compute;
-
+namespace arm_compute
+{
NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false),
_is_activationlayer_enabled(false), _dim_split(Window::DimZ)
@@ -118,3 +118,4 @@
_activationlayer_function.run();
}
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index b3b90f8..01746eb 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -33,7 +33,8 @@
#include <algorithm>
#include <cmath>
-using namespace arm_compute;
+namespace arm_compute
+{
using namespace arm_compute::misc::shape_calculator;
namespace
@@ -258,7 +259,7 @@
float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
int32_t output_multiplier;
int32_t output_shift;
- quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+ quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
_gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, oq_info.offset);
_gemmlowp_output.allocator()->allocate();
}
@@ -352,13 +353,14 @@
// Validate output stage for asymmetric quantized types
if(is_quantized)
{
- const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
- const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
- const float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
+ const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
- ARM_COMPUTE_UNUSED(multiplier);
- ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
+ float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
+ int output_multiplier;
+ int output_shift;
+ ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(&gemmlowp_output, biases, output));
}
@@ -475,4 +477,5 @@
_is_prepared = true;
}
-}
\ No newline at end of file
+}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
index 0507c6b..920917a 100644
--- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp
@@ -154,7 +154,7 @@
output_info.gemmlowp_min_bound = min_activation;
output_info.gemmlowp_max_bound = max_activation;
output_info.is_quantized_per_channel = (weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL);
- quantization::calculate_quantized_multipliers_less_than_one(iqinfo, wqinfo, oqinfo, output_info);
+ quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info);
_mm_gemmlowp.configure(input, weights, biases, output, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info));
@@ -217,7 +217,7 @@
output_info.gemmlowp_min_bound = min_activation;
output_info.gemmlowp_max_bound = max_activation;
output_info.is_quantized_per_channel = (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
- ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers_less_than_one(iqinfo, wqinfo, oqinfo, output_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers(iqinfo, wqinfo, oqinfo, output_info));
// Perform validation step on GEMMLowp
std::unique_ptr<ITensorInfo> input_qa = input->clone();
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index e36cb3d..440f043 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -344,6 +344,19 @@
{
run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
run_optimised_requantized = run_optimised;
+
+ const UniformQuantizationInfo a_qinfo = a_to_use->quantization_info().uniform();
+ const QuantizationInfo b_qinfo = b->quantization_info();
+ const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform();
+ for(auto const s : b_qinfo.scale())
+ {
+ const float fmultipler = a_qinfo.scale * s / output_qinfo.scale;
+ if(fmultipler > 1.f)
+ {
+ run_optimised_requantized = false;
+ break;
+ }
+ }
}
else
{
diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
index 3235eee..142f873 100644
--- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
@@ -437,6 +437,16 @@
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
+ // The uniform quantization case will only have 1 scale value in the weights quantization info
+ const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform();
+ const QuantizationInfo weights_qinfo = weights->quantization_info();
+ const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform();
+ for(auto const s : weights_qinfo.scale())
+ {
+ const float fmultipler = input_qinfo.scale * s / output_qinfo.scale;
+ ARM_COMPUTE_RETURN_ERROR_ON(fmultipler > 1.f);
+ }
+
return Status{};
}