COMPMID-706 - Add GEMMLowp output stage for scaling by a fixed point number

DoD:
- Implement NEON kernel for quantizing down the gemmlowp result. The
  result should be scaled by a fixed point number
- Implement OpenCL kernel for quantizing down the gemmlowp result. The
  result should be scaled by a fixed point number
- Add test for validating the result

Required for:
- Integration of GEMMLowp in Android NN
- Convolution quantized
- Fully connected quantized

Change-Id: Ia963d25d695471e963961fb49a5600e78374ac4f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110981
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 670b11f..edd6a9f 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -78,7 +78,7 @@
         window_changed = window_changed || update_window_and_padding(win, bias_access);
     }
 
-    output_result_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->tensor_shape()));
+    output_result_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
 
     Error err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Error{};
     return std::make_pair(err, win);
@@ -186,15 +186,15 @@
                 }
             };
 
-            // Add the offset terms to GEMM's result and multiply by result_mult_int
-            scale_input(in_s32, result_offset_s32, _result_mult_int);
-
             // Add the bias to GEMM's result
             in_s32.val[0] = vaddq_s32(in_s32.val[0], bias_s32.val[0]);
             in_s32.val[1] = vaddq_s32(in_s32.val[1], bias_s32.val[1]);
             in_s32.val[2] = vaddq_s32(in_s32.val[2], bias_s32.val[2]);
             in_s32.val[3] = vaddq_s32(in_s32.val[3], bias_s32.val[3]);
 
+            // Add the offset terms to GEMM's result and multiply by result_mult_int
+            scale_input(in_s32, result_offset_s32, _result_mult_int);
+
             vst1q_u8(out.ptr(), finalize_quantization<is_bounded_relu>(in_s32, result_shift_s32, min_u8, max_u8));
         },
         in, bias, out);
@@ -231,6 +231,10 @@
 {
     // Perform validate step
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(DataType::QASYMM8));
+
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(),
                                                   (bias != nullptr) ? bias->info() : nullptr,
                                                   output->info(),