COMPMID-746 Allow NEDirectConvolution to work without biases for QS.

Renamed NEDirectConvolutionLayerBiasAccumulateKernel to
NEDirectConvolutionLayerOutputStageKernel. If no bias is provided
when the input is quantized (QS), the kernel simply downscales the
input. An error is thrown if no bias is provided and the input is
floating point.

Change-Id: I645a4ee9c6014b0547778fdd92c9ec72ef2f0aab
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114158
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 659594f..6208c20 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -28,7 +28,7 @@
 #include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
 #include "arm_compute/core/Types.h"
@@ -67,10 +67,10 @@
     void run() override;
 
 private:
-    NEDepthwiseConvolutionLayer3x3Kernel         _kernel;
-    NEDirectConvolutionLayerBiasAccumulateKernel _bias_kernel;
-    NEFillBorderKernel                           _border_handler;
-    bool                                         _has_bias;
+    NEDepthwiseConvolutionLayer3x3Kernel      _kernel;
+    NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+    NEFillBorderKernel                        _border_handler;
+    bool                                      _has_bias;
 };
 
 /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index 09a5496..e1aa839 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -24,8 +24,8 @@
 #ifndef __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
 #define __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__
 
-#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
@@ -42,7 +42,7 @@
  *  This function calls the following NEON kernels:
  *
  * -# @ref NEFillBorderKernel for the input
- * -# @ref NEDirectConvolutionLayerBiasAccumulateKernel
+ * -# @ref NEDirectConvolutionLayerOutputStageKernel
  * -# @ref NEDirectConvolutionLayerKernel
  */
 class NEDirectConvolutionLayer : public IFunction
@@ -93,12 +93,13 @@
     void run() override;
 
 private:
-    MemoryGroup                                  _memory_group;
-    NEDirectConvolutionLayerBiasAccumulateKernel _accumulate_bias_kernel;
-    NEDirectConvolutionLayerKernel               _conv_kernel;
-    NEFillBorderKernel                           _input_border_handler;
-    Tensor                                       _accumulator;
-    bool                                         _has_bias;
+    MemoryGroup                               _memory_group;
+    NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+    NEDirectConvolutionLayerKernel            _conv_kernel;
+    NEFillBorderKernel                        _input_border_handler;
+    Tensor                                    _accumulator;
+    bool                                      _has_bias;
+    bool                                      _is_fixed_point;
 };
 }
 #endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */