COMPMID-355 Implement CL DirectConvolution1x1

* Add FP16 to validation tests.
* Complete benchmark tests for CL and NEON Direct Convolution.

Change-Id: Ie73d8580832372db01b82b39786fd9c8be560090
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/82014
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index eeb3e76..1a4476e 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -53,6 +53,14 @@
  */
 std::string get_cl_type_from_data_type(const DataType &dt);
 
+/** Get the size of a data type in number of bits.
+ *
+ * @param[in] dt @ref DataType.
+ *
+ * @return Number of bits in the data type specified, as a string.
+ */
+std::string get_data_size_from_data_type(const DataType &dt);
+
 /** Translates fixed point tensor data type to the underlying OpenCL type.
  *
  * @param[in] dt @ref DataType to be translated to OpenCL type.
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index 28eecf0..635ec88 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -33,7 +33,6 @@
 
 /** Interface for the  direct convolution kernel.
  */
-template <unsigned int kernel_size>
 class CLDirectConvolutionLayerKernel : public ICLKernel
 {
 public:
@@ -52,7 +51,7 @@
     /** Set the input, weights, biases and output tensors.
      *
      * @param[in]  input     The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
-     *                       while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32.
+     *                       while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32.
      * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
      *                       The 3rd dimension must be the same as the input's volume 3rd dimension.
      *                       Data type supported:Same as @p input.
@@ -80,7 +79,5 @@
     int              _conv_stride_x;
     int              _conv_stride_y;
 };
-
-using CLDirectConvolutionLayer3x3Kernel = CLDirectConvolutionLayerKernel<3>;
 }
 #endif /*__ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H__ */