COMPMID-417: Port DepthConcatenate to QS8/QS16 for NEON/CL.

Change-Id: I3dddae63043c7aa18d908a4fc8abacf3c64f98ca
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80081
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Steven Niu <steven.niu@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
index 3199936..77997f6 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
@@ -29,14 +29,15 @@
 #include "arm_compute/core/Window.h"
 #include "arm_compute/runtime/IFunction.h"
 
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+
 #include <memory>
 #include <vector>
 
 namespace arm_compute
 {
 class ICLTensor;
-class CLDepthConcatenateKernel;
-class CLFillBorderKernel;
 
 /** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
  *
@@ -51,8 +52,8 @@
     CLDepthConcatenate();
     /** Initialise the kernel's inputs vector and output.
      *
-     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported:  F32.
-     * @param[out]    output        Output tensor. Data types supported: F32.
+     * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
+     * @param[out]    output        Output tensor. Data types supported: Same as @p inputs_vector.
      */
     void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);