COMPMID-1365: Add support for NHWC in CLDepthConcatenateLayer

Change-Id: I3ed55bdb95d888aff0b0b76fb841bf1669659308
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139963
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
index cbcab8f..ff80090 100644
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
@@ -52,7 +52,7 @@
     ~CLDepthConcatenateLayerKernel() = default;
     /** Initialise the kernel's inputs and output
      *
-     * @param[in]     input        Input tensor. Data types supported: F16/F32.
+     * @param[in]     input        Input tensor. Data types supported: QASYMM8/F16/F32.
      * @param[in]     depth_offset The offset on the Z axis.
      * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
      *
@@ -61,6 +61,15 @@
      *
      */
     void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+    /**  Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
+     *
+     * @param[in] input        Input tensor info. Data types supported: QASYMM8/F16/F32
+     * @param[in] depth_offset The offset on the Z axis.
+     * @param[in] output       Output tensor info. Data types supported: Same as @p input.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
index d206eb0..7ecd927 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
@@ -58,7 +58,7 @@
      *
      */
     void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
-    /**  Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
+    /**  Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
      *
      * @param[in] input        Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
      * @param[in] width_offset The offset on the X axis.
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 729a46f..1cdfd38 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -630,37 +630,6 @@
     return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0)));
 }
 
-/** Calculate the output shapes of the depth concatenate function.
- *
- * @param[in] inputs_vector The vector that stores all the pointers to input.
- *
- * @return the output shape
- */
-template <typename T>
-TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inputs_vector)
-{
-    TensorShape out_shape = inputs_vector[0]->info()->tensor_shape();
-
-    size_t max_x = 0;
-    size_t max_y = 0;
-    size_t depth = 0;
-
-    for(const auto &tensor : inputs_vector)
-    {
-        ARM_COMPUTE_ERROR_ON(tensor == nullptr);
-        const TensorShape shape = tensor->info()->tensor_shape();
-        max_x                   = std::max(shape.x(), max_x);
-        max_y                   = std::max(shape.y(), max_y);
-        depth += shape.z();
-    }
-
-    out_shape.set(0, max_x);
-    out_shape.set(1, max_y);
-    out_shape.set(2, depth);
-
-    return out_shape;
-}
-
 /** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats.
  *
  * @note Adding here a few links discussing the issue of odd size and sharing the same solution:
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 9bf6b04..e5516ba 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -414,6 +414,31 @@
 }
 
 template <typename T>
+inline TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inputs_vector)
+{ // Builds the depth-concatenated shape: largest x and largest y among the inputs, z summed over all inputs.
+    TensorShape out_shape = get_shape_from_info(inputs_vector[0]); // Seeded from the first input; only indices 0, 1 and 2 are overwritten below.
+    // NOTE(review): inputs_vector[0] is read with no emptiness check and before the nullptr check in the loop - confirm callers guarantee a non-empty vector.
+    size_t max_x = 0; // Largest shape.x() seen so far
+    size_t max_y = 0; // Largest shape.y() seen so far
+    size_t depth = 0; // Running sum of shape.z()
+
+    for(const auto &tensor : inputs_vector)
+    {
+        ARM_COMPUTE_ERROR_ON(tensor == nullptr); // Error check: entries must not be null
+        const TensorShape shape = get_shape_from_info(tensor); // Helper is not shown in this hunk; presumably returns the entry's tensor shape - confirm
+        max_x                   = std::max(shape.x(), max_x);
+        max_y                   = std::max(shape.y(), max_y);
+        depth += shape.z(); // z extents accumulate rather than take the maximum
+    }
+
+    out_shape.set(0, max_x); // Index 0 <- largest x
+    out_shape.set(1, max_y); // Index 1 <- largest y
+    out_shape.set(2, depth); // Index 2 <- summed z
+
+    return out_shape;
+}
+
+template <typename T>
 inline TensorShape calculate_width_concatenate_shape(const std::vector<T *> &inputs_vector)
 {
     TensorShape out_shape = get_shape_from_info(inputs_vector[0]);