COMPMID-417: Port DepthConcatenate to QS8/QS16 for NEON/CL.

Change-Id: I3dddae63043c7aa18d908a4fc8abacf3c64f98ca
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80081
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Steven Niu <steven.niu@arm.com>
diff --git a/tests/validation/CL/DepthConcatenateLayer.cpp b/tests/validation/CL/DepthConcatenateLayer.cpp
new file mode 100644
index 0000000..a9b727b
--- /dev/null
+++ b/tests/validation/CL/DepthConcatenateLayer.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "Globals.h"
+#include "PaddingCalculator.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Helpers.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
+
+#include "support/ToolchainSupport.h"
+
+#include "boost_wrapper.h"
+
+#include <algorithm>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Compute OpenCL depth concatenate layer function.
+ *
+ * @param[in] shapes               List of shapes to concatenate
+ * @param[in] dt                   Datatype of tensors
+ * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of fixed point numbers.
+ *
+ * @return Computed output tensor.
+ */
+CLTensor compute_depth_concatenate_layer(const std::vector<TensorShape> &shapes, DataType dt, int fixed_point_position = 0)
+{
+    std::vector<std::unique_ptr<CLTensor>> srcs{};
+    TensorShape                            dst_shape = calculate_depth_concatenate_shape(shapes);
+
+    // Create tensors
+    for(unsigned int i = 0; i < shapes.size(); ++i)
+    {
+        srcs.push_back(support::cpp14::make_unique<CLTensor>());
+        srcs[i]->allocator()->init(TensorInfo(shapes[i], 1, dt, fixed_point_position));
+    }
+    CLTensor dst = create_tensor<CLTensor>(dst_shape, dt, 1, fixed_point_position);
+
+    // Create a vector of raw pointers
+    std::vector<ICLTensor *> srcs_raw{};
+    srcs_raw.resize(srcs.size());
+    std::transform(srcs.begin(), srcs.end(), srcs_raw.begin(), [](std::unique_ptr<CLTensor> const & t)
+    {
+        return t.get();
+    });
+
+    // Create and configure function
+    CLDepthConcatenate depth_concat;
+    depth_concat.configure(srcs_raw, &dst);
+
+    // Allocate tensors
+    for(auto &t : srcs)
+    {
+        t->allocator()->allocate();
+    }
+    dst.allocator()->allocate();
+
+    for(const auto &t : srcs)
+    {
+        BOOST_TEST(!t->info()->is_resizable());
+    }
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors
+    for(unsigned int i = 0; i < srcs.size(); ++i)
+    {
+        library->fill_tensor_uniform(CLAccessor(*srcs[i]), i);
+    }
+
+    // Compute function
+    depth_concat.run();
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(DepthConcatenateLayer)
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFloatDataTypes(), shape, dt)
+{
+    // Create input shapes
+    std::vector<unsigned int> depths = { 4, 6, 11, 13 };
+    std::vector<TensorShape>  shapes(depths.size(), shape);
+    for(unsigned int i = 0; i < shapes.size(); ++i)
+    {
+        shapes[i].set(2, depths[i]);
+    }
+
+    // Compute function
+    CLTensor dst = compute_depth_concatenate_layer(shapes, dt);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_depth_concatenate_layer(shapes, dt);
+
+    // Validate output
+    validate(CLAccessor(dst), ref_dst);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmallPad, CNNFloatDataTypes(), dt)
+{
+    // Create input shapes
+    std::vector<TensorShape> shapes{ TensorShape(12u, 12u, 14u, 8u), TensorShape(14u, 14u, 12u, 8u), TensorShape(16u, 16u, 11u, 8u) };
+
+    // Compute function
+    CLTensor dst = compute_depth_concatenate_layer(shapes, dt);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_depth_concatenate_layer(shapes, dt);
+
+    // Validate output
+    validate(CLAccessor(dst), ref_dst);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(Quantized)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFixedPointDataTypes() * boost::unit_test::data::xrange(3, 6, 1), shape, dt, fixed_point_position)
+{
+    // Create input shapes
+    std::vector<unsigned int> depths = { 4, 6, 11, 13 };
+    std::vector<TensorShape>  shapes(depths.size(), shape);
+    for(unsigned int i = 0; i < shapes.size(); ++i)
+    {
+        shapes[i].set(2, depths[i]);
+    }
+
+    // Compute function
+    CLTensor dst = compute_depth_concatenate_layer(shapes, dt, fixed_point_position);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_depth_concatenate_layer(shapes, dt, fixed_point_position);
+
+    // Validate output
+    validate(CLAccessor(dst), ref_dst);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmallPad, CNNFixedPointDataTypes() * boost::unit_test::data::xrange(3, 5, 1), dt, fixed_point_position)
+{
+    // Create input shapes
+    std::vector<TensorShape> shapes{ TensorShape(12u, 12u, 14u, 8u), TensorShape(14u, 14u, 12u, 8u), TensorShape(16u, 16u, 11u, 8u) };
+
+    // Compute function
+    CLTensor dst = compute_depth_concatenate_layer(shapes, dt, fixed_point_position);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_depth_concatenate_layer(shapes, dt, fixed_point_position);
+
+    // Validate output
+    validate(CLAccessor(dst), ref_dst);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* DOXYGEN_SKIP_THIS */
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index a551da7..cae1976 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -172,6 +172,34 @@
     }
 }
 
+/** Calculate output tensor shape given a vector of input tensors to concatenate
+ *
+ * @param[in] input_shapes Shapes of the tensors to concatenate across depth.
+ *
+ * @return The shape of the concatenated output tensor.
+ */
+inline TensorShape calculate_depth_concatenate_shape(std::vector<TensorShape> input_shapes)
+{
+    TensorShape out_shape = input_shapes.at(0);
+
+    unsigned int max_x = 0;
+    unsigned int max_y = 0;
+    unsigned int depth = 0;
+
+    for(auto const &shape : input_shapes)
+    {
+        max_x = std::max<unsigned int>(shape.x(), max_x);
+        max_y = std::max<unsigned int>(shape.y(), max_y);
+        depth += shape.z();
+    }
+
+    out_shape.set(0, max_x);
+    out_shape.set(1, max_y);
+    out_shape.set(2, depth);
+
+    return out_shape;
+}
+
 /** Create a vector of random ROIs.
  *
  * @param[in] shape     The shape of the input tensor.
diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp
new file mode 100644
index 0000000..6ab5885
--- /dev/null
+++ b/tests/validation/NEON/DepthConcatenateLayer.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/NEAccessor.h"
+#include "PaddingCalculator.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Helpers.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "support/ToolchainSupport.h"
+
+#include "boost_wrapper.h"
+
+#include <algorithm>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Compute NEON depth concatenate layer function.
+ *
+ * @param[in] shapes               List of shapes to concatenate
+ * @param[in] dt                   Datatype of tensors
+ * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of fixed point numbers.
+ *
+ * @return Computed output tensor.
+ */
+Tensor compute_depth_concatenate_layer(const std::vector<TensorShape> &shapes, DataType dt, int fixed_point_position = 0)
+{
+    std::vector<std::unique_ptr<Tensor>> srcs{};
+    TensorShape                          dst_shape = calculate_depth_concatenate_shape(shapes);
+
+    // Create tensors
+    for(unsigned int i = 0; i < shapes.size(); ++i)
+    {
+        srcs.push_back(support::cpp14::make_unique<Tensor>());
+        srcs[i]->allocator()->init(TensorInfo(shapes[i], 1, dt, fixed_point_position));
+    }
+    Tensor dst = create_tensor<Tensor>(dst_shape, dt, 1, fixed_point_position);
+
+    // Create a vector of raw pointers
+    std::vector<ITensor *> srcs_raw{};
+    srcs_raw.resize(srcs.size());
+    std::transform(srcs.begin(), srcs.end(), srcs_raw.begin(), [](std::unique_ptr<Tensor> const & t)
+    {
+        return t.get();
+    });
+
+    // Create and configure function
+    NEDepthConcatenate depth_concat;
+    depth_concat.configure(srcs_raw, &dst);
+
+    // Allocate tensors
+    for(auto &t : srcs)
+    {
+        t->allocator()->allocate();
+    }
+    dst.allocator()->allocate();
+
+    for(const auto &t : srcs)
+    {
+        BOOST_TEST(!t->info()->is_resizable());
+    }
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors
+    for(unsigned int i = 0; i < srcs.size(); ++i)
+    {
+        library->fill_tensor_uniform(NEAccessor(*srcs[i]), i);
+    }
+
+    // Compute function
+    depth_concat.run();
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(DepthConcatenateLayer)
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFloatDataTypes(), shape, dt)
+{
+    // Create input shapes
+    std::vector<unsigned int> depths = { 4, 6, 11, 13 };
+    std::vector<TensorShape>  shapes(depths.size(), shape);
+    for(unsigned int i = 0; i < shapes.size(); ++i)
+    {
+        shapes[i].set(2, depths[i]);
+    }
+
+    // Compute function
+    Tensor dst = compute_depth_concatenate_layer(shapes, dt);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_depth_concatenate_layer(shapes, dt);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmallPad, CNNFloatDataTypes(), dt)
+{
+    // Create input shapes
+    std::vector<TensorShape> shapes{ TensorShape(12u, 12u, 14u, 8u), TensorShape(14u, 14u, 12u, 8u), TensorShape(16u, 16u, 11u, 8u) };
+
+    // Compute function
+    Tensor dst = compute_depth_concatenate_layer(shapes, dt);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_depth_concatenate_layer(shapes, dt);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(Quantized)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFixedPointDataTypes() * boost::unit_test::data::xrange(3, 6, 1), shape, dt, fixed_point_position)
+{
+    // Create input shapes
+    std::vector<unsigned int> depths = { 4, 6, 11, 13 };
+    std::vector<TensorShape>  shapes(depths.size(), shape);
+    for(unsigned int i = 0; i < shapes.size(); ++i)
+    {
+        shapes[i].set(2, depths[i]);
+    }
+
+    // Compute function
+    Tensor dst = compute_depth_concatenate_layer(shapes, dt, fixed_point_position);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_depth_concatenate_layer(shapes, dt, fixed_point_position);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmallPad, CNNFixedPointDataTypes() * boost::unit_test::data::xrange(3, 5, 1), dt, fixed_point_position)
+{
+    // Create input shapes
+    std::vector<TensorShape> shapes{ TensorShape(12u, 12u, 14u, 8u), TensorShape(14u, 14u, 12u, 8u), TensorShape(16u, 16u, 11u, 8u) };
+
+    // Compute function
+    Tensor dst = compute_depth_concatenate_layer(shapes, dt, fixed_point_position);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_depth_concatenate_layer(shapes, dt, fixed_point_position);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* DOXYGEN_SKIP_THIS */
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index 04362f0..857dd7c 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -538,6 +538,30 @@
     return ref_dst;
 }
 
+RawTensor Reference::compute_reference_depth_concatenate_layer(const std::vector<TensorShape> &shapes, DataType dt, int fixed_point_position)
+{
+    std::vector<std::unique_ptr<RawTensor>> ref_srcs{};
+    TensorShape                             dst_shape = calculate_depth_concatenate_shape(shapes);
+
+    // Create tensors
+    for(unsigned int i = 0; i < shapes.size(); ++i)
+    {
+        ref_srcs.push_back(support::cpp14::make_unique<RawTensor>(RawTensor(shapes[i], dt, 1, fixed_point_position)));
+    }
+    RawTensor ref_dst = library->get(dst_shape, dt, 1, fixed_point_position);
+
+    // Fill references
+    for(unsigned int i = 0; i < ref_srcs.size(); ++i)
+    {
+        library->fill_tensor_uniform(*ref_srcs[i], i);
+    }
+
+    // Compute reference
+    ReferenceCPP::depth_concatenate_layer(ref_srcs, ref_dst);
+
+    return ref_dst;
+}
+
 RawTensor Reference::compute_reference_fully_connected_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
                                                              DataType dt, bool transpose_weights, int fixed_point_position)
 {
diff --git a/tests/validation/Reference.h b/tests/validation/Reference.h
index cbdeb01..37a072b 100644
--- a/tests/validation/Reference.h
+++ b/tests/validation/Reference.h
@@ -26,7 +26,6 @@
 
 #include "RawTensor.h"
 #include "Types.h"
-#include <vector>
 
 #include <vector>
 
@@ -285,7 +284,7 @@
      * @return Computed raw tensor.
      */
     static RawTensor compute_reference_batch_normalization_layer(const TensorShape &shape0, const TensorShape &shape1, DataType dt, float epsilon, int fixed_point_position = 0);
-    /** Compute reference pixel-wise multiplication
+    /** Compute reference convolution layer
      *
      * @param[in] input_shape          Shape for the input tensor
      * @param[in] weights_shape        Shape for the weights tensor
@@ -299,6 +298,15 @@
      */
     static RawTensor compute_reference_convolution_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
                                                          const PadStrideInfo &conv_info, int fixed_point_position);
+    /** Compute reference depth concatenation layer
+     *
+     * @param[in] shapes               Input tensor shapes (All dimensions should match apart from DimZ)
+     * @param[in] dt                   Data type to use
+     * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_depth_concatenate_layer(const std::vector<TensorShape> &shapes, DataType dt, int fixed_point_position = 0);
     /** Compute reference for fully connected layer function
      *
      * @param[in] input_shape          Shape for the input tensor
diff --git a/tests/validation/ReferenceCPP.cpp b/tests/validation/ReferenceCPP.cpp
index c89b737..105bfc4 100644
--- a/tests/validation/ReferenceCPP.cpp
+++ b/tests/validation/ReferenceCPP.cpp
@@ -36,7 +36,9 @@
 
 #include "boost_wrapper.h"
 
+#include <algorithm>
 #include <functional>
+#include <memory>
 #include <numeric>
 #include <vector>
 
@@ -292,6 +294,19 @@
     boost::apply_visitor(tensor_visitors::convolution_layer_visitor(s, w, b, conv_info), d);
 }
 
+// Depth concatenate layer
+void ReferenceCPP::depth_concatenate_layer(const std::vector<std::unique_ptr<RawTensor>> &srcs, RawTensor &dst)
+{
+    std::vector<TensorVariant> ss;
+    ss.resize(srcs.size());
+    std::transform(srcs.begin(), srcs.end(), ss.begin(), [](std::unique_ptr<RawTensor> const & t)
+    {
+        return TensorFactory::get_tensor(*t);
+    });
+    TensorVariant d = TensorFactory::get_tensor(dst);
+    boost::apply_visitor(tensor_visitors::depth_concatenate_layer_visitor(ss), d);
+}
+
 // Fully connected layer
 void ReferenceCPP::fully_connected_layer(const RawTensor &src, const RawTensor &weights, const RawTensor &bias, RawTensor &dst)
 {
diff --git a/tests/validation/ReferenceCPP.h b/tests/validation/ReferenceCPP.h
index 10e5ab6..d3c77a2 100644
--- a/tests/validation/ReferenceCPP.h
+++ b/tests/validation/ReferenceCPP.h
@@ -28,6 +28,7 @@
 
 #include "RawTensor.h"
 
+#include <memory>
 #include <ostream>
 #include <vector>
 
@@ -262,6 +263,12 @@
      * @param[in]  conv_info Pads and strides information for the convolution layer.
      */
     static void convolution_layer(const RawTensor &src, const RawTensor &weights, const RawTensor &bias, RawTensor &dst, const PadStrideInfo &conv_info);
+    /** Depth concatenate layer from @p srcs to @p dst
+     *
+     * @param[in]  srcs Input tensors.
+     * @param[out] dst  Result tensor.
+     */
+    static void depth_concatenate_layer(const std::vector<std::unique_ptr<RawTensor>> &srcs, RawTensor &dst);
     /** Fully connected layer function
      *
      * @param[in]  src     Input tensor
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index adac709..bf9bcef 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -41,6 +41,7 @@
 #include <array>
 #include <cmath>
 #include <random>
+#include <string>
 #include <vector>
 
 namespace arm_compute
@@ -1003,6 +1004,53 @@
     }
 }
 
+// Depth Concatenate layer
+template <typename T>
+void depth_concatenate_layer(const std::vector<const Tensor<T> *> &srcs, Tensor<T> &out)
+{
+    unsigned  depth_offset = 0;
+    const int width_out    = out.shape().x();
+    const int height_out   = out.shape().y();
+    const int depth_out    = out.shape().z();
+    const int out_stride_z = width_out * height_out;
+    const int batches      = out.shape().total_size_upper(3);
+
+    // Set output tensor to 0
+    memset(out.data(), 0, out.num_elements() * element_size_from_data_type(out.data_type()));
+
+    for(unsigned int i = 0; i < srcs.size(); ++i)
+    {
+        ARM_COMPUTE_ERROR_ON(srcs[i] == nullptr);
+        ARM_COMPUTE_ERROR_ON(srcs[i]->data_type() != out.data_type());
+        ARM_COMPUTE_ERROR_ON(depth_offset >= out.shape().z());
+        ARM_COMPUTE_ERROR_ON(batches != static_cast<int>(srcs[i]->shape().total_size_upper(3)));
+
+        const Tensor<T>   *src    = srcs[i];
+        const int          width  = src->shape().x();
+        const int          height = src->shape().y();
+        const int          depth  = src->shape().z();
+        const unsigned int x_diff = (width_out - width) / 2;
+        const unsigned int y_diff = (height_out - height) / 2;
+
+        const T *src_ptr = src->data();
+        for(int b = 0; b < batches; ++b)
+        {
+            const unsigned int offset_to_first_element = b * out_stride_z * depth_out + depth_offset * out_stride_z
+                                                         + y_diff * width_out + x_diff;
+            for(int d = 0; d < depth; ++d)
+            {
+                for(int r = 0; r < height; ++r)
+                {
+                    std::copy(src_ptr, src_ptr + width, out.data() + offset_to_first_element + d * out_stride_z + r * width_out);
+                    src_ptr += width;
+                }
+            }
+        }
+
+        depth_offset += depth;
+    }
+}
+
 // Convolution layer
 template <typename T>
 void convolution_layer(const Tensor<T> &in, const Tensor<T> &weights, const Tensor<T> &bias, Tensor<T> &out, const PadStrideInfo &conv_info)
diff --git a/tests/validation/TensorVisitors.h b/tests/validation/TensorVisitors.h
index 723302c..fcc584d 100644
--- a/tests/validation/TensorVisitors.h
+++ b/tests/validation/TensorVisitors.h
@@ -30,6 +30,8 @@
 
 #include "boost_wrapper.h"
 
+#include <algorithm>
+#include <memory>
 #include <ostream>
 #include <vector>
 
@@ -253,7 +255,31 @@
     const TensorVariant &_bias;
     PadStrideInfo        _conv_info;
 };
+// Depth Concatenate Layer visitor
+struct depth_concatenate_layer_visitor : public boost::static_visitor<>
+{
+public:
+    explicit depth_concatenate_layer_visitor(const std::vector<TensorVariant> &srcs)
+        : _srcs(srcs)
+    {
+    }
 
+    template <typename T>
+    void operator()(Tensor<T> &out) const
+    {
+        std::vector<const Tensor<T> *> srcs;
+        srcs.resize(_srcs.size());
+        std::transform(_srcs.begin(), _srcs.end(), srcs.begin(), [](const TensorVariant & t)
+        {
+            return &(boost::get<Tensor<T>>(t));
+        });
+        tensor_operations::depth_concatenate_layer(srcs, out);
+    }
+
+private:
+    const std::vector<TensorVariant> &_srcs;
+};
+// Fully Connected Layer visitor
 struct fully_connected_layer_visitor : public boost::static_visitor<>
 {
 public: