COMPMID-1333: Add CLSplit

Change-Id: I0f31e68dc0a1d6ddec5cd32602b6a3aa62070fe1
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/146778
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 9c7cfec..d2af844 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -51,12 +51,14 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, const PermutationVector &perm)
 {
     TensorShape output_shape = input.tensor_shape();
     permute(output_shape, perm);
     return output_shape;
 }
+
 inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t stride)
 {
     ARM_COMPUTE_ERROR_ON(stride <= 0);
@@ -73,6 +75,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1)
 {
     // Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
@@ -95,6 +98,7 @@
 
     return weights_reshaped;
 }
+
 inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false)
 {
     // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height
@@ -116,6 +120,7 @@
 
     return shape_interleaved_a;
 }
+
 inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b)
 {
     // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
@@ -125,6 +130,7 @@
 
     return shape_transposed1xW_b;
 }
+
 inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInfo &b, int mult_transpose1xW_width = 1)
 {
     // Note: mult_transpose1xW_width expresses the number of chunks with size 1x(W) we want to store on the same row
@@ -138,6 +144,7 @@
 
     return shape_transposed1xW_b;
 }
+
 inline TensorShape compute_reductionA_shape(const ITensorInfo &b)
 {
     TensorShape shape_vector_sum_col{ b.tensor_shape() };
@@ -148,6 +155,7 @@
 
     return shape_vector_sum_col;
 }
+
 inline TensorShape compute_reductionB_shape(const ITensorInfo &a)
 {
     TensorShape shape_vector_sum_row{ a.tensor_shape() };
@@ -159,6 +167,7 @@
 
     return shape_vector_sum_row;
 }
+
 inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1)
 {
     ARM_COMPUTE_ERROR_ON(num_groups == 0);
@@ -175,6 +184,7 @@
 
     return col2im_shape;
 }
+
 inline TensorShape compute_transposed_shape(const ITensorInfo &input)
 {
     TensorShape shape_transposed{ input.tensor_shape() };
@@ -184,6 +194,7 @@
 
     return shape_transposed;
 }
+
 inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier)
 {
     const TensorShape input_shape{ input.tensor_shape() };
@@ -207,6 +218,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_deconvolution_shape(const ITensorInfo &input, unsigned int sx, unsigned int sy, unsigned int inner_border_right, unsigned int inner_border_top, const PadStrideInfo &info)
 {
     TensorShape        scale_out_shape(input.tensor_shape());
@@ -217,6 +229,7 @@
 
     return scale_out_shape;
 }
+
 inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z,
                                              unsigned int num_groups = 1)
 {
@@ -248,6 +261,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_flatten_shape(const ITensorInfo *input)
 {
     // The output shape will be the flatten version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer.
@@ -258,6 +272,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_interleave_custom_shape(const TensorShape &input, const int x_interleave, const int y_interleave)
 {
     TensorShape output_shape{ input };
@@ -267,6 +282,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_fully_connected_reshaped_weights_shape(const ITensorInfo *input, bool transpose_weights, bool is_batched_fc_layer, const int interleave)
 {
     TensorShape output_shape{ input->tensor_shape() };
@@ -302,6 +318,7 @@
 
     return tensor_shape;
 }
+
 inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
     const PadStrideInfo conv_info        = winograd_info.convolution_info;
@@ -330,6 +347,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
 {
     const PadStrideInfo conv_info        = winograd_info.convolution_info;
@@ -356,6 +374,7 @@
 
     return tensor_shape;
 }
+
 inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info)
 {
     const TensorShape input_shape{ input.tensor_shape() };
@@ -381,6 +400,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_min_max_shape(const ITensorInfo *input)
 {
     TensorShape output_shape{ input->tensor_shape() };
@@ -423,6 +443,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
 {
     ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
@@ -449,6 +470,7 @@
 
     return output_shape;
 }
+
 inline TensorShape compute_strided_slice_shape(const ITensorInfo &input,
                                                const Coordinates &starts, const Coordinates &ends, const Coordinates &strides,
                                                int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
@@ -464,6 +486,7 @@
 
     return compute_strided_slice_output_shape(input_shape, starts_abs, ends_abs, final_strides);
 }
+
 inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y)
 {
     ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0);
@@ -475,6 +498,31 @@
     return output_shape;
 }
 
+/** Compute the shape of every output when @p input is split into @p num_splits equal parts along @p axis.
+ *  A shape with dimension 0 set to 0 is returned if @p axis is out of range, @p num_splits is zero or the axis size is not a multiple of it. */
+inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int axis, unsigned int num_splits)
+{
+    TensorShape empty_shape;
+    empty_shape.set(0, 0);
+    TensorShape out_shape{ input->tensor_shape() };
+
+    // Valid axes are [0, num_dimensions); a zero split count would make the modulo/division below undefined
+    if(axis >= input->tensor_shape().num_dimensions() || num_splits == 0)
+    {
+        return empty_shape;
+    }
+
+    // Every output must get the same number of elements along the axis
+    const size_t axis_size = out_shape[axis];
+    if(axis_size % num_splits)
+    {
+        return empty_shape;
+    }
+
+    out_shape[axis] = axis_size / num_splits;
+    return out_shape;
+}
+
 template <typename T>
 inline TensorShape extract_shape(T *data)
 {
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 02a4dab..d0c62c3 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -112,6 +112,7 @@
 #include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
 #include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
 #include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
+#include "arm_compute/runtime/CL/functions/CLSplit.h"
 #include "arm_compute/runtime/CL/functions/CLStridedSlice.h"
 #include "arm_compute/runtime/CL/functions/CLTableLookup.h"
 #include "arm_compute/runtime/CL/functions/CLThreshold.h"
diff --git a/arm_compute/runtime/CL/functions/CLSplit.h b/arm_compute/runtime/CL/functions/CLSplit.h
new file mode 100644
index 0000000..47da177
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSplit.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSPLIT_H__
+#define __ARM_COMPUTE_CLSPLIT_H__
+
+#include "arm_compute/core/Types.h"
+
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Basic function to split a tensor into equally sized tensors along a given axis, using one @ref CLSlice per output */
+class CLSplit : public IFunction
+{
+public:
+    /** Default constructor */
+    CLSplit();
+    /** Initialise the function's input and outputs.
+     *
+     * @param[in]  input   The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[out] outputs A vector containing the output tensors, one per split. Data types supported: Same as @p input.
+     *                     The output tensors should match the input tensor dimensions for all shape dimensions apart
+     *                     from the split dimension.
+     * @param[in]  axis    Axis on which to split the input. Its size must be a multiple of the number of outputs.
+     */
+    void configure(const ICLTensor *input, const std::vector<ICLTensor *> &outputs, unsigned int axis);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLSplit
+     *
+     * @param[in] input   The input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in] outputs A vector containing the output tensors' info (at least two). Data types supported: Same as @p input.
+     *                    The output tensors should match the input tensor dimensions for all shape dimensions apart
+     *                    from the split dimension
+     * @param[in] axis    Axis on which to split the input. Must be lower than the number of dimensions of @p input.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const std::vector<ITensorInfo *> &outputs, unsigned int axis);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::vector<ICLTensor *>   _outputs_vector;  /**< Output tensors (only default-initialised by the implementation added in this change) */
+    std::unique_ptr<CLSlice[]> _slice_functions; /**< One slice function per output */
+    unsigned int               _num_outputs;     /**< Number of outputs, i.e. number of slice functions */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLSPLIT_H__ */
diff --git a/src/runtime/CL/functions/CLSplit.cpp b/src/runtime/CL/functions/CLSplit.cpp
new file mode 100644
index 0000000..f084351
--- /dev/null
+++ b/src/runtime/CL/functions/CLSplit.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLSplit.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+namespace arm_compute
+{
+CLSplit::CLSplit()
+    : _outputs_vector(), _slice_functions(), _num_outputs(0)
+{
+}
+
+/** Set up one slice function per output; output i receives the i-th equal chunk of @p input along @p axis. */
+void CLSplit::configure(const ICLTensor *input, const std::vector<ICLTensor *> &outputs, unsigned int axis)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+
+    // Extract output tensor info
+    std::vector<ITensorInfo *> outputs_info;
+    for(auto &output : outputs)
+    {
+        ARM_COMPUTE_ERROR_ON_NULLPTR(output);
+        outputs_info.emplace_back(output->info());
+    }
+
+    // Validate first: with an empty output list the shape computation below would divide by zero
+    ARM_COMPUTE_ERROR_THROW_ON(CLSplit::validate(input->info(), outputs_info, axis));
+
+    // Create Slice functions
+    _num_outputs     = outputs.size();
+    _slice_functions = arm_compute::support::cpp14::make_unique<CLSlice[]>(_num_outputs);
+
+    // Get output shape; its extent along the axis is the step between consecutive slices
+    const TensorShape output_shape    = arm_compute::misc::shape_calculator::compute_split_shape(input->info(), axis, _num_outputs);
+    const size_t      axis_split_step = output_shape[axis];
+    unsigned int      axis_offset     = 0;
+
+    // Start/End coordinates (ends start at -1 on every dimension; only the split axis is overwritten below)
+    Coordinates start_coords;
+    Coordinates end_coords;
+    for(unsigned int d = 0; d < output_shape.num_dimensions(); ++d)
+    {
+        end_coords.set(d, -1);
+    }
+
+    for(unsigned int i = 0; i < _num_outputs; i++)
+    {
+        // Update coordinate on axis
+        start_coords.set(axis, axis_offset);
+        end_coords.set(axis, axis_offset + axis_split_step);
+
+        // Configure slice function
+        _slice_functions[i].configure(input, outputs[i], start_coords, end_coords);
+
+        // Set valid region from shape, then advance to the next chunk
+        outputs[i]->info()->set_valid_region(ValidRegion(Coordinates(), output_shape));
+        axis_offset += axis_split_step;
+    }
+}
+
+/** Check that @p input can be split evenly along @p axis into outputs.size() tensors and that each slice is valid. */
+Status CLSplit::validate(const ITensorInfo *input, const std::vector<ITensorInfo *> &outputs, unsigned int axis)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+    ARM_COMPUTE_RETURN_ERROR_ON(axis >= input->num_dimensions());
+    ARM_COMPUTE_RETURN_ERROR_ON(outputs.size() < 2);
+
+    // Get output shape (an empty shape signals that the axis cannot be split evenly)
+    const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_split_shape(input, axis, outputs.size());
+    ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() == 0);
+    const size_t axis_split_step = output_shape[axis];
+    unsigned int axis_offset     = 0;
+
+    // Start/End coordinates
+    Coordinates start_coords;
+    Coordinates end_coords;
+    for(unsigned int d = 0; d < output_shape.num_dimensions(); ++d)
+    {
+        end_coords.set(d, -1);
+    }
+
+    // Validate output tensors
+    for(const auto &output : outputs)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
+
+        // Output auto initialization if not yet initialized
+        TensorInfo tmp_output_info = *output->clone();
+        auto_init_if_empty(tmp_output_info, input->clone()->set_is_resizable(true).set_tensor_shape(output_shape));
+
+        // Update coordinate on axis
+        start_coords.set(axis, axis_offset);
+        end_coords.set(axis, axis_offset + axis_split_step);
+        // Validate against the auto-initialised info so that empty outputs are checked with the expected shape
+        ARM_COMPUTE_RETURN_ON_ERROR(CLSlice::validate(input, &tmp_output_info, start_coords, end_coords));
+        axis_offset += axis_split_step;
+    }
+
+    return Status{};
+}
+
+/** Run the slice function of every output, in output order. */
+void CLSplit::run()
+{
+    // Each slice function is run as-is; no command queue handle is needed at this level
+    for(unsigned int i = 0; i < _num_outputs; ++i)
+    {
+        _slice_functions[i].run();
+    }
+}
+} // namespace arm_compute
diff --git a/tests/datasets/SplitDataset.h b/tests/datasets/SplitDataset.h
new file mode 100644
index 0000000..b38252a
--- /dev/null
+++ b/tests/datasets/SplitDataset.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SPLIT_DATASET
+#define ARM_COMPUTE_TEST_SPLIT_DATASET
+
+#include "utils/TypePrinter.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+/** Collection of split configurations: tensor shape, axis to split along and number of splits. */
+class SplitDataset
+{
+public:
+    using type = std::tuple<TensorShape, unsigned int, unsigned int>;
+
+    /** Cursor advancing through the shape, axis and split-count lists in lock-step. */
+    struct iterator
+    {
+        iterator(std::vector<TensorShape>::const_iterator  shape_it,
+                 std::vector<unsigned int>::const_iterator axis_it,
+                 std::vector<unsigned int>::const_iterator splits_it)
+            : _tensor_shapes_it{ std::move(shape_it) },
+              _axis_values_it{ std::move(axis_it) },
+              _splits_values_it{ std::move(splits_it) }
+        {
+        }
+        /** Label of the current configuration: shape, axis and split count, each followed by a colon. */
+        std::string description() const
+        {
+            std::stringstream label;
+            label << "Shape=" << *_tensor_shapes_it << ":" << "Axis=" << *_axis_values_it << ":" << "Splits=" << *_splits_values_it << ":";
+            return label.str();
+        }
+        /** Current configuration as a (shape, axis, splits) tuple. */
+        SplitDataset::type operator*() const
+        {
+            return SplitDataset::type(*_tensor_shapes_it, *_axis_values_it, *_splits_values_it);
+        }
+        /** Move to the next configuration. */
+        iterator &operator++()
+        {
+            ++_splits_values_it;
+            ++_axis_values_it;
+            ++_tensor_shapes_it;
+            return *this;
+        }
+
+    private:
+        std::vector<TensorShape>::const_iterator  _tensor_shapes_it;
+        std::vector<unsigned int>::const_iterator _axis_values_it;
+        std::vector<unsigned int>::const_iterator _splits_values_it;
+    };
+    /** Iterator to the first configuration. */
+    iterator begin() const
+    {
+        return iterator(_tensor_shapes.begin(), _axis_values.begin(), _splits_values.begin());
+    }
+    /** Number of complete configurations (the shortest of the three lists). */
+    int size() const
+    {
+        return std::min({ _tensor_shapes.size(), _axis_values.size(), _splits_values.size() });
+    }
+    /** Append one configuration. */
+    void add_config(TensorShape shape, unsigned int axis, unsigned int splits)
+    {
+        _tensor_shapes.emplace_back(std::move(shape));
+        _axis_values.emplace_back(axis);
+        _splits_values.emplace_back(splits);
+    }
+
+protected:
+    SplitDataset()                = default;
+    SplitDataset(SplitDataset &&) = default;
+
+private:
+    std::vector<TensorShape>  _tensor_shapes{};
+    std::vector<unsigned int> _axis_values{};
+    std::vector<unsigned int> _splits_values{};
+};
+
+class SmallSplitDataset final : public SplitDataset // Small configurations: 1D to 4D shapes, each evenly divisible along the chosen axis
+{
+public:
+    SmallSplitDataset()
+    {
+        add_config(TensorShape(128U), 0U, 4U);             // axis 0: 128 -> 4 x 32
+        add_config(TensorShape(6U, 3U, 4U), 2U, 2U);       // axis 2: 4 -> 2 x 2
+        add_config(TensorShape(27U, 14U, 2U), 1U, 2U);     // axis 1: 14 -> 2 x 7
+        add_config(TensorShape(64U, 32U, 4U, 6U), 3U, 3U); // axis 3: 6 -> 3 x 2
+    }
+};
+
+class LargeSplitDataset final : public SplitDataset // Larger configurations: 1D to 4D shapes, each evenly divisible along the chosen axis
+{
+public:
+    LargeSplitDataset()
+    {
+        add_config(TensorShape(512U), 0U, 8U);               // axis 0: 512 -> 8 x 64
+        add_config(TensorShape(128U, 64U, 8U), 2U, 2U);      // axis 2: 8 -> 2 x 4
+        add_config(TensorShape(128U, 64U, 8U, 2U), 1U, 2U);  // axis 1: 64 -> 2 x 32
+        add_config(TensorShape(128U, 64U, 32U, 4U), 3U, 4U); // axis 3: 4 -> 4 x 1
+    }
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SPLIT_DATASET */
diff --git a/tests/validation/CL/Split.cpp b/tests/validation/CL/Split.cpp
new file mode 100644
index 0000000..7736318
--- /dev/null
+++ b/tests/validation/CL/Split.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLSplit.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/SplitDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/SplitFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(Split)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 3U, 16U, 2U), 1, DataType::F32), // Invalid axis (4 on a 4D tensor)
+                                                TensorInfo(TensorShape(27U, 3U, 16U, 2U), 1, DataType::F32), // Invalid number of splits (16 is not a multiple of 5)
+                                                TensorInfo(TensorShape(27U, 3U, 16U, 2U), 1, DataType::F32)  // Valid (axis 2 of size 16 split in 4)
+        }),
+        framework::dataset::make("Axis", { 4, 2, 2 })),
+        framework::dataset::make("Splits", { 4, 5, 4 })),
+        framework::dataset::make("Expected", { false, false, true })),
+        input_info, axis, splits, expected)
+{
+    std::vector<TensorInfo> outputs_info(splits); // One default-constructed output info per requested split
+    std::vector<ITensorInfo*> outputs_info_ptr;
+    for(auto &output_info : outputs_info)
+    {
+        outputs_info_ptr.emplace_back(&output_info);
+    }
+    const Status status = CLSplit::validate(&input_info.clone()->set_is_resizable(false), outputs_info_ptr, axis);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+DATA_TEST_CASE(Configuration,
+               framework::DatasetMode::ALL,
+               combine(datasets::SmallSplitDataset(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })),
+               shape, axis, splits, data_type)
+{
+    // Create the source tensor and one default-constructed destination tensor per split
+    CLTensor                 src = create_tensor<CLTensor>(shape, data_type);
+    std::vector<CLTensor>    dsts(splits);
+    std::vector<ICLTensor *> dsts_ptrs;
+    for(auto &dst : dsts)
+    {
+        dsts_ptrs.emplace_back(&dst);
+    }
+
+    // Create and configure function (the number of destination tensors defines the number of splits)
+    CLSplit split;
+    split.configure(&src, dsts_ptrs, axis);
+
+    // After configuration each destination's valid region must equal the region derived from its tensor shape
+    for(auto &dst : dsts)
+    {
+        const ValidRegion valid_region = shape_to_valid_region(dst.info()->tensor_shape());
+        validate(dst.info()->valid_region(), valid_region);
+    }
+}
+
+template <typename T>
+using CLSplitFixture = SplitFixture<CLTensor, ICLTensor, CLAccessor, CLSplit, T>; // Split fixture bound to the CL tensor, accessor and function types
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLSplitFixture<half>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallSplitDataset(), framework::dataset::make("DataType", DataType::F16)))
+{
+    // Compare every split output of the target run against the reference output with the same index
+    for(unsigned int i = 0; i < _target.size(); ++i)
+    {
+        validate(CLAccessor(_target[i]), _reference[i]);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+                       CLSplitFixture<half>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeSplitDataset(), framework::dataset::make("DataType", DataType::F16)))
+{
+    // Compare every split output of the target run against the reference output with the same index
+    for(unsigned int i = 0; i < _target.size(); ++i)
+    {
+        validate(CLAccessor(_target[i]), _reference[i]);
+    }
+}
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLSplitFixture<float>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallSplitDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Compare every split output of the target run against the reference output with the same index
+    for(unsigned int i = 0; i < _target.size(); ++i)
+    {
+        validate(CLAccessor(_target[i]), _reference[i]);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+                       CLSplitFixture<float>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeSplitDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Compare every split output of the target run against the reference output with the same index
+    for(unsigned int i = 0; i < _target.size(); ++i)
+    {
+        validate(CLAccessor(_target[i]), _reference[i]);
+    }
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // Split
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/SliceOperationsFixtures.h b/tests/validation/fixtures/SliceOperationsFixtures.h
index 018bafb..df016d5 100644
--- a/tests/validation/fixtures/SliceOperationsFixtures.h
+++ b/tests/validation/fixtures/SliceOperationsFixtures.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_STRIDED_SLICE_FIXTURE
-#define ARM_COMPUTE_TEST_STRIDED_SLICE_FIXTURE
+#ifndef ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE
+#define ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
@@ -179,4 +179,4 @@
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_STRIDED_SLICE_FIXTURE */
+#endif /* ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE */
diff --git a/tests/validation/fixtures/SplitFixture.h b/tests/validation/fixtures/SplitFixture.h
new file mode 100644
index 0000000..0750177
--- /dev/null
+++ b/tests/validation/fixtures/SplitFixture.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SPLIT_FIXTURE
+#define ARM_COMPUTE_TEST_SPLIT_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/RawLutAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/SliceOperations.h"
+
+#include <algorithm>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Fixture running a split function on a target backend and building the reference from per-chunk slices. */
+template <typename TensorType, typename ITensorType, typename AccessorType, typename FunctionType, typename T>
+class SplitFixture : public framework::Fixture
+{
+public:
+    /** Compute target and reference outputs for splitting @p shape into @p splits parts along @p axis. */
+    template <typename...>
+    void setup(TensorShape shape, unsigned int axis, unsigned int splits, DataType data_type)
+    {
+        _target    = compute_target(shape, axis, splits, data_type);
+        _reference = compute_reference(shape, axis, splits, data_type);
+    }
+
+protected:
+    /** Fill @p tensor through the assets library's uniform filler; @p i is forwarded as its second argument. */
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+    /** Configure and run FunctionType on a freshly created source tensor; returns the @p splits outputs. */
+    std::vector<TensorType> compute_target(const TensorShape &shape, unsigned int axis, unsigned int splits, DataType data_type)
+    {
+        // Create tensors
+        TensorType                 src = create_tensor<TensorType>(shape, data_type);
+        std::vector<TensorType>    dsts(splits);
+        std::vector<ITensorType *> dsts_ptr;
+        for(auto &dst : dsts)
+        {
+            dsts_ptr.emplace_back(&dst);
+        }
+
+        // Create and configure function
+        FunctionType split;
+        split.configure(&src, dsts_ptr, axis);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(std::all_of(dsts.cbegin(), dsts.cend(), [](const TensorType & t)
+        {
+            return t.info()->is_resizable();
+        }),
+        framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        for(auto &dst : dsts)
+        {
+            dst.allocator()->allocate();
+        }
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(std::all_of(dsts.cbegin(), dsts.cend(), [](const TensorType & t)
+        {
+            return !t.info()->is_resizable();
+        }),
+        framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+
+        // Compute function
+        split.run();
+        return dsts;
+    }
+    /** Build the expected outputs by slicing a reference tensor that is filled with the same fill() arguments as the target source. */
+    std::vector<SimpleTensor<T>> compute_reference(const TensorShape &shape, unsigned int axis, unsigned int splits, DataType data_type)
+    {
+        // Create and fill reference, and reserve room for one output per split
+        SimpleTensor<T> src{ shape, data_type };
+        fill(src, 0);
+        std::vector<SimpleTensor<T>> dsts;
+        dsts.reserve(splits);
+
+        // Calculate slice for each split
+        const size_t axis_split_step = shape[axis] / splits;
+        unsigned int axis_offset     = 0;
+
+        // Start/End coordinates
+        Coordinates start_coords;
+        Coordinates end_coords;
+        for(unsigned int d = 0; d < shape.num_dimensions(); ++d)
+        {
+            end_coords.set(d, -1);
+        }
+
+        for(unsigned int i = 0; i < splits; ++i)
+        {
+            // Update coordinate on axis
+            start_coords.set(axis, axis_offset);
+            end_coords.set(axis, axis_offset + axis_split_step);
+            // reference::slice presumably returns by value, so its result binds as an rvalue without std::move
+            dsts.emplace_back(reference::slice(src, start_coords, end_coords));
+            axis_offset += axis_split_step;
+        }
+
+        return dsts;
+    }
+
+    std::vector<TensorType>      _target{};
+    std::vector<SimpleTensor<T>> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SPLIT_FIXTURE */