COMPMID-1332: Implement Slice for CL

Change-Id: I0dbc4fd7f640d31daa1970eb3da0e941cb771f2b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/146145
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
diff --git a/arm_compute/core/utils/helpers/tensor_transform.h b/arm_compute/core/utils/helpers/tensor_transform.h
index e5b32a5..966c1f1 100644
--- a/arm_compute/core/utils/helpers/tensor_transform.h
+++ b/arm_compute/core/utils/helpers/tensor_transform.h
@@ -32,12 +32,33 @@
 {
 namespace tensor_transform
 {
+/** Returns the absolute ends coordinates of slice
+ *
+ * @param[in] input_shape Input tensor shape
+ * @param[in] ends        End coordinates
+ *
+ * @return Absolute end coordinate
+ */
+Coordinates slice_absolute_end_coords(TensorShape input_shape, Coordinates ends);
+
+/** Computes output shape of slice
+ *
+ * @warning Ends must be non-negative
+ *
+ * @param[in] input_shape Input tensor shape
+ * @param[in] starts      Start coordinates
+ * @param[in] ends_abs    Absolute end coordinates
+ *
+ * @return The output tensor shape
+ */
+TensorShape compute_slice_output_shape(TensorShape input_shape, Coordinates starts, Coordinates ends_abs);
+
 /** Returns the absolute start coordinates of strided slice
  *
  * @param[in] input_shape Input tensor shape
  * @param[in] starts      Start coordinates
  * @param[in] strides     Slice strides
- * @param[in] begin_mask  (Optional) If the ith bit of begin_mask is set, begin[i] is ignored and
+ * @param[in] begin_mask  (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and
  *                        the fullest possible range in that dimension is used instead.
  *
  * @return Absolute start coordinates
@@ -46,6 +67,8 @@
 
 /** Returns the absolute ends coordinates of strided slice
  *
+ * @warning Starts must be non-negative
+ *
  * @param[in] input_shape      Input tensor shape
  * @param[in] starts_abs       Absolute start coordinates
  * @param[in] ends             End coordinates
@@ -53,7 +76,7 @@
  * @param[in] end_mask         (Optional) If the ith bit of end_mask is set, end[i] is ignored and
  *                             the fullest possible range in that dimension is used instead.
  * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- *                             A slice of size 1 starting from begin[i] in the dimension must be preserved.
+ *                             A slice of size 1 starting from starts[i] in the dimension must be preserved.
  *
  * @return Absolute end coordinates
  */
@@ -68,7 +91,10 @@
  */
 Coordinates strided_slice_strides(TensorShape input_shape, Coordinates strides);
 
-/** Computes output shape of a strided slice
+/** Computes output shape of strided slice
+ *
+ * @warning Starts and ends must be non-negative
+ * @warning Starts, ends and final strides should have the same dimensions as the input shape
  *
  * @param[in] input_shape   Input tensor shape
  * @param[in] starts_abs    Absolute start coordinates
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 76c2128..d2bfdfd 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -106,6 +106,7 @@
 #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
 #include "arm_compute/runtime/CL/functions/CLScale.h"
 #include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
 #include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
 #include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
 #include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h
new file mode 100644
index 0000000..acd4f0d
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLSlice.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CL_SLICE_H__
+#define __ARM_COMPUTE_CL_SLICE_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+namespace arm_compute
+{
+// Forward Declarations
+class ICLTensor;
+
+/** Basic function to perform tensor slicing */
+class CLSlice : public ICLSimpleFunction
+{
+public:
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     * @note Start indices must be non-negative. 0 <= starts[i]
+     * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
+     * @note End indices are not inclusive unless negative.
+     *
+     * @param[in]  input  Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+     * @param[out] output Destination tensor. Data type supported: Same as @p input
+     * @param[in]  starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  ends   The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref CLSlice
+     *
+     * @note Supported tensor rank: up to 4
+     * @note Start indices must be non-negative. 0 <= starts[i]
+     * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
+     * @note End indices are not inclusive unless negative.
+     *
+     * @param[in] input  Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+     * @param[in] output Destination tensor info. Data type supported: Same as @p input
+     * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in] ends   The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     *
+     * @return A status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CL_SLICE_H__ */
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 29fd672..0cc6e32 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -361,7 +361,7 @@
     { "softmax_layer_max_shift_exp_sum_quantized_parallel", "softmax_layer_quantized.cl" },
     { "softmax_layer_max_shift_exp_sum_serial", "softmax_layer.cl" },
     { "softmax_layer_max_shift_exp_sum_parallel", "softmax_layer.cl" },
-    { "strided_slice", "strided_slice.cl" },
+    { "strided_slice", "slice_ops.cl" },
     { "suppress_non_maximum", "canny.cl" },
     { "tablelookup_U8", "tablelookup.cl" },
     { "tablelookup_S16", "tablelookup.cl" },
@@ -742,8 +742,8 @@
 #include "./cl_kernels/softmax_layer_quantized.clembed"
     },
     {
-        "strided_slice.cl",
-#include "./cl_kernels/strided_slice.clembed"
+        "slice_ops.cl",
+#include "./cl_kernels/slice_ops.clembed"
     },
     {
         "tablelookup.cl",
diff --git a/src/core/CL/cl_kernels/strided_slice.cl b/src/core/CL/cl_kernels/slice_ops.cl
similarity index 76%
rename from src/core/CL/cl_kernels/strided_slice.cl
rename to src/core/CL/cl_kernels/slice_ops.cl
index 7c68fb9..bc3df47 100644
--- a/src/core/CL/cl_kernels/strided_slice.cl
+++ b/src/core/CL/cl_kernels/slice_ops.cl
@@ -61,25 +61,47 @@
     Tensor4D input  = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, SRC_DEPTH);
     Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);
 
-    int offset_0 = 0;
-    int offset_1 = 0;
-    int offset_2 = 0;
-    int offset_3 = 0;
+    int offset = 0;
 
-    // Calculate offset
-#if defined(START_0) && defined(STRIDE_0)
-    offset_0 = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0;
+    // Offset X
+#if defined(START_0) && defined(STRIDE_0) && defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+    // Check if access on width gets out of bounds
+    // If it does shift access vector to access elements within bounds
+    const int xi = (int)(get_global_id(0) * VEC_SIZE);
+    offset       = (int)START_0 + min(xi, (int)LAST_ACCESSED_X);
+    input.ptr += offset * input_stride_x;
+    output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
+#elif defined(START_0) && defined(STRIDE_0)
+    offset = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0;
+    input.ptr += offset * input_stride_x;
 #endif // defined(START_0) && defined(STRIDE_0)
+
+    // Offset Y
 #if defined(START_1) && defined(STRIDE_1)
-    offset_1 = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1;
+    offset = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1;
+    input.ptr += offset * input_stride_y;
 #endif // defined(START_1) && defined(STRIDE_1)
+
+    // Offset Z
 #if defined(START_2) && defined(STRIDE_2)
-    offset_2 = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2;
+    offset = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2;
+    input.ptr += offset * input_stride_z;
 #endif // defined(START_2) && defined(STRIDE_2)
+
+    // Offset depth
 #if defined(START_3) && defined(STRIDE_3)
-    offset_3 = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3;
-#endif // defined(START_2) && defined(STRIDE_2)
+    offset = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3;
+    input.ptr += offset * input_stride_w;
+#endif // defined(START_3) && defined(STRIDE_3)
 
     // Store result
-    *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)tensor4D_offset(&input, offset_0, offset_1, offset_2, offset_3));
+#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input.ptr));
+
+    VSTORE(VEC_SIZE)
+    (val, 0, (__global DATA_TYPE *)(output.ptr));
+#else  // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+    *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)(input.ptr));
+#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
 }
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp
index f07436a..2d2ba10 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.cpp
+++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp
@@ -55,10 +55,10 @@
     ARM_COMPUTE_RETURN_ERROR_ON(starts.num_dimensions() > input->num_dimensions());
     ARM_COMPUTE_RETURN_ERROR_ON(ends.num_dimensions() > input->num_dimensions());
     ARM_COMPUTE_RETURN_ERROR_ON(strides.num_dimensions() > input->num_dimensions());
-    for(unsigned int i = 0; i < strides.num_dimensions(); ++i)
+    ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(strides.cbegin(), strides.cbegin() + strides.num_dimensions(), [](int i)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON(strides[i] == 0);
-    }
+        return i == 0;
+    }));
 
     // Get expected output shape
     const TensorShape exp_output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape(*input,
@@ -120,6 +120,19 @@
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+
+    // Enable multiple elements processing along x if stride_x is 1 and output width greater than the access vector size
+    const int  vec_size_x     = 16 / input->info()->element_size();
+    const int  output_width_x = output->info()->tensor_shape().x();
+    const bool multi_access_x = (final_strides.x() == 1) && (output_width_x / vec_size_x > 0);
+
+    // Update window if needed
+    if(multi_access_x)
+    {
+        Window &updated_window = std::get<1>(win_config);
+        updated_window.set(Window::DimX,
+                           Window::Dimension(updated_window.x().start(), ceil_to_multiple(updated_window.x().end(), vec_size_x), vec_size_x));
+    }
     ICLKernel::configure_internal(win_config.second);
 
     // Create build options
@@ -130,6 +143,8 @@
         build_opts.add_option("-DSTART_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(starts_abs[i]));
         build_opts.add_option("-DSTRIDE_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(final_strides[i]));
     }
+    build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+    build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
     build_opts.add_option_if_else(input_shape.num_dimensions() > 2,
                                   "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()),
                                   "-DSRC_DEPTH=1");
diff --git a/src/core/utils/helpers/tensor_transform.cpp b/src/core/utils/helpers/tensor_transform.cpp
index 5c83a8b..a4bce5d 100644
--- a/src/core/utils/helpers/tensor_transform.cpp
+++ b/src/core/utils/helpers/tensor_transform.cpp
@@ -29,6 +29,30 @@
 {
 namespace tensor_transform
 {
+Coordinates slice_absolute_end_coords(TensorShape input_shape, Coordinates ends)
+{
+    // Create end mask
+    int32_t end_mask = 0;
+    for(unsigned int i = 0; i < ends.num_dimensions(); ++i)
+    {
+        if(ends[i] < 0)
+        {
+            end_mask |= 1 << i;
+        }
+    }
+    // Get unit strides
+    const BiStrides unit_strides = strided_slice_strides(input_shape, BiStrides());
+
+    return strided_slice_absolute_end_coords(input_shape, Coordinates(), ends, unit_strides, end_mask);
+}
+
+TensorShape compute_slice_output_shape(TensorShape input_shape, Coordinates starts, Coordinates ends_abs)
+{
+    // Get unit strides
+    const BiStrides unit_strides = strided_slice_strides(input_shape, BiStrides());
+    return compute_strided_slice_output_shape(input_shape, starts, ends_abs, unit_strides);
+}
+
 Coordinates strided_slice_absolute_start_coords(TensorShape input_shape, Coordinates starts, Coordinates strides, int32_t begin_mask)
 {
     Coordinates starts_abs;
diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp
new file mode 100644
index 0000000..bef7eca
--- /dev/null
+++ b/src/runtime/CL/functions/CLSlice.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "support/ToolchainSupport.h"
+
+namespace arm_compute
+{
+void CLSlice::configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+
+    // Get absolute end coordinates
+    const Coordinates ends_abs = arm_compute::helpers::tensor_transform::slice_absolute_end_coords(input->info()->tensor_shape(), ends);
+
+    auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceKernel>();
+    k->configure(input, output, starts, ends_abs, BiStrides(), 0, 0, 0);
+    _kernel = std::move(k);
+}
+
+Status CLSlice::validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
+
+    // Check start dimensions for being non-negative
+    ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(starts.cbegin(), starts.cbegin() + starts.num_dimensions(), [](int i)
+    {
+        return i < 0;
+    }));
+
+    // Get absolute end coordinates
+    const Coordinates ends_abs = arm_compute::helpers::tensor_transform::slice_absolute_end_coords(input->tensor_shape(), ends);
+
+    return CLStridedSliceKernel::validate(input, output, starts, ends_abs, BiStrides(), 0, 0, 0);
+}
+} // namespace arm_compute
diff --git a/tests/datasets/StridedSliceDataset.h b/tests/datasets/SliceOperationsDataset.h
similarity index 65%
rename from tests/datasets/StridedSliceDataset.h
rename to tests/datasets/SliceOperationsDataset.h
index 00f1992..b6df404 100644
--- a/tests/datasets/StridedSliceDataset.h
+++ b/tests/datasets/SliceOperationsDataset.h
@@ -34,6 +34,77 @@
 {
 namespace datasets
 {
+class SliceDataset
+{
+public:
+    using type = std::tuple<TensorShape, Coordinates, Coordinates>;
+
+    struct iterator
+    {
+        iterator(std::vector<TensorShape>::const_iterator tensor_shapes_it,
+                 std::vector<Coordinates>::const_iterator starts_values_it,
+                 std::vector<Coordinates>::const_iterator ends_values_it)
+            : _tensor_shapes_it{ std::move(tensor_shapes_it) },
+              _starts_values_it{ std::move(starts_values_it) },
+              _ends_values_it{ std::move(ends_values_it) }
+        {
+        }
+
+        std::string description() const
+        {
+            std::stringstream description;
+            description << "Shape=" << *_tensor_shapes_it << ":";
+            description << "Starts=" << *_starts_values_it << ":";
+            description << "Ends=" << *_ends_values_it << ":";
+            return description.str();
+        }
+
+        SliceDataset::type operator*() const
+        {
+            return std::make_tuple(*_tensor_shapes_it, *_starts_values_it, *_ends_values_it);
+        }
+
+        iterator &operator++()
+        {
+            ++_tensor_shapes_it;
+            ++_starts_values_it;
+            ++_ends_values_it;
+            return *this;
+        }
+
+    private:
+        std::vector<TensorShape>::const_iterator _tensor_shapes_it;
+        std::vector<Coordinates>::const_iterator _starts_values_it;
+        std::vector<Coordinates>::const_iterator _ends_values_it;
+    };
+
+    iterator begin() const
+    {
+        return iterator(_tensor_shapes.begin(), _starts_values.begin(), _ends_values.begin());
+    }
+
+    int size() const
+    {
+        return std::min(_tensor_shapes.size(), std::min(_starts_values.size(), _ends_values.size()));
+    }
+
+    void add_config(TensorShape shape, Coordinates starts, Coordinates ends)
+    {
+        _tensor_shapes.emplace_back(std::move(shape));
+        _starts_values.emplace_back(std::move(starts));
+        _ends_values.emplace_back(std::move(ends));
+    }
+
+protected:
+    SliceDataset()                = default;
+    SliceDataset(SliceDataset &&) = default;
+
+private:
+    std::vector<TensorShape> _tensor_shapes{};
+    std::vector<Coordinates> _starts_values{};
+    std::vector<Coordinates> _ends_values{};
+};
+
 class StridedSliceDataset
 {
 public:
@@ -140,6 +211,41 @@
     std::vector<int32_t>     _shrink_mask_values{};
 };
 
+class SmallSliceDataset final : public SliceDataset
+{
+public:
+    SmallSliceDataset()
+    {
+        // 1D
+        add_config(TensorShape(15U), Coordinates(4), Coordinates(9));
+        add_config(TensorShape(15U), Coordinates(0), Coordinates(-1));
+        // 2D
+        add_config(TensorShape(15U, 16U), Coordinates(0, 1), Coordinates(5, -1));
+        add_config(TensorShape(15U, 16U), Coordinates(4, 1), Coordinates(12, -1));
+        // 3D
+        add_config(TensorShape(15U, 16U, 4U), Coordinates(0, 1, 2), Coordinates(5, -1, 4));
+        add_config(TensorShape(15U, 16U, 4U), Coordinates(0, 1, 2), Coordinates(5, -1, 4));
+        // 4D
+        add_config(TensorShape(15U, 16U, 4U, 12U), Coordinates(0, 1, 2, 2), Coordinates(5, -1, 4, 5));
+    }
+};
+
+class LargeSliceDataset final : public SliceDataset
+{
+public:
+    LargeSliceDataset()
+    {
+        // 1D
+        add_config(TensorShape(1025U), Coordinates(128), Coordinates(-100));
+        // 2D
+        add_config(TensorShape(372U, 68U), Coordinates(128, 7), Coordinates(368, -1));
+        // 3D
+        add_config(TensorShape(372U, 68U, 12U), Coordinates(128, 7, 2), Coordinates(368, -1, 4));
+        // 4D
+        add_config(TensorShape(372U, 68U, 7U, 4U), Coordinates(128, 7, 2), Coordinates(368, 17, 5));
+    }
+};
+
 class SmallStridedSliceDataset final : public StridedSliceDataset
 {
 public:
@@ -167,14 +273,13 @@
         // 1D
         add_config(TensorShape(1025U), Coordinates(128), Coordinates(-100), BiStrides(20));
         // 2D
-        add_config(TensorShape(372U, 68U), Coordinates(128U, 7U), Coordinates(368U, -30), BiStrides(10, 7));
+        add_config(TensorShape(372U, 68U), Coordinates(128, 7), Coordinates(368, -30), BiStrides(10, 7));
         // 3D
-        add_config(TensorShape(372U, 68U, 12U), Coordinates(128U, 7U, -1), Coordinates(368U, -30, -5), BiStrides(14, 7, -2));
+        add_config(TensorShape(372U, 68U, 12U), Coordinates(128, 7, -1), Coordinates(368, -30, -5), BiStrides(14, 7, -2));
         // 4D
-        add_config(TensorShape(372U, 68U, 7U, 4U), Coordinates(128U, 7U, 2U), Coordinates(368U, -30, 5U), BiStrides(20, 7, 2), 1, 1);
+        add_config(TensorShape(372U, 68U, 7U, 4U), Coordinates(128, 7, 2), Coordinates(368, -30, 5), BiStrides(20, 7, 2), 1, 1);
     }
 };
-
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/Slice.cpp b/tests/validation/CL/Slice.cpp
new file mode 100644
index 0000000..91c2e90
--- /dev/null
+++ b/tests/validation/CL/Slice.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLSlice.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/SliceOperationsDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/SliceOperationsFixtures.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(Slice)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 3U, 2U, 5U, 3U), 1, DataType::F32), // Invalid input shape
+                                                TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32),         // Negative begin
+                                                TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32),         // Big number of coordinates
+                                                TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32)
+        }),
+        framework::dataset::make("Starts", { Coordinates(3, 1, 0), Coordinates(-3, 1, 0), Coordinates(3, 1, 0), Coordinates(3, 1, 0) })),
+        framework::dataset::make("Ends", { Coordinates(13, 3, 0),  Coordinates(13, 3, 1), Coordinates(13, 3, 1, 1), Coordinates(13, 3, 1) })),
+        framework::dataset::make("Expected", { false, false, false, true })),
+        input_info, starts, ends, expected)
+{
+    TensorInfo output_info;
+    const Status status = CLSlice::validate(&input_info.clone()->set_is_resizable(false), &output_info, starts, ends);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+DATA_TEST_CASE(Configuration,
+               framework::DatasetMode::ALL,
+               combine(arm_compute::test::datasets::SmallSliceDataset(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })),
+               shape, starts, ends, data_type)
+{
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst;
+
+    // Create and Configure function
+    CLSlice slice;
+    slice.configure(&src, &dst, starts, ends);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(dst.info()->tensor_shape());
+    validate(dst.info()->valid_region(), valid_region);
+}
+
+template <typename T>
+using CLSliceFixture = SliceFixture<CLTensor, CLAccessor, CLSlice, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLSliceFixture<half>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallSliceDataset(), framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+                       CLSliceFixture<half>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeSliceDataset(), framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLSliceFixture<float>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallSliceDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+                       CLSliceFixture<float>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeSliceDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // Slice
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/StridedSlice.cpp b/tests/validation/CL/StridedSlice.cpp
index 824f7b1..00eee16 100644
--- a/tests/validation/CL/StridedSlice.cpp
+++ b/tests/validation/CL/StridedSlice.cpp
@@ -28,12 +28,12 @@
 
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
-#include "tests/datasets/StridedSliceDataset.h"
+#include "tests/datasets/SliceOperationsDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/StridedSliceFixture.h"
+#include "tests/validation/fixtures/SliceOperationsFixtures.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/StridedSliceFixture.h b/tests/validation/fixtures/SliceOperationsFixtures.h
similarity index 68%
rename from tests/validation/fixtures/StridedSliceFixture.h
rename to tests/validation/fixtures/SliceOperationsFixtures.h
index 26442ad..018bafb 100644
--- a/tests/validation/fixtures/StridedSliceFixture.h
+++ b/tests/validation/fixtures/SliceOperationsFixtures.h
@@ -34,7 +34,7 @@
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Helpers.h"
-#include "tests/validation/reference/StridedSlice.h"
+#include "tests/validation/reference/SliceOperations.h"
 
 namespace arm_compute
 {
@@ -43,6 +43,69 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class SliceFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, Coordinates starts, Coordinates ends, DataType data_type)
+    {
+        _target    = compute_target(shape, starts, ends, data_type);
+        _reference = compute_reference(shape, starts, ends, data_type);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape, const Coordinates &starts, const Coordinates &ends, DataType data_type)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, data_type);
+        TensorType dst;
+
+        // Create and configure function
+        FunctionType slice;
+        slice.configure(&src, &dst, starts, ends);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(dst), 1);
+
+        // Compute function
+        slice.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, const Coordinates &starts, const Coordinates &ends, DataType data_type)
+    {
+        // Create reference
+        SimpleTensor<T> src{ shape, data_type };
+
+        // Fill reference
+        fill(src, 0);
+
+        return reference::slice(src, starts, ends);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
 class StridedSliceFixture : public framework::Fixture
 {
 public:
diff --git a/tests/validation/reference/StridedSlice.cpp b/tests/validation/reference/SliceOperations.cpp
similarity index 68%
rename from tests/validation/reference/StridedSlice.cpp
rename to tests/validation/reference/SliceOperations.cpp
index ac02ce1..04b5b98 100644
--- a/tests/validation/reference/StridedSlice.cpp
+++ b/tests/validation/reference/SliceOperations.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "StridedSlice.h"
+#include "SliceOperations.h"
 
 #include "arm_compute/core/utils/helpers/tensor_transform.h"
 
@@ -34,6 +34,51 @@
 namespace reference
 {
 template <typename T>
+SimpleTensor<T> slice(const SimpleTensor<T> &src, Coordinates starts, Coordinates ends)
+{
+    using namespace arm_compute::helpers::tensor_transform;
+
+    // Validation checks
+    ARM_COMPUTE_ERROR_ON(src.shape().num_dimensions() > 4);
+    ARM_COMPUTE_ERROR_ON(starts.num_dimensions() > src.shape().num_dimensions());
+    ARM_COMPUTE_ERROR_ON(std::any_of(starts.cbegin(), starts.cbegin() + starts.num_dimensions(), [](int i)
+    {
+        return i < 0;
+    }));
+    ARM_COMPUTE_ERROR_ON(ends.num_dimensions() > src.shape().num_dimensions());
+
+    // Get source shape
+    const TensorShape &src_shape = src.shape();
+
+    // Get actual end
+    Coordinates ends_abs = slice_absolute_end_coords(src_shape, ends);
+
+    // Get destination shape
+    TensorShape dst_shape = compute_slice_output_shape(src_shape, starts, ends_abs);
+
+    // Create destination tensor
+    SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
+
+    // Perform slice
+    Window win;
+    win.use_tensor_dimensions(dst_shape);
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        Coordinates offset;
+        for(unsigned int i = 0; i < id.num_dimensions(); ++i)
+        {
+            offset.set(i, starts[i] + id[i]);
+        }
+        *reinterpret_cast<T *>(dst(id)) = *reinterpret_cast<const T *>(src(offset));
+    });
+
+    return dst;
+}
+
+template SimpleTensor<float> slice(const SimpleTensor<float> &src, Coordinates starts, Coordinates ends);
+template SimpleTensor<half_float::half> slice(const SimpleTensor<half_float::half> &src, Coordinates starts, Coordinates ends);
+
+template <typename T>
 SimpleTensor<T> strided_slice(const SimpleTensor<T> &src,
                               Coordinates starts, Coordinates ends, BiStrides strides,
                               int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
@@ -45,10 +90,10 @@
     ARM_COMPUTE_ERROR_ON(starts.num_dimensions() > src.shape().num_dimensions());
     ARM_COMPUTE_ERROR_ON(ends.num_dimensions() > src.shape().num_dimensions());
     ARM_COMPUTE_ERROR_ON(strides.num_dimensions() > src.shape().num_dimensions());
-    for(unsigned int i = 0; i < strides.num_dimensions(); ++i)
+    ARM_COMPUTE_ERROR_ON(std::any_of(strides.cbegin(), strides.cbegin() + strides.num_dimensions(), [](int i)
     {
-        ARM_COMPUTE_ERROR_ON(strides[i] == 0);
-    }
+        return i == 0;
+    }));
 
     // Get source shape
     const TensorShape &src_shape = src.shape();
diff --git a/tests/validation/reference/StridedSlice.h b/tests/validation/reference/SliceOperations.h
similarity index 85%
rename from tests/validation/reference/StridedSlice.h
rename to tests/validation/reference/SliceOperations.h
index 805b7e6..89fe203 100644
--- a/tests/validation/reference/StridedSlice.h
+++ b/tests/validation/reference/SliceOperations.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_STRIDED_SLICE_H__
-#define __ARM_COMPUTE_TEST_STRIDED_SLICE_H__
+#ifndef __ARM_COMPUTE_TEST_SLICE_OPERATIONS_H__
+#define __ARM_COMPUTE_TEST_SLICE_OPERATIONS_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,6 +35,9 @@
 namespace reference
 {
 template <typename T>
+SimpleTensor<T> slice(const SimpleTensor<T> &src, Coordinates starts, Coordinates ends);
+
+template <typename T>
 SimpleTensor<T> strided_slice(const SimpleTensor<T> &src,
                               Coordinates starts, Coordinates ends, BiStrides strides,
                               int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
@@ -42,4 +45,4 @@
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_STRIDED_SLICE_H__ */
+#endif /* __ARM_COMPUTE_TEST_SLICE_OPERATIONS_H__ */