COMPMID-1218 Implementing Batch to Space on OpenCL

Change-Id: I12ba4c0c35f086ea3f395970b85af5bf8f94850b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145052
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index ff16394..da2d316 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -32,6 +32,7 @@
 #include "arm_compute/core/CL/kernels/CLArithmeticDivisionKernel.h"
 #include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h"
 #include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
 #include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
 #include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h
new file mode 100644
index 0000000..93c5160
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H__
+#define __ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the OpenCL kernel that moves data from the batch dimension into blocks of the x/y dimensions */
+class CLBatchToSpaceLayerKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLBatchToSpaceLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLBatchToSpaceLayerKernel(const CLBatchToSpaceLayerKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    CLBatchToSpaceLayerKernel &operator=(const CLBatchToSpaceLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLBatchToSpaceLayerKernel(CLBatchToSpaceLayerKernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    CLBatchToSpaceLayerKernel &operator=(CLBatchToSpaceLayerKernel &&) = default;
+    /** Default destructor */
+    ~CLBatchToSpaceLayerKernel() = default;
+    /** Initialise the kernel's inputs and output (block shape read from a tensor at run time).
+     *
+     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  block_shape 1-D tensor holding the block sizes: element 0 is the x size, element 1 the y size. Data types supported: S32
+     * @param[out] output      Tensor output. Not auto-initialised by this overload. Data types supported: same as @p input
+     */
+    void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+    /** Initialise the kernel's inputs and output (Static block shape).
+     *
+     * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  block_shape_x Block size along x. Must be greater than 0.
+     * @param[in]  block_shape_y Block size along y. Must be greater than 0.
+     * @param[out] output        Tensor output. Auto-initialised if empty. Data types supported: same as @p input
+     */
+    void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel
+     *
+     * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] block_shape Block shape tensor info (1-D, element 0 is the x size, element 1 the y size). Data types supported: S32
+     * @param[in] output      Tensor output info. Data types supported: same as @p input
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel (Static block shape).
+     *
+     * @param[in] input         Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] block_shape_x Block size along x. Must be greater than 0.
+     * @param[in] block_shape_y Block size along y. Must be greater than 0.
+     * @param[in] output        Tensor output info. Data types supported: same as @p input
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input;       /**< Source tensor */
+    const ICLTensor *_block_shape; /**< Block shape tensor (stays nullptr when the static block shape overload is used) */
+    ICLTensor       *_output;      /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H__ */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index e0b6ce6..1a86d27 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -433,7 +433,6 @@
 
     return output_shape;
 }
-
 inline TensorShape compute_strided_slice_shape(const ITensorInfo &input,
                                                const Coordinates &starts, const Coordinates &ends, const Coordinates &strides,
                                                int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
@@ -449,6 +448,16 @@
 
     return compute_strided_slice_output_shape(input_shape, starts_abs, ends_abs, final_strides);
 }
+inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y)
+{
+    ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0);
+    // Start from the input shape: x and y grow by their block size, the batch shrinks by the product of both
+    TensorShape batch_to_space_shape{ input->tensor_shape() };
+    batch_to_space_shape.set(0, input->dimension(0) * block_x);
+    batch_to_space_shape.set(1, input->dimension(1) * block_y);
+    batch_to_space_shape.set(3, input->dimension(3) / (block_x * block_y));
+    return batch_to_space_shape;
+}
 
 template <typename T>
 inline TensorShape extract_shape(T *data)
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index f81a1ba..76c2128 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -32,6 +32,7 @@
 #include "arm_compute/runtime/CL/functions/CLArithmeticDivision.h"
 #include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h"
 #include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h"
 #include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h"
 #include "arm_compute/runtime/CL/functions/CLBitwiseNot.h"
 #include "arm_compute/runtime/CL/functions/CLBitwiseOr.h"
diff --git a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
new file mode 100644
index 0000000..2abfd7c
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBATCHTOSPACELAYER_H__
+#define __ARM_COMPUTE_CLBATCHTOSPACELAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLBatchToSpaceLayerKernel. */
+class CLBatchToSpaceLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLBatchToSpaceLayer();
+    /** Set the input and output tensors (block shape read from a tensor).
+     *
+     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  block_shape 1-D tensor with shape [M]. Data types supported: S32
+     * @param[out] output      Tensor output. Data types supported: same as @p input
+     */
+    void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+    /** Set the input and output tensors (Static block shape).
+     *
+     * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  block_shape_x Block shape x value.
+     * @param[in]  block_shape_y Block shape y value.
+     * @param[out] output        Tensor output. Data types supported: same as @p input
+     */
+    void configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer
+     *
+     * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] block_shape Block shape tensor info with shape [M]. Data types supported: S32
+     * @param[in] output      Tensor output info (read only, it is never modified). Data types supported: same as @p input
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer (Static block shape).
+     *
+     * @param[in] input         Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] block_shape_x Block shape x value.
+     * @param[in] block_shape_y Block shape y value.
+     * @param[in] output        Tensor output info (read only, it is never modified). Data types supported: same as @p input
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, int32_t block_shape_x, int32_t block_shape_y, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLBatchToSpaceLayerKernel _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLBATCHTOSPACELAYER_H__ */
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 6f45756..29fd672 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -153,6 +153,8 @@
     { "arithmetic_add", "arithmetic_op.cl" },
     { "arithmetic_sub", "arithmetic_op.cl" },
     { "arithmetic_div", "arithmetic_op.cl" },
+    { "batch_to_space", "batch_to_space.cl" },
+    { "batch_to_space_static", "batch_to_space.cl" },
     { "batchnormalization_layer_nchw", "batchnormalization_layer.cl" },
     { "batchnormalization_layer_nhwc", "batchnormalization_layer.cl" },
     { "bitwise_or", "bitwise_op.cl" },
@@ -456,6 +458,10 @@
 #include "./cl_kernels/arithmetic_op_quantized.clembed"
     },
     {
+        "batch_to_space.cl",
+#include "./cl_kernels/batch_to_space.clembed"
+    },
+    {
         "bitwise_op.cl",
 #include "./cl_kernels/bitwise_op.clembed"
     },
diff --git a/src/core/CL/cl_kernels/batch_to_space.cl b/src/core/CL/cl_kernels/batch_to_space.cl
new file mode 100644
index 0000000..3043c2c
--- /dev/null
+++ b/src/core/CL/cl_kernels/batch_to_space.cl
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#if defined(DATA_TYPE) && defined(BATCH_SIZE)
+/** Batch to space transformation with the block shape read from a tensor at run time.
+ *
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
+ * @note The input tensor batch size must be passed at compile time using -DBATCH_SIZE. e.g. -DBATCH_SIZE=2
+ * @note Element 0 of the block shape tensor is used as the block size along X and element 1 as the block size along Y
+ *
+ * @param[in]  input_ptr                                 Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in]  input_stride_x                            Stride of the source tensor in X dimension (in bytes)
+ * @param[in]  input_step_x                              input_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  input_stride_y                            Stride of the source tensor in Y dimension (in bytes)
+ * @param[in]  input_step_y                              input_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  input_stride_z                            Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  input_step_z                              input_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  input_offset_first_element_in_bytes       The offset of the first element in the source tensor
+ * @param[in]  batch_id                                  The input tensor batch id
+ * @param[in]  block_shape_ptr                           Pointer to the block shape tensor. Supported data types: S32
+ * @param[in]  block_shape_stride_x                      Stride of the block shape tensor in X dimension (in bytes)
+ * @param[in]  block_shape_step_x                        block_shape_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  block_shape_offset_first_element_in_bytes The offset of the first element in the block shape tensor
+ * @param[out] output_ptr                                Pointer to the destination tensor. Supported data types: same as @p input_ptr
+ * @param[in]  output_stride_x                           Stride of the destination tensor in X dimension (in bytes)
+ * @param[in]  output_step_x                             output_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  output_stride_y                           Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in]  output_step_y                             output_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  output_stride_z                           Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in]  output_step_z                             output_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  output_stride_w                           Stride of the destination tensor in W dimension (in bytes)
+ * @param[in]  output_step_w                             output_stride_w * number of elements along W processed per workitem(in bytes)
+ * @param[in]  output_offset_first_element_in_bytes      The offset of the first element in the destination tensor
+ */
+__kernel void batch_to_space(
+    TENSOR3D_DECLARATION(input),
+    const int batch_id,
+    VECTOR_DECLARATION(block_shape),
+    TENSOR4D_DECLARATION(output))
+{
+    Tensor3D in    = CONVERT_TO_TENSOR3D_STRUCT(input);
+    Tensor4D out   = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
+    Vector   block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);
+
+    const int block_x = *((__global int *)vector_offset(&block, 0));
+    const int block_y = *((__global int *)vector_offset(&block, 1));
+    // r is the input batch size divided by the block area; batch_id % r is the output batch, batch_id / r the slot inside the block
+    const int r = (BATCH_SIZE / (block_x * block_y));
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+    const int z = get_global_id(2);
+    const int w = batch_id % r;
+
+    const int out_x = x * block_x + (batch_id / r) % block_x;
+    const int out_y = y * block_y + (batch_id / r) / block_x;
+
+    *((__global DATA_TYPE *)tensor4D_offset(&out, out_x, out_y, z, w)) = *((__global DATA_TYPE *)in.ptr);
+}
+#endif // defined(DATA_TYPE) && defined(BATCH_SIZE)
+
+#if defined(DATA_TYPE) && defined(BATCH_SIZE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y)
+/** Batch to space transformation with the block shape fixed at compile time.
+ *
+ * @note The data type and the input batch size must be passed at compile time using -DDATA_TYPE and -DBATCH_SIZE. e.g. -DDATA_TYPE=float -DBATCH_SIZE=2
+ * @note The block shape must be passed at compile time using -DBLOCK_SHAPE_X and -DBLOCK_SHAPE_Y. e.g. -DBLOCK_SHAPE_X=2 -DBLOCK_SHAPE_Y=2
+ *
+ * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
+ * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
+ * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
+ * @param[in]  batch_id                             The input tensor batch id
+ * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
+ * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
+ * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  output_stride_w                      Stride of the destination tensor in W dimension (in bytes)
+ * @param[in]  output_step_w                        output_stride_w * number of elements along W processed per workitem(in bytes)
+ * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ */
+__kernel void batch_to_space_static(
+    TENSOR3D_DECLARATION(input),
+    const int batch_id,
+    TENSOR4D_DECLARATION(output))
+{
+    Tensor3D in  = CONVERT_TO_TENSOR3D_STRUCT(input);
+    Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
+
+    const int block_x = BLOCK_SHAPE_X;
+    const int block_y = BLOCK_SHAPE_Y;
+    // r is the input batch size divided by the block area; batch_id % r is the output batch, batch_id / r the slot inside the block
+    const int r = (BATCH_SIZE / (block_x * block_y));
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+    const int z = get_global_id(2);
+    const int w = batch_id % r;
+
+    const int out_x = x * block_x + (batch_id / r) % block_x;
+    const int out_y = y * block_y + (batch_id / r) / block_x;
+
+    *((__global DATA_TYPE *)tensor4D_offset(&out, out_x, out_y, z, w)) = *((__global DATA_TYPE *)in.ptr);
+}
+#endif // defined(DATA_TYPE) && defined(BATCH_SIZE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y)
\ No newline at end of file
diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
new file mode 100644
index 0000000..e08d6f6
--- /dev/null
+++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+using namespace arm_compute::misc::shape_calculator;
+namespace arm_compute
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_info, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
+    // Only tensor metadata is passed in, so no shape relation between input and output is checked for this variant
+    // If the output is already initialized, check its rank and that its data type matches the input
+    if(output->total_size() != 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+    }
+
+    return Status{};
+}
+Status validate_arguments_static(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
+    ARM_COMPUTE_RETURN_ERROR_ON(block_shape_x <= 0 || block_shape_y <= 0);
+    // Each output batch is assembled from block_shape_x * block_shape_y input batches, so the input batch must split evenly
+    ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[3] % (block_shape_x * block_shape_y) != 0);
+    // If initialized, the output must have exactly the shape produced by compute_batch_to_space_shape()
+    if(output->total_size() != 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[0] != (block_shape_x * input->tensor_shape()[0]));
+        ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[1] != (block_shape_y * input->tensor_shape()[1]));
+        ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[2] != input->tensor_shape()[2]);
+        ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[3] != (input->tensor_shape()[3] / (block_shape_x * block_shape_y)));
+        ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+    }
+
+    return Status{};
+}
+} // namespace
+
+CLBatchToSpaceLayerKernel::CLBatchToSpaceLayerKernel()
+    : _input(nullptr), _block_shape(nullptr), _output(nullptr)
+{
+}
+
+void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, output);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), output->info()));
+
+    _input       = input;
+    _block_shape = block_shape;
+    _output      = output;
+
+    // Create kernel. The block sizes are read from block_shape on the device, so only the data type
+    // and the input batch size are compile-time options (the kernel source uses no other define)
+    CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+    build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(input->info()->dimension(3)));
+    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batch_to_space", build_opts.options()));
+
+    // Configure kernel window
+    Window win = calculate_max_window(*input->info(), Steps());
+    ICLKernel::configure_internal(win);
+}
+
+void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    // Initialise the output from the computed shape if the caller left it empty
+    TensorShape output_shape = compute_batch_to_space_shape(input->info(), block_shape_x, block_shape_y);
+    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
+
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, output->info()));
+    // Drop any block shape tensor left by an earlier tensor-based configure(), otherwise run() would bind it to the static kernel
+    _input       = input;
+    _block_shape = nullptr;
+    _output      = output;
+
+    // Create kernel (the block shape is baked in as compile-time defines)
+    CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+    build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(input->info()->dimension(3)));
+    build_opts.add_option("-DBLOCK_SHAPE_X=" + support::cpp11::to_string(block_shape_x));
+    build_opts.add_option("-DBLOCK_SHAPE_Y=" + support::cpp11::to_string(block_shape_y));
+    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batch_to_space_static", build_opts.options()));
+
+    // Configure kernel window
+    Window win = calculate_max_window(*input->info(), Steps());
+    ICLKernel::configure_internal(win);
+}
+
+Status CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_shape, output);
+    // The outcome of the argument checks is the outcome of the validation
+    return validate_arguments(input, block_shape, output);
+}
+
+Status CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    // The outcome of the static argument checks is the outcome of the validation
+    return validate_arguments_static(input, block_shape_x, block_shape_y, output);
+}
+
+void CLBatchToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+    // The input is walked one 3D slice at a time; the output window is taken as a single 4D slice
+    Window slice_in  = window.first_slice_window_3D();
+    Window slice_out = window.first_slice_window_4D();
+    // The block shape tensor is bound with a zeroed X window (NOTE(review): presumably so it is always read from element 0 - confirm)
+    Window vector_slice = window.first_slice_window_1D();
+    vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0));
+    // Zero every output window dimension; the kernel computes the destination coordinates itself (presumably why no offset is wanted here)
+    slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
+    slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
+    slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
+    slice_out.set(3, Window::Dimension(0, 0, 0));
+    // batch_id counts the 3D input slices enqueued so far and is handed to the kernel as its batch_id argument
+    int batch_id = 0;
+    do
+    {
+        unsigned int idx = 0;
+        add_3D_tensor_argument(idx, _input, slice_in);
+        add_argument(idx, batch_id);
+        if(_block_shape != nullptr) // Only the tensor-based configure() stores a block shape tensor
+        {
+            add_1D_tensor_argument(idx, _block_shape, vector_slice);
+        }
+        add_4D_tensor_argument(idx, _output, slice_out);
+        enqueue(queue, *this, slice_in);
+
+        ++batch_id;
+    }
+    while(window.slide_window_slice_3D(slice_in));
+}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp b/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
new file mode 100644
index 0000000..7919b13
--- /dev/null
+++ b/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+
+CLBatchToSpaceLayer::CLBatchToSpaceLayer()
+    : _batch_to_space_kernel()
+{
+}
+
+void CLBatchToSpaceLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
+{
+    _batch_to_space_kernel.configure(input, block_shape, output); // Tensor-based block shape: forwarded to the kernel as is
+}
+
+void CLBatchToSpaceLayer::configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output)
+{
+    _batch_to_space_kernel.configure(input, block_shape_x, block_shape_y, output); // Static block shape: forwarded to the kernel as is
+}
+
+Status CLBatchToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
+{
+    return CLBatchToSpaceLayerKernel::validate(input, block_shape, output); // No function-level checks beyond the kernel's own
+}
+
+Status CLBatchToSpaceLayer::validate(const ITensorInfo *input, int32_t block_shape_x, int32_t block_shape_y, const ITensorInfo *output)
+{
+    return CLBatchToSpaceLayerKernel::validate(input, block_shape_x, block_shape_y, output); // No function-level checks beyond the kernel's own
+}
+
+void CLBatchToSpaceLayer::run()
+{
+    CLScheduler::get().enqueue(_batch_to_space_kernel, true); // NOTE(review): 'true' presumably requests a queue flush - confirm against CLScheduler::enqueue
+}
diff --git a/tests/datasets/BatchToSpaceDataset.h b/tests/datasets/BatchToSpaceDataset.h
new file mode 100644
index 0000000..63e6214
--- /dev/null
+++ b/tests/datasets/BatchToSpaceDataset.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_DATASET
+#define ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_DATASET
+
+#include "utils/TypePrinter.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class BatchToSpaceLayerDataset
+{
+public:
+    using type = std::tuple<TensorShape, TensorShape, TensorShape>; // (input shape, block-shape tensor shape, output shape)
+
+    struct iterator
+    {
+        iterator(std::vector<TensorShape>::const_iterator src_it,
+                 std::vector<TensorShape>::const_iterator block_shape_it,
+                 std::vector<TensorShape>::const_iterator dst_it)
+            : _src_it(std::move(src_it)),
+              _block_shape_it(std::move(block_shape_it)),
+              _dst_it(std::move(dst_it))
+        {
+        }
+
+        std::string description() const
+        {
+            std::stringstream text; // Rendered as "In=<src>:BlockShape=<block>:Out=<dst>"
+            text << "In=" << *_src_it << ":"
+                 << "BlockShape=" << *_block_shape_it << ":"
+                 << "Out=" << *_dst_it;
+            return text.str();
+        }
+
+        BatchToSpaceLayerDataset::type operator*() const
+        {
+            return BatchToSpaceLayerDataset::type(*_src_it, *_block_shape_it, *_dst_it);
+        }
+
+        iterator &operator++()
+        {
+            ++_src_it;
+            ++_block_shape_it;
+            ++_dst_it;
+            // The three cursors always move together so one step yields one full configuration
+            return *this;
+        }
+
+    private:
+        std::vector<TensorShape>::const_iterator _src_it;
+        std::vector<TensorShape>::const_iterator _block_shape_it;
+        std::vector<TensorShape>::const_iterator _dst_it;
+    };
+
+    iterator begin() const
+    {
+        return iterator(_src_shapes.cbegin(), _block_shape_shapes.cbegin(), _dst_shapes.cbegin());
+    }
+
+    int size() const
+    {
+        return std::min({ _src_shapes.size(), _block_shape_shapes.size(), _dst_shapes.size() });
+    }
+
+    void add_config(TensorShape src, TensorShape block_shape, TensorShape dst)
+    {
+        _src_shapes.push_back(std::move(src));
+        _block_shape_shapes.push_back(std::move(block_shape));
+        _dst_shapes.push_back(std::move(dst));
+    }
+
+protected:
+    BatchToSpaceLayerDataset()                            = default;
+    BatchToSpaceLayerDataset(BatchToSpaceLayerDataset &&) = default;
+
+private:
+    std::vector<TensorShape> _src_shapes{};
+    std::vector<TensorShape> _block_shape_shapes{};
+    std::vector<TensorShape> _dst_shapes{};
+};
+
+class SmallBatchToSpaceLayerDataset final : public BatchToSpaceLayerDataset
+{
+public:
+    SmallBatchToSpaceLayerDataset() // Registers (input shape, block-shape tensor shape, output shape) triples
+    {
+        add_config(TensorShape(1U, 1U, 1U, 4U), TensorShape(2U), TensorShape(2U, 2U, 1U, 1U)); // Single element per batch
+        add_config(TensorShape(3U, 1U, 1U, 4U), TensorShape(2U), TensorShape(6U, 2U, 1U, 1U)); // Width only
+        add_config(TensorShape(1U, 2U, 2U, 4U), TensorShape(2U), TensorShape(2U, 4U, 2U, 1U)); // More than one plane in dimension 2
+        add_config(TensorShape(1U, 3U, 1U, 8U), TensorShape(2U), TensorShape(2U, 6U, 1U, 2U)); // Output keeps two batches (8 -> 2)
+        add_config(TensorShape(3U, 4U, 1U, 4U), TensorShape(2U), TensorShape(6U, 8U, 1U, 1U)); // Width and height both larger than one
+    }
+};
+
+class LargeBatchToSpaceLayerDataset final : public BatchToSpaceLayerDataset
+{
+public:
+    LargeBatchToSpaceLayerDataset() // Registers (input shape, block-shape tensor shape, output shape) triples
+    {
+        add_config(TensorShape(64U, 32U, 2U, 4U), TensorShape(2U), TensorShape(128U, 64U, 2U, 1U));   // Width and height double, batch 4 -> 1
+        add_config(TensorShape(128U, 16U, 2U, 16U), TensorShape(2U), TensorShape(512U, 64U, 2U, 1U)); // Width and height grow 4x, batch 16 -> 1
+    }
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_DATASET */
diff --git a/tests/validation/CL/BatchToSpaceLayer.cpp b/tests/validation/CL/BatchToSpaceLayer.cpp
new file mode 100644
index 0000000..79fee14
--- /dev/null
+++ b/tests/validation/CL/BatchToSpaceLayer.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BatchToSpaceDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/BatchToSpaceLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(BatchToSpaceLayer)
+
+template <typename T>
+using CLBatchToSpaceLayerFixture = BatchToSpaceLayerValidationFixture<CLTensor, CLAccessor, CLBatchToSpaceLayer, T>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallBatchToSpaceLayerDataset(),
+                                                                   framework::dataset::make("DataType", { DataType::F16, DataType::F32 })),
+               input_shape, block_shape_shape, output_shape, dt)
+{
+    // Build the input and output tensors with the tested data type; the block-shape tensor is always S32
+    CLTensor input_tensor  = create_tensor<CLTensor>(input_shape, dt, 1);
+    CLTensor output_tensor = create_tensor<CLTensor>(output_shape, dt, 1);
+    CLTensor block_tensor  = create_tensor<CLTensor>(block_shape_shape, DataType::S32, 1);
+
+    // Configure the function under test
+    CLBatchToSpaceLayer layer;
+    layer.configure(&input_tensor, &block_tensor, &output_tensor);
+
+    // After configuration the output's valid region must span the whole output shape
+    const ValidRegion expected_region = shape_to_valid_region(output_shape);
+    validate(output_tensor.info()->valid_region(), expected_region);
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Expected to pass
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Wrong data type block shape
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32), // Wrong tensor shape
+                                                     }),
+               framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { true, false, false, false})),
+               input_info, block_shape_info, output_info, expected)
+{
+    const bool is_valid = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))); // Status converts to true when no error is reported
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),    // Expected to pass. NOTE(review): input batch 1 -> output batch 4 looks inverted for batch-to-space; confirm against the kernel's static validate
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),    // Negative block shapes
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U, 4U), 1, DataType::F32), // Wrong tensor shape
+                                                     }),
+               framework::dataset::make("BlockShapeX", { 2, 2, 2, 2 })),
+               framework::dataset::make("BlockShapeY", { 2, 2, -2, 2 })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 8U, 2U, 4U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(32U, 8U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 8U, 2U, 4U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { true, false, false, false})),
+               input_info, block_shape_x, block_shape_y, output_info, expected)
+{
+    const bool is_valid = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false))); // Status converts to true when no error is reported
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchToSpaceLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
+                                                                                                               DataType::F32)))
+{
+    // Compare the CL result produced by the fixture against its reference result
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchToSpaceLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
+                                                                                                             DataType::F32)))
+{
+    // Compare the CL result produced by the fixture against its reference result
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchToSpaceLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
+                                                                                                              DataType::F16)))
+{
+    // Compare the CL result produced by the fixture against its reference result
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchToSpaceLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
+                                                                                                            DataType::F16)))
+{
+    // Compare the CL result produced by the fixture against its reference result
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // BatchToSpaceLayer
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/BatchToSpaceLayerFixture.h b/tests/validation/fixtures/BatchToSpaceLayerFixture.h
new file mode 100644
index 0000000..f124fec
--- /dev/null
+++ b/tests/validation/fixtures/BatchToSpaceLayerFixture.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_FIXTURE
+
+#include "tests/Globals.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/BatchToSpaceLayer.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class BatchToSpaceLayerValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape output_shape, DataType data_type)
+    {
+        _target    = compute_target(input_shape, block_shape_shape, output_shape, data_type);    // Result of the function under test
+        _reference = compute_reference(input_shape, block_shape_shape, output_shape, data_type); // Result of the reference implementation
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        std::uniform_real_distribution<> value_range(-1.0f, 1.0f);
+        library->fill(tensor, value_range, i);
+    }
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &block_shape_shape, const TensorShape &output_shape,
+                              DataType data_type)
+    {
+        // Source, block-shape (S32) and destination tensors
+        TensorType src          = create_tensor<TensorType>(input_shape, data_type);
+        TensorType block_tensor = create_tensor<TensorType>(block_shape_shape, DataType::S32);
+        TensorType dst          = create_tensor<TensorType>(output_shape, data_type);
+
+        // Configure the function under test
+        FunctionType layer;
+        layer.configure(&src, &block_tensor, &dst);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(block_tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Back every tensor with memory; afterwards none of them may be resizable
+        src.allocator()->allocate();
+        block_tensor.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!block_tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Random input values; block sizes are derived as output extent / input extent per dimension
+        fill(AccessorType(src), 0);
+        {
+            AccessorType block_accessor(block_tensor);
+            for(unsigned int dim = 0; dim < block_shape_shape.x(); ++dim)
+            {
+                static_cast<int32_t *>(block_accessor.data())[dim] = output_shape[dim] / input_shape[dim];
+            }
+        }
+        // Execute the function
+        layer.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &block_shape_shape,
+                                      const TensorShape &output_shape, DataType data_type)
+    {
+        // Reference-side input and block-shape tensors
+        SimpleTensor<T>       src{ input_shape, data_type };
+        SimpleTensor<int32_t> block{ block_shape_shape, DataType::S32 };
+
+        // Same seed and same block-size derivation as the target path
+        fill(src, 0);
+        for(unsigned int dim = 0; dim < block_shape_shape.x(); ++dim)
+        {
+            block[dim] = output_shape[dim] / input_shape[dim];
+        }
+
+        // Run the reference implementation
+        return reference::batch_to_space(src, block, output_shape);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_FIXTURE */
diff --git a/tests/validation/reference/BatchToSpaceLayer.cpp b/tests/validation/reference/BatchToSpaceLayer.cpp
new file mode 100644
index 0000000..662a707
--- /dev/null
+++ b/tests/validation/reference/BatchToSpaceLayer.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "BatchToSpaceLayer.h"
+
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+// Reference batch-to-space: copies every element of src to its block-expanded position in a tensor of shape dst_shape
+template <typename T>
+SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape)
+{
+    ARM_COMPUTE_ERROR_ON(block_shape[0] <= 0);
+    ARM_COMPUTE_ERROR_ON(block_shape[1] <= 0);
+    SimpleTensor<T> result(dst_shape, src.data_type());
+
+    int        in_pos    = 0;
+    const auto width_in  = static_cast<int>(src.shape()[0]);
+    const auto height_in = static_cast<int>(src.shape()[1]);
+    const auto z_in      = static_cast<int>(src.shape()[2]);
+    const auto batch_in  = static_cast<int>(src.shape()[3]);
+    const int  r         = batch_in / (block_shape[0] * block_shape[1]); // Input batches per block position; loop-invariant, so computed once
+
+    for(int batch = 0; batch < batch_in; ++batch)
+    {
+        for(int z = 0; z < z_in; ++z)
+        {
+            for(int y = 0; y < height_in; ++y)
+            {
+                for(int x = 0; x < width_in; ++x)
+                {
+                    const int out_x   = (block_shape[0] * x + (batch / r) % block_shape[0]); // batch / r picks the offset inside the block
+                    const int out_y   = (block_shape[1] * y + (batch / r) / block_shape[0]);
+                    const int out_pos = out_x + dst_shape[0] * out_y + z * dst_shape[0] * dst_shape[1] + (batch % r) * dst_shape[0] * dst_shape[1] * dst_shape[2]; // batch % r is the output batch
+                    result[out_pos]   = src[in_pos];
+                    ++in_pos; // src is walked linearly in x, y, z, batch order
+                }
+            }
+        }
+    }
+
+    return result;
+}
+template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape);
+template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/BatchToSpaceLayer.h b/tests/validation/reference/BatchToSpaceLayer.h
new file mode 100644
index 0000000..d17cbe5
--- /dev/null
+++ b/tests/validation/reference/BatchToSpaceLayer.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H__
+#define __ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // Defined in BatchToSpaceLayer.cpp, which explicitly instantiates it for float and half
+SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape); // block_shape[0] / block_shape[1] scale dimensions 0 / 1; the result has shape dst_shape
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H__ */