COMPMID-1344 Add grouping support to CLWeightsReshapeKernel

Change-Id: Idde333308db71087ec234b3fd1eb4e36a44db46c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143049
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
index 664fc3c..6c93c23 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -68,26 +68,30 @@
     ~CLWeightsReshapeKernel() = default;
     /** Set the input and output of the kernel.
      *
-     * @param[in]  input  The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
-     *                    and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM,  num_patches] if unshared. Data types supported: QASYMM8/F16/F32
-     * @param[in]  biases The shared biases tensor to append.  Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
-     *                    dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
-     *                    @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
-     * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
+     * @param[in]  input      The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+     *                        and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM,  num_patches] if unshared. Data types supported: QASYMM8/F16/F32
+     * @param[in]  biases     The shared biases tensor to append.  Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+     *                        dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+     *                        @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+     * @param[out] output     The output tensor. Should be a 2D Tensor if there are no groups and the weights are shared; a 3D Tensor otherwise.
+     *                        Data types supported: Same as @p input
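+     * @param[in]  num_groups (Optional) Number of groups when performing a grouped convolution (default: 1). Must be non-zero and divide OFM evenly; values greater than 1 are not supported with NHWC or 5D weights.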
      */
-    void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output);
+    void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, const unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
      *
-     * @param[in] input  The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
-     *                   and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM,  num_patches] if unshared. Data types supported: QASYMM8/F16/F32
-     * @param[in] biases The shared biases tensor to append.  Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
-     *                   dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
-     *                   @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
-     * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
+     * @param[in] input      The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+     *                       and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM,  num_patches] if unshared. Data types supported: QASYMM8/F16/F32
+     * @param[in] biases     The shared biases tensor to append.  Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+     *                       dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+     *                       @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+     * @param[in] output     The output tensor. Should be a 2D Tensor if there are no groups and the weights are shared; a 3D Tensor otherwise.
+     *                       Data types supported: Same as @p input
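+     * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution (default: 1). Must be non-zero and divide OFM evenly; values greater than 1 are not supported with NHWC or 5D weights.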
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups = 1);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index ac83dcb..1e5b9af 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -55,14 +55,22 @@
     permute(output_shape, perm);
     return output_shape;
 }
-inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false)
+inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, const unsigned int num_groups = 1)
 {
+    ARM_COMPUTE_ERROR_ON(num_groups == 0);
+    ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0);
+    ARM_COMPUTE_ERROR_ON(weights.data_layout() == DataLayout::NHWC && num_groups > 1);
+
     // Calculate output shape
     TensorShape weights_reshaped{ weights.tensor_shape() };
     weights_reshaped.collapse(3);
     const size_t tmp_dim = weights_reshaped[0];
-    weights_reshaped.set(0, weights_reshaped[1]);
+    weights_reshaped.set(0, weights_reshaped[1] / num_groups);
     weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0));
+    if(weights.num_dimensions() < 5)
+    {
+        weights_reshaped.set(2, num_groups);
+    }
 
     return weights_reshaped;
 }
diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl
index 2b83e5a..9335b04 100644
--- a/src/core/CL/cl_kernels/convolution_layer.cl
+++ b/src/core/CL/cl_kernels/convolution_layer.cl
@@ -23,10 +23,11 @@
  */
 #include "helpers.h"
 
-#if defined(DATA_TYPE)
+#if defined(DATA_TYPE) && defined(NUM_GROUPS)
 /** This kernel reshapes the tensor's low three dimensions to single column
  *
  * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
+ * @note The number of groups should be given as a preprocessor argument using -DNUM_GROUPS=number. e.g. -DNUM_GROUPS=2
  *
  * @param[in]  src_ptr                            Pointer to the source tensor. Supported data types: F16/F32
  * @param[in]  src_stride_x                       Stride of the source tensor in X dimension (in bytes)
@@ -50,6 +51,7 @@
  * @param[in]  height                             The height of the input tensor
  * @param[in]  depth                              The depth of the input tensor
  * @param[in]  total_filters                      Total number of filters. 4th dimension of the weights matrix
+ * @param[in]  dst_stride_z                       Stride of the destination tensor in Z dimension (in bytes)
  */
 __kernel void reshape_to_columns_nchw(
     TENSOR3D_DECLARATION(src),
@@ -57,7 +59,7 @@
 #ifdef HAS_BIAS
     VECTOR_DECLARATION(bias),
 #endif /* HAS_BIAS */
-    uint width, uint height, uint depth, uint total_filters)
+    uint width, uint height, uint depth, uint total_filters, uint dst_stride_z)
 {
     Tensor3D src            = CONVERT_TO_TENSOR3D_STRUCT(src);
     bool     is_last_thread = (get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1));
@@ -71,25 +73,39 @@
 
     if(is_last_thread)
     {
-        for(uint i = 0; i < total_filters; ++i)
+        for(uint g = 0; g < NUM_GROUPS; ++g)
         {
-            *((__global DATA_TYPE *)tmp_dst_ptr) = *((__global DATA_TYPE *)tmp_src_ptr);
+            __global uchar *curr_group_dst = tmp_dst_ptr;
+
+            for(uint i = 0; i < total_filters / NUM_GROUPS; ++i)
+            {
+                *((__global DATA_TYPE *)curr_group_dst) = *((__global DATA_TYPE *)tmp_src_ptr);
 
 #ifdef HAS_BIAS
-            *((__global DATA_TYPE *)(tmp_dst_ptr + dst_stride_y)) = *((__global DATA_TYPE *)(tmp_bias_ptr));
-            tmp_bias_ptr += bias_stride_x;
+                *((__global DATA_TYPE *)(curr_group_dst + dst_stride_y)) = *((__global DATA_TYPE *)(tmp_bias_ptr));
+                tmp_bias_ptr += bias_stride_x;
 #endif /* HAS_BIAS */
-            tmp_src_ptr += depth * src_stride_z;
-            tmp_dst_ptr += dst_stride_x;
+                tmp_src_ptr += depth * src_stride_z;
+                curr_group_dst += dst_stride_x;
+            }
+
+            tmp_dst_ptr += dst_stride_z;
         }
     }
     else
     {
-        for(uint i = 0; i < total_filters; ++i)
+        for(uint g = 0; g < NUM_GROUPS; ++g)
         {
-            *((__global DATA_TYPE *)tmp_dst_ptr) = *((__global DATA_TYPE *)tmp_src_ptr);
-            tmp_src_ptr += depth * src_stride_z;
-            tmp_dst_ptr += dst_stride_x;
+            __global uchar *curr_group_dst = tmp_dst_ptr;
+
+            for(uint i = 0; i < total_filters / NUM_GROUPS; ++i)
+            {
+                *((__global DATA_TYPE *)curr_group_dst) = *((__global DATA_TYPE *)tmp_src_ptr);
+                tmp_src_ptr += depth * src_stride_z;
+                curr_group_dst += dst_stride_x;
+            }
+
+            tmp_dst_ptr += dst_stride_z;
         }
     }
 }
@@ -127,7 +143,7 @@
 #ifdef HAS_BIAS
     VECTOR_DECLARATION(bias),
 #endif /* HAS_BIAS */
-    uint depth, uint width, uint height, uint total_filters)
+    uint depth, uint width, uint height, uint total_filters, uint dst_stride_z)
 {
     Tensor3D src            = CONVERT_TO_TENSOR3D_STRUCT(src);
     bool     is_last_thread = (get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1));
@@ -163,4 +179,4 @@
         }
     }
 }
-#endif // defined(DATA_TYPE)
\ No newline at end of file
+#endif // defined(DATA_TYPE) && defined(NUM_GROUPS)
\ No newline at end of file
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index 5243c40..9df91fc 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -38,11 +38,15 @@
 
 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(num_groups == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::NHWC && num_groups > 1);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4 && num_groups > 1);
+    ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(3) % num_groups) != 0);
 
     if(biases != nullptr)
     {
@@ -57,7 +61,7 @@
     // Checks performed when output is configured
     if(output->total_size() != 0)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_weights_reshaped_shape(*input, biases != nullptr));
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_weights_reshaped_shape(*input, biases != nullptr, num_groups));
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
     }
@@ -71,7 +75,7 @@
 {
 }
 
-void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output)
+void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, const unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
@@ -81,7 +85,7 @@
     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(),
                                                   (biases != nullptr) ? biases->info() : nullptr,
-                                                  output->info()));
+                                                  output->info(), num_groups));
 
     const DataType   data_type   = input->info()->data_type();
     const DataLayout data_layout = input->info()->data_layout();
@@ -93,6 +97,7 @@
     // Create build options
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+    build_opts.add_option("-DNUM_GROUPS=" + support::cpp11::to_string(num_groups));
     build_opts.add_option_if(biases != nullptr, "-DHAS_BIAS");
 
     // Create kernel
@@ -106,6 +111,7 @@
     _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(1));
     _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(2));
     _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(3));
+    _kernel.setArg<cl_uint>(idx++, _output->info()->strides_in_bytes().z());
 
     // Configure window
     Window win = calculate_max_window(*input->info(), Steps());
@@ -114,9 +120,9 @@
     ICLKernel::configure(win);
 }
 
-Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output)
+Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, biases, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, biases, output, num_groups));
     return Status{};
 }
 
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index bc98b1e..937bd08 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -635,6 +635,42 @@
     }
 };
 
+/** Data set containing small grouped weights tensor shapes. */
+class GroupedWeightsSmallShapes final : public ShapeDataset
+{
+public:
+    GroupedWeightsSmallShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 3U, 3U, 48U, 120U },
+                     TensorShape{ 1U, 3U, 24U, 240U },
+                     TensorShape{ 3U, 1U, 12U, 480U },
+                     TensorShape{ 5U, 5U, 48U, 120U },
+                     TensorShape{ 1U, 5U, 24U, 240U },
+                     TensorShape{ 5U, 1U, 48U, 480U }
+    })
+    {
+    }
+};
+
+/** Data set containing large grouped weights tensor shapes. */
+class GroupedWeightsLargeShapes final : public ShapeDataset
+{
+public:
+    GroupedWeightsLargeShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 9U, 9U, 96U, 240U },
+                     TensorShape{ 7U, 9U, 48U, 480U },
+                     TensorShape{ 9U, 7U, 24U, 960U },
+                     TensorShape{ 13U, 13U, 96U, 240U },
+                     TensorShape{ 11U, 13U, 48U, 480U },
+                     TensorShape{ 13U, 11U, 24U, 960U }
+    })
+    {
+    }
+};
+
 /** Data set containing 2D tensor shapes for DepthConcatenateLayer. */
 class DepthConcatenateLayerShapes final : public ShapeDataset
 {
diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp
new file mode 100644
index 0000000..6dae0c7
--- /dev/null
+++ b/tests/validation/CL/WeightsReshape.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/Types.h"
+#include "tests/CL/Helper.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/WeightsReshapeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(WeightsReshape)
+
+using CLWeightsReshape = CLSynthetizeFunction<CLWeightsReshapeKernel>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::U8),      // Unsupported data type
+                                                       TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),     // Mismatching data type
+                                                       TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::QASYMM8), // Bias not supported with QASYMM8
+                                                       TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+                                                     }),
+               framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(4U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(4U), 1, DataType::F16),
+                                                        TensorInfo(TensorShape(4U), 1, DataType::QASYMM8),
+                                                        TensorInfo(TensorShape(4U), 1, DataType::F32),
+                                                      })),
+               framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(4U, 19U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(4U, 19U), 1, DataType::F16),
+                                                        TensorInfo(TensorShape(4U, 19U), 1, DataType::QASYMM8),
+                                                        TensorInfo(TensorShape(4U, 19U), 1, DataType::F32),
+                                                      })),
+               framework::dataset::make("Expected", { false, false, false, true })),
+               input_info, biases_info, output_info, expected)
+{
+    bool status = bool(CLWeightsReshape::validate(&input_info, &biases_info, &output_info));
+    ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using CLWeightsReshapeFixture = WeightsReshapeValidationFixture<CLTensor, CLAccessor, CLWeightsReshape, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWeightsReshapeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::GroupedWeightsSmallShapes(), framework::dataset::make("DataType",
+                                                                                                                      DataType::F32)),
+                                                                                                              framework::dataset::make("HasBias", { true, false })),
+                                                                                                      framework::dataset::make("NumGroups", { 1, 2, 3 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWeightsReshapeFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::GroupedWeightsLargeShapes(), framework::dataset::make("DataType",
+                                                                                                                  DataType::F32)),
+                                                                                                                  framework::dataset::make("HasBias", { true, false })),
+                                                                                                          framework::dataset::make("NumGroups", { 1, 2, 3 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::GroupedWeightsSmallShapes(), framework::dataset::make("DataType",
+                                                                                                                     DataType::F16)),
+                                                                                                             framework::dataset::make("HasBias", { true, false })),
+                                                                                                     framework::dataset::make("NumGroups", { 1, 2, 3 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWeightsReshapeFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::GroupedWeightsLargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F16)),
+                                                                                                                 framework::dataset::make("HasBias", { true, false })),
+                                                                                                         framework::dataset::make("NumGroups", { 1, 2, 3 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::GroupedWeightsSmallShapes(), framework::dataset::make("DataType",
+                                                                                                                        DataType::QASYMM8)),
+                                                                                                                framework::dataset::make("HasBias", { false })),
+                                                                                                        framework::dataset::make("NumGroups", { 1, 2, 3 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWeightsReshapeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::GroupedWeightsLargeShapes(), framework::dataset::make("DataType",
+                                                                                                                    DataType::QASYMM8)),
+                                                                                                                    framework::dataset::make("HasBias", { false })),
+                                                                                                            framework::dataset::make("NumGroups", { 1, 2, 3 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/WeightsReshapeFixture.h b/tests/validation/fixtures/WeightsReshapeFixture.h
new file mode 100644
index 0000000..06765f6
--- /dev/null
+++ b/tests/validation/fixtures/WeightsReshapeFixture.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_WEIGHTS_RESHAPE_FIXTURE
+#define ARM_COMPUTE_TEST_WEIGHTS_RESHAPE_FIXTURE
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/WeightsReshape.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::misc::shape_calculator;
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class WeightsReshapeValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, DataType data_type, bool has_bias, unsigned int num_groups)
+    {
+        const TensorShape output_shape = compute_weights_reshaped_shape(TensorInfo(input_shape, 1, data_type), has_bias, num_groups);
+
+        _target    = compute_target(input_shape, output_shape, has_bias, num_groups, data_type);
+        _reference = compute_reference(input_shape, output_shape, has_bias, num_groups, data_type);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, const int seed)
+    {
+        library->fill_tensor_uniform(tensor, seed);
+    }
+
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const bool has_bias, const unsigned int num_groups, DataType data_type)
+    {
+        // Create tensors
+        TensorType src  = create_tensor<TensorType>(input_shape, data_type);
+        TensorType bias = create_tensor<TensorType>(TensorShape(input_shape[3]), data_type);
+        TensorType dst  = create_tensor<TensorType>(output_shape, data_type);
+
+        // Create and configure function
+        FunctionType weights_reshape_func;
+        weights_reshape_func.configure(&src, (has_bias ? &bias : nullptr), &dst, num_groups);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+
+        if(has_bias)
+        {
+            ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            bias.allocator()->allocate();
+
+            ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            fill(AccessorType(bias), 1);
+        }
+
+        // Compute function
+        weights_reshape_func.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, const bool has_bias, const unsigned int num_groups, DataType data_type)
+    {
+        // Create reference
+        SimpleTensor<T> src{ input_shape, data_type };
+        SimpleTensor<T> bias{ TensorShape(has_bias ? input_shape[3] : 0), data_type };
+
+        // Fill reference
+        fill(src, 0);
+        if(has_bias)
+        {
+            fill(bias, 1);
+        }
+
+        return reference::weights_reshape(src, bias, output_shape, num_groups);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_WEIGHTS_RESHAPE_FIXTURE */
diff --git a/tests/validation/reference/WeightsReshape.cpp b/tests/validation/reference/WeightsReshape.cpp
new file mode 100644
index 0000000..fc02395
--- /dev/null
+++ b/tests/validation/reference/WeightsReshape.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "WeightsReshape.h"
+
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> weights_reshape(const SimpleTensor<T> &src, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const unsigned int num_groups)
+{
+    SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
+
+    // Compute reference
+    const bool   has_bias  = biases.size() > 0;
+    const size_t linear_sz = src.shape().total_size_lower(3);
+    const size_t group_sz  = src.shape()[3] / num_groups;
+
+    for(size_t g = 0; g < num_groups; ++g)
+    {
+        for(size_t w = 0; w < group_sz; ++w)
+        {
+            const size_t curr_weight = g * group_sz + w;
+
+            size_t i = 0;
+            for(; i < linear_sz; ++i)
+            {
+                dst[coord2index(dst.shape(), Coordinates(w, i, g))] = src[curr_weight * linear_sz + i];
+            }
+            if(has_bias)
+            {
+                dst[coord2index(dst.shape(), Coordinates(w, i, g))] = static_cast<T>(biases[curr_weight]);
+            }
+        }
+    }
+
+    return dst;
+}
+
+template SimpleTensor<float> weights_reshape(const SimpleTensor<float> &src, const SimpleTensor<float> &biases, const TensorShape &dst_shape, const unsigned int num_groups);
+template SimpleTensor<half> weights_reshape(const SimpleTensor<half> &src, const SimpleTensor<half> &biases, const TensorShape &dst_shape, const unsigned int num_groups);
+template SimpleTensor<uint8_t> weights_reshape(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &biases, const TensorShape &dst_shape, const unsigned int num_groups);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/WeightsReshape.h b/tests/validation/reference/WeightsReshape.h
new file mode 100644
index 0000000..629f1e5
--- /dev/null
+++ b/tests/validation/reference/WeightsReshape.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_WEIGHTS_RESHAPE_H__
+#define __ARM_COMPUTE_TEST_WEIGHTS_RESHAPE_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> weights_reshape(const SimpleTensor<T> &src, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const unsigned int num_groups);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_WEIGHTS_RESHAPE_H__ */