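COMPMID-1498 - Enable grouping in CLGEMMConvolutionLayer

Kernel-side changes contained in this patch:
* CLCol2ImKernel, CLIm2ColKernel: _config_id now includes the num_groups
  value (CLCol2ImKernel previously appended only a "grouping_" marker
  when num_groups > 1).
* CLWeightsReshapeKernel: num_groups is passed to
  compute_weights_reshaped_shape() when auto-initializing the output,
  the top-level const on the by-value num_groups parameter is dropped,
  and the static kernel arguments (input dimensions and output z stride)
  are no longer set in configure() but alongside the biases window setup
  later in the file.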

Change-Id: I15c7df21773145b03f42b6f78bd7ad2e5b8a5219
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144126
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index d7582dc..40032f9 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -119,7 +119,8 @@
     _config_id = "col2im_";
     _config_id += lower_string(string_from_data_type(input->info()->data_type()));
     _config_id += "_";
-    _config_id += (num_groups > 1) ? "grouping_" : "";
+    _config_id += support::cpp11::to_string(num_groups);
+    _config_id += "_";
     _config_id += support::cpp11::to_string(input->info()->dimension(0));
     _config_id += "_";
     _config_id += support::cpp11::to_string(input->info()->dimension(1));
diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp
index 3d71567..0ba0d0e 100644
--- a/src/core/CL/kernels/CLIm2ColKernel.cpp
+++ b/src/core/CL/kernels/CLIm2ColKernel.cpp
@@ -324,6 +324,8 @@
     _config_id += "_";
     _config_id += lower_string(string_from_data_type(input->info()->data_type()));
     _config_id += "_";
+    _config_id += support::cpp11::to_string(num_groups);
+    _config_id += "_";
     _config_id += support::cpp11::to_string(output->info()->dimension(0));
     _config_id += "_";
     _config_id += support::cpp11::to_string(output->info()->dimension(1));
diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
index 5ef0f5b..7639a48 100644
--- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
+++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp
@@ -38,7 +38,7 @@
 
 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
@@ -75,12 +75,12 @@
 {
 }
 
-void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, const unsigned int num_groups)
+void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     // Output tensor auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr))));
+    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr), num_groups)));
 
     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(),
@@ -102,15 +102,6 @@
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("reshape_to_columns", build_opts.options()));
 
-    // Set static arguments
-    unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor();
-    idx += (biases != nullptr) ? num_arguments_per_1D_tensor() : 0;
-    _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(0));
-    _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(1));
-    _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(2));
-    _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(3));
-    _kernel.setArg<cl_uint>(idx++, _output->info()->strides_in_bytes().z());
-
     // Configure window
     Window win = calculate_max_window(*input->info(), Steps());
     // The CLWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped
@@ -118,7 +109,7 @@
     ICLKernel::configure_internal(win);
 }
 
-Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups)
+Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups)
 {
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, biases, output, num_groups));
     return Status{};
@@ -138,6 +129,14 @@
     Window biases_window;
     Window biases_slice;
 
+    unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor();
+    idx += (_biases != nullptr) ? num_arguments_per_1D_tensor() : 0;
+    _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(0));
+    _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(1));
+    _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(2));
+    _kernel.setArg<cl_uint>(idx++, _input->info()->dimension(3));
+    _kernel.setArg<cl_uint>(idx++, _output->info()->strides_in_bytes().z());
+
     if(_biases != nullptr)
     {
         biases_window.use_tensor_dimensions(_biases->info()->tensor_shape());