Extend Transposed Conv. for tiles with N0>1

Partially Resolves: COMPMID-5724

Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I0aeddddcdd87c8c79f6dae9a76ffdc2ba0c08e17
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8883
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

commit: 8a2d7cecea194ac8eafa91721fb5b09ae01e5971 [log] [tgz]
author: Gunes Bayir <gunes.bayir@arm.com> Wed Dec 28 10:28:20 2022 +0000
committer: Gunes Bayir <gunes.bayir@arm.com> Thu Dec 29 13:14:10 2022 +0000
tree: 03c2cf553751504bf575450dc3b22deea911af7f
parent: 939b21ad4b9ed15d43b4ee8b17484e57ed55a01f [diff]
diff --git a/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl b/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl
index 1ca282c..fe6182f 100644
--- a/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl
+++ b/src/core/CL/cl_kernels/nhwc/transposed_convolution.cl

@@ -52,8 +52,8 @@
  * @note If bias exists, the compile time argument -DHAS_BIAS should be passed
  * @note Only the following configurations of M0, N0 and K0 are currently supported:
  *  - M0 = 1
- *  - N0 = 1
- *  - K0 = 2, 3, 4, 8, 16
+ *  - N0 = 1, 2, 3, 4, 8, 16
+ *  - K0 = 1, 2, 3, 4, 8, 16
  *
  * @note In case of QASYMM8/QASYMM8_SIGNED, the following extra information must be passed at compile time:
  * - -DIS_QUANTIZED

diff --git a/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp b/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
index 714ca8e..1390451 100644
--- a/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp
+++ b/src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp

@@ -130,8 +130,13 @@
     Window win;
     output_shape.collapse(2U, 1U); // Collapse width and height into single dimension
 
+    const unsigned int n0               = adjust_vec_size(16 / output->element_size(), output_channels);
+    const unsigned int m0               = 1;
+    const unsigned int k0               = adjust_vec_size(16 / input->element_size(), input_channels);
+    const unsigned int partial_store_n0 = output_channels % n0;
+
     // Create window and update padding
-    win = calculate_max_window(output_shape, Steps(1, 1));
+    win = calculate_max_window(output_shape, Steps(n0, m0));
     ICLKernel::configure_internal(win);
 
     const std::string kernel_name = "transposed_convolution_nhwc";
@@ -140,11 +145,6 @@
     const DataType    input_data_type = input->data_type();
     const PaddingInfo strides         = deconv_info.stride();
 
-    const unsigned int n0               = 1;
-    const unsigned int m0               = 1;
-    const unsigned int k0               = adjust_vec_size(16 / input->element_size(), input_channels);
-    const unsigned int partial_store_n0 = output_channels % n0;
-
     if(biases != nullptr)
     {
         build_options.add_option(std::string("-DHAS_BIAS"));
commit	8a2d7cecea194ac8eafa91721fb5b09ae01e5971	[log] [tgz]
author	Gunes Bayir <gunes.bayir@arm.com>	Wed Dec 28 10:28:20 2022 +0000
committer	Gunes Bayir <gunes.bayir@arm.com>	Thu Dec 29 13:14:10 2022 +0000
tree	03c2cf553751504bf575450dc3b22deea911af7f
parent	939b21ad4b9ed15d43b4ee8b17484e57ed55a01f [diff]