Fix performance regression in Transposed Convolution
Resolves: COMPMID-5849
Change-Id: I86f8bbc1f3a7c12c66d5ad8fcd74dd9e69629aa0
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9102
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Dynamic-Fusion: Jakub Sujak <jakub.sujak@arm.com>
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index 56e9dae..5c25cba 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -150,10 +150,12 @@
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_n = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+ const size_t ofm = weights->tensor_shape()[idx_n];
if(weights->dimension(idx_w) != deconv_info.stride().first || weights->dimension(idx_h) != deconv_info.stride().second)
{
- if(input->data_layout() == DataLayout::NHWC)
+ if(input->data_layout() == DataLayout::NHWC && ofm <= 16)
{
return DeconvolutionMethod::DIRECT;
}