Use heuristics for setting dynamic fusion direct conv2d tile sizes

Resolves: COMPMID-5735
Change-Id: I9958413b69c5052cfa205dd0e9457cc4953aaf35
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/474818
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8724
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
index dc05825..1fbcb41 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -57,13 +57,24 @@
     return _fast_relaxed_math;
 }
 
+ClComponentDirectConv2dSettings &ClComponentDirectConv2dSettings::direct_conv_descriptor(const DirectConvComputeKernelInfo &desc)
+{
+    this->_desc = desc; // keep a copy of the descriptor supplied by the caller
+    return *this;       // hand back the settings object so setters can be chained
+}
+
+DirectConvComputeKernelInfo ClComponentDirectConv2dSettings::direct_conv_descriptor() const
+{
+    return this->_desc; // returned by value: the caller receives its own copy
+}
+
 Status ClComponentDirectConv2d::validate(
     const Properties                &properties,
     const ArgumentPack<ITensorInfo> &tensors,
     const Attributes                &attributes,
     const Settings                  &settings)
 {
-    ARM_COMPUTE_UNUSED(properties, settings);
+    ARM_COMPUTE_UNUSED(properties);
     const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
     const auto wei = tensors.get_const_tensor(TensorType::ACL_SRC_1);
     const auto bia = tensors.get_const_tensor(TensorType::ACL_SRC_2);
@@ -125,6 +136,11 @@
     // Data layout
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
 
+    const auto desc = settings.direct_conv_descriptor();
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 && desc.n0 != 16,
+                                    "N0 can only be: 1, 2, 3, 4, 8, and 16");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
+                                    "K0 can only be: 1, 2, 3, 4, 8, and 16");
     return Status{};
 }
 
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
index fec22b8..c3a70ef 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -25,6 +25,7 @@
 #define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
 
 #include "arm_compute/core/Error.h"
+#include "arm_compute/core/KernelDescriptors.h"
 #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
 #include <memory>
 
@@ -56,9 +57,15 @@
     /** Get fast_relaxed_math flag */
     bool fast_relaxed_math() const;
 
+    /** Set direct convolution descriptor */
+    ClComponentDirectConv2dSettings &direct_conv_descriptor(const DirectConvComputeKernelInfo &desc);
+    /** Get direct convolution descriptor */
+    DirectConvComputeKernelInfo direct_conv_descriptor() const;
+
 private:
-    bool _export_to_cl_image{ false };
-    bool _fast_relaxed_math{ true };
+    bool                        _export_to_cl_image{ false };
+    bool                        _fast_relaxed_math{ true };
+    DirectConvComputeKernelInfo _desc{}; // Direct convolution descriptor
 };
 
 /** Forward declaration */
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index 9cb4ee7..048ee01 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -23,16 +23,19 @@
  */
 #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
 
+#include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
 
+#include "src/common/utils/Log.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/dynamic_fusion/sketch/ArgumentPack.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
 #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
 #include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
-
-#include "src/common/utils/Log.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 namespace arm_compute
 {
@@ -85,6 +88,16 @@
     return true;
 }
 
+DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
+{
+    // Ask the factory for the heuristic matching the GPU target reported by the CL scheduler
+    const GPUTarget target    = CLScheduler::get().target();
+    const auto      heuristic = arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(target);
+
+    // The selected heuristic builds the descriptor from the tensor infos and the convolution parameters
+    return heuristic->configure(src, weights, conv_info);
+}
+
 constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
 } // namespace
 
@@ -112,6 +125,11 @@
                                                                                           attributes.pad().right,
                                                                                           attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
 
+        // Checks performed when dst is configured
+        if(dst->total_size() != 0)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), shape);
+        }
         auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(shape));
     }
 
@@ -175,6 +193,12 @@
                           const Conv2dAttributes &attributes)
 {
     ARM_COMPUTE_LOG_PARAMS(src, wei, bia, dst, attributes);
+    PadStrideInfo conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+                            attributes.pad().right,
+                            attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+    // Initialize the direct convolution descriptor
+    const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info);
+
     // Assert validation
     ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, dst, attributes));
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst);
@@ -182,10 +206,7 @@
 
     // Auto initialize dst tensor
     {
-        auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(),
-                                                                            PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
-                                                                                          attributes.pad().right,
-                                                                                          attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
+        auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(), conv_info); // use the default DimensionRoundingType
 
         auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
     }
@@ -221,6 +242,8 @@
                 arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
             }
 
+            settings.direct_conv_descriptor(desc);
+
             ArgumentPack<ITensorInfo> arguments;
             arguments.add_const_tensor(ACL_SRC_0, src);
             arguments.add_const_tensor(ACL_SRC_1, wei);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index 75e812a..6f7bf72 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -69,7 +69,7 @@
     ARM_COMPUTE_UNUSED(comp_group);
 
     const auto channel_idx   = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
-    const auto k0            = adjust_vec_size(is_data_type_quantized(_src->data_type()) ? 16u : 8u, _src->dimension(channel_idx));
+    const auto k0            = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
     const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0;
 
     std::string code = R"_(
@@ -303,13 +303,11 @@
 CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const
 {
     const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
-    const DataType     data_type   = _src->data_type();
 
-    /// NOTE: For now tile sizes (n0, m0, k0) are set by the execution window. This may change in the future
     const auto         root_window      = comp_group.get_root_component()->template_writer()->get_window();
     const unsigned int n0               = root_window.x().step();
     const unsigned int m0               = root_window.y().step();
-    const unsigned int k0               = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, _src->dimension(channel_idx));
+    const unsigned int k0               = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
     const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
 
     CLBuildOptions build_opts{};
@@ -369,15 +367,16 @@
     ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
 
     const auto output_shape = _dst->tensor_shape();
+    const auto desc         = _settings.direct_conv_descriptor();
 
-    const unsigned int vec_size = std::min(static_cast<unsigned int>(output_shape[0]), 4u);
-    const unsigned int num_rows = (_dst->tensor_shape()[0] > 16) ? ((_src->data_type() == DataType::F32) ? 2U : 4U) : 1U;
+    const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]);
+    const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1] * output_shape[2]);
 
     // Create and configure kernel window
-    Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
+    Window win = calculate_max_window(output_shape, Steps(n0, m0));
 
-    const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows);
-    win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
+    const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], m0);
+    win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, m0));
     win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
 
     return win;
diff --git a/src/gpu/cl/operators/ClDirectConv2d.cpp b/src/gpu/cl/operators/ClDirectConv2d.cpp
index ded275d..0215dba 100644
--- a/src/gpu/cl/operators/ClDirectConv2d.cpp
+++ b/src/gpu/cl/operators/ClDirectConv2d.cpp
@@ -30,10 +30,10 @@
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/gpu/cl/kernels/ClActivationKernel.h"
 #include "src/gpu/cl/kernels/ClDirectConv2dKernel.h"
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h"
-#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 #include "src/common/utils/Log.h"
 
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
similarity index 98%
rename from src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
rename to src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
index ba176f8..1bfb812 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h
similarity index 88%
rename from src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h
rename to src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h
index 1e4cb66..6b60b2c 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h
@@ -21,10 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H
-#define ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H
+#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGBIFROST
+#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGBIFROST
 
-#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 namespace arm_compute
 {
@@ -52,4 +52,4 @@
 };
 } // namespace opencl
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H */
+#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGBIFROST */
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp
similarity index 98%
rename from src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
rename to src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp
index b693568..8f2fd82 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h
similarity index 88%
rename from src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h
rename to src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h
index 2c65b88..f9d5c52 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h
@@ -21,10 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H
-#define ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H
+#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGVALHALL
+#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGVALHALL
 
-#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 namespace arm_compute
 {
@@ -52,4 +52,4 @@
 };
 } // namespace opencl
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H */
+#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGVALHALL */
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h b/src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h
similarity index 82%
rename from src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h
rename to src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h
index c1c2e43..232167f 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h
@@ -21,12 +21,12 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H
-#define ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H
+#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG
+#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG
 
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
-#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 #include <memory>
 
@@ -61,4 +61,4 @@
 };
 } // namespace opencl
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H */
+#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG */
diff --git a/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h b/src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h
similarity index 94%
rename from src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h
rename to src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h
index 837fa35..6104d73 100644
--- a/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h
+++ b/src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H
-#define ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H
+#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_ICLDIRECTCONVKERNELCONFIG
+#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_ICLDIRECTCONVKERNELCONFIG
 
 #include "arm_compute/core/GPUTarget.h"
 #include "arm_compute/core/KernelDescriptors.h"
@@ -112,4 +112,4 @@
 };
 } // namespace opencl
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H */
+#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_ICLDIRECTCONVKERNELCONFIG */