COMPMID-3639: (3RDPARTY_UPDATE) Move CL kernels to src

Change-Id: I10d27db788e5086adae1841e3e2441cd9b76ef84
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4310
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
index de6d561..e35905f 100644
--- a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
+++ b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
@@ -24,14 +24,18 @@
 #ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H
 #define ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H
 
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 
+#include <memory>
+
 namespace arm_compute
 {
 // Forward declarations
+class CLCompileContext;
+class CLFuseBatchNormalizationKernel;
 class ICLTensor;
+class ITensorInfo;
 
 /** Basic function to fuse the batch normalization node to a preceding convolution node */
 class CLFuseBatchNormalization : public IFunction
@@ -48,7 +52,7 @@
     /** Allow instances of this class to be moved */
     CLFuseBatchNormalization &operator=(CLFuseBatchNormalization &&) = default;
     /** Default destructor */
-    ~CLFuseBatchNormalization() = default;
+    ~CLFuseBatchNormalization();
     /** Set the input and output tensors.
      *
      * @param[in]  input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
@@ -112,7 +116,7 @@
     void run() override;
 
 private:
-    CLFuseBatchNormalizationKernel _fuse_bn_kernel;
+    std::unique_ptr<CLFuseBatchNormalizationKernel> _fuse_bn_kernel;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H */