Implement the OpenCL kernel to compute the indirect convolution

- Implement indirect convolution kernel
- Add operator support
- Add test

Resolves COMPMID-5709

Change-Id: I9272304163471a5a40da7fdec204599f3c1d8e32
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8701
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index 20afbfc..46cdb06 100644
--- a/Android.bp
+++ b/Android.bp
@@ -649,6 +649,7 @@
         "src/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
         "src/gpu/cl/kernels/ClIm2ColKernel.cpp",
         "src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.cpp",
+        "src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp",
         "src/gpu/cl/kernels/ClMulKernel.cpp",
         "src/gpu/cl/kernels/ClPermuteKernel.cpp",
         "src/gpu/cl/kernels/ClPool2dKernel.cpp",
@@ -697,6 +698,7 @@
         "src/gpu/cl/operators/ClGemmConv2d.cpp",
         "src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp",
         "src/gpu/cl/operators/ClGemmLowpOutputStage.cpp",
+        "src/gpu/cl/operators/ClIndirectConv2d.cpp",
         "src/gpu/cl/operators/ClLogicalNot.cpp",
         "src/gpu/cl/operators/ClMul.cpp",
         "src/gpu/cl/operators/ClPRelu.cpp",
@@ -772,6 +774,7 @@
         "src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp",
         "src/runtime/CL/functions/CLGather.cpp",
         "src/runtime/CL/functions/CLGenerateProposalsLayer.cpp",
+        "src/runtime/CL/functions/CLIndirectConvolutionLayer.cpp",
         "src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp",
         "src/runtime/CL/functions/CLL2NormalizeLayer.cpp",
         "src/runtime/CL/functions/CLLSTMLayer.cpp",