Implement CLDirectConv3D for F32/F16 data types

Resolve COMPMID-4660

Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: Ibd66ec1eb6faa60086981b1e3a9c12561df3445f
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6420
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/Android.bp b/Android.bp
index 8b73de5..36d392d 100644
--- a/Android.bp
+++ b/Android.bp
@@ -100,6 +100,7 @@
         "src/core/CL/cl_kernels/nhwc/depth_to_space.cl",
         "src/core/CL/cl_kernels/nhwc/dequantization_layer.cl",
         "src/core/CL/cl_kernels/nhwc/direct_convolution.cl",
+        "src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl",
         "src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl",
         "src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl",
         "src/core/CL/cl_kernels/nhwc/im2col.cl",
@@ -512,6 +513,7 @@
         "src/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
         "src/gpu/cl/kernels/ClDequantizeKernel.cpp",
         "src/gpu/cl/kernels/ClDirectConv2dKernel.cpp",
+        "src/gpu/cl/kernels/ClDirectConv3dKernel.cpp",
         "src/gpu/cl/kernels/ClElementwiseKernel.cpp",
         "src/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp",
         "src/gpu/cl/kernels/ClFillKernel.cpp",
@@ -566,6 +568,7 @@
         "src/gpu/cl/operators/ClCrop.cpp",
         "src/gpu/cl/operators/ClDequantize.cpp",
         "src/gpu/cl/operators/ClDirectConv2d.cpp",
+        "src/gpu/cl/operators/ClDirectConv3d.cpp",
         "src/gpu/cl/operators/ClElementwiseOperations.cpp",
         "src/gpu/cl/operators/ClElementwiseUnary.cpp",
         "src/gpu/cl/operators/ClFill.cpp",
@@ -618,6 +621,7 @@
         "src/runtime/CL/functions/CLChannelShuffleLayer.cpp",
         "src/runtime/CL/functions/CLComparison.cpp",
         "src/runtime/CL/functions/CLConcatenateLayer.cpp",
+        "src/runtime/CL/functions/CLConv3D.cpp",
         "src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp",
         "src/runtime/CL/functions/CLConvolutionLayer.cpp",
         "src/runtime/CL/functions/CLCopy.cpp",