Fix CLConv3D filelist and comments

Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: I4d48f1b8eba6681a9de0ae5f1fd8a4ad1edf7fe8
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6439
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

commit: 273c28cc225d8edc7921397a754649529610c4e2 [log] [tgz]
author: Giorgio Arena <giorgio.arena@arm.com> Thu Oct 14 15:59:15 2021 +0100
committer: Giorgio Arena <giorgio.arena@arm.com> Fri Oct 15 10:23:21 2021 +0000
tree: 741ed825b21b3a4108e7c19f1e3106a2e83f3d5e
parent: 945ae9e4027655267170ecc56563c362d8110d1e [diff]
diff --git a/filelist.json b/filelist.json
index 6a78840..2c28c05 100644
--- a/filelist.json
+++ b/filelist.json

@@ -284,7 +284,6 @@
       "files": {
         "common": [
           "src/gpu/cl/kernels/ClDirectConv2dKernel.cpp",
-          "src/gpu/cl/kernels/ClDirectConv3dKernel.cpp",
           "src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp",
           "src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp",
           "src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp",
@@ -292,12 +291,10 @@
           "src/gpu/cl/kernels/ClCol2ImKernel.cpp",
           "src/gpu/cl/operators/ClConv2d.cpp",
           "src/gpu/cl/operators/ClDirectConv2d.cpp",
-          "src/gpu/cl/operators/ClDirectConv3d.cpp",
           "src/gpu/cl/operators/ClGemmConv2d.cpp",
           "src/gpu/cl/operators/ClWinogradConv2d.cpp",
           "src/gpu/cl/kernels/ClWeightsReshapeKernel.cpp",
           "src/runtime/CL/functions/CLConvolutionLayer.cpp",
-          "src/runtime/CL/functions/CLConv3D.cpp",
           "src/runtime/CL/functions/CLDirectConvolutionLayer.cpp",
           "src/runtime/CL/functions/CLFFTConvolutionLayer.cpp",
           "src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp",
@@ -305,6 +302,15 @@
         ]
       }
     },
+    "Conv3d": {
+      "files": {
+        "common": [
+          "src/gpu/cl/kernels/ClDirectConv3dKernel.cpp",
+          "src/gpu/cl/operators/ClDirectConv3d.cpp",
+          "src/runtime/CL/functions/CLConv3D.cpp"
+        ]
+      }
+    },
     "Copy": {
       "files": {
         "common": [

diff --git a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl
index df8be00..d11be5b 100644
--- a/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl
+++ b/src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl

@@ -26,7 +26,7 @@
 #include "tile_helpers.h"
 
 //! @cond Doxygen_Suppress
-/** OpenCL kernel to compute the direct convolution.
+/** OpenCL kernel to compute the direct convolution 3d.
  *
  * @note Data layout supported: NDHWC
  * @note Data type supported: F32/F16
@@ -37,7 +37,9 @@
  * @note The spatial dimensions of the source tensor must be passed at compile time using -DSRC_WIDTH, -DSRC_HEIGHT and -DSRC_DEPTH (e.g. -DSRC_WIDTH=96, -DSRC_HEIGHT=64, -DSRC_DEPTH=32)
  * @note The spatial dimensions of the destination tensor must be passed at compile time using -DDST_WIDTH, -DDST_HEIGHT and -DDST_DEPTH (e.g. -DDST_WIDTH=96, -DDST_HEIGHT=64, -DDST_DEPTH=32)
  * @note The channels of the source tensor must be passed at compile time using -DSRC_CHANNELS (e.g. -DSRC_CHANNELS=64)
- * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=float)
+ * @note The channels of the destination tensor must be passed at compile time using -DDST_CHANNELS (e.g. -DDST_CHANNELS=64)
+ * @note The data type must be passed at compile time using -DDATA_TYPE (e.g. -DDATA_TYPE=half)
+ * @note The data type of the accumulators must be passed at compile time using -DACC_DATA_TYPE (e.g. -DACC_DATA_TYPE=float)
  * @note The number of M0 rows (width*height) to process must be passed at compile time using -DM0 (e.g. -DM0=2)
  * @note The number of N0 output channels to process must be passed at compile time using -DN0 (e.g. -DN0=2)
  * @note The number of K0 inner accumulations must be passed at compile time using -DK0 (e.g. -DK0=2)
@@ -143,8 +145,6 @@
         int yk = (i / _IWEI_WIDTH) % _IWEI_HEIGHT;
         int zk = i / (_IWEI_WIDTH * _IWEI_HEIGHT);
 
-        __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes;
-
         int k = 0;
         for(; k <= (_ISRC_CHANNELS - K0); k += K0)
         {
commit	273c28cc225d8edc7921397a754649529610c4e2	[log] [tgz]
author	Giorgio Arena <giorgio.arena@arm.com>	Thu Oct 14 15:59:15 2021 +0100
committer	Giorgio Arena <giorgio.arena@arm.com>	Fri Oct 15 10:23:21 2021 +0000
tree	741ed825b21b3a4108e7c19f1e3106a2e83f3d5e
parent	945ae9e4027655267170ecc56563c362d8110d1e [diff]