Add DirectConvolution2D kernel component for dynamic fusion
Resolves: COMPMID-5156
Change-Id: I438da924cb80d3bce72106b06ca7181e0606bd01
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7399
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
index 27ab294..23629f4 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingAPI.h
@@ -56,7 +56,10 @@
Image_3D_Export_To_ClImage2D,
Tensor_3D,
- Tensor_4D
+ Tensor_4D,
+
+ Tensor_4D_t_Buffer,
+ Tensor_4D_t_Image
};
/** Describes all the info required to add a kernel argument at run time */
struct ClKernelArgRuntimeDescriptor
@@ -100,13 +103,12 @@
/** Component: Tensor Argument */
struct ClTensorDescriptor
{
- ClTensorDescriptor(ITensorInfo *info, unsigned int dim)
- : tensor_info(info), slice_dim(dim)
+ ClTensorDescriptor(ITensorInfo *info) // NOTE(review): single-argument ctor is not explicit, so an ITensorInfo* converts implicitly to a descriptor - confirm this is intended
+ : tensor_info(info)
{
}
ITensorInfo *tensor_info;
- unsigned int slice_dim;
};
Status add_tensor_argument(ClKernelBlueprint &, const ClTensorDescriptor &, ArgumentID &);
@@ -134,7 +136,7 @@
};
Status add_kcomp_gemm_native(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const GemmNativeDescriptor &,
- ArgumentID input_id, ArgumentID weights_id, ArgumentID bias_id, ArgumentID &dst_id);
+ ArgumentID lhs_id, ArgumentID rhs_id, ArgumentID bias_id, ArgumentID &dst_id);
/** Component: Eltwise Add */
struct EltwiseAddDescriptor
@@ -150,6 +152,14 @@
};
Status add_kcomp_activation(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const ActivationDescriptor &, ArgumentID src_id, ArgumentID &dst_id);
+/** Component: Direct Convolution */
+struct DirectConvolutionDescriptor
+{
+ PadStrideInfo pad_stride_info{}; /**< Pad/stride settings for the convolution; value-initialized when not set by the caller */
+};
+Status add_kcomp_direct_conv(ClKernelBlueprint &, const ClKernelComponentDescriptor &, const DirectConvolutionDescriptor &,
+ ArgumentID src_id, ArgumentID weight_id, ArgumentID bias_id, ArgumentID &dst_id);
+
enum class ClippingStrategy
{
TOP_LEFT,
@@ -239,8 +249,9 @@
///// Tuning /////
struct ClExecutionDescriptor
{
- cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */
- cl::NDRange gws{}; /**< Global work-group to be used */
+ cl::NDRange suggested_lws{}; /**< Suggested local work-group size for optimal performance if not zero */
+ cl::NDRange gws{}; /**< Global work size to be used */
+ bool skip_sliding_window{ false }; /**< Skip sliding window slices during execution loop */
};
Status tune_static(ClExecutionDescriptor &, const ClKernelCode &);