Blame - src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp - ml/ComputeLibrary

inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector<cl::Image2D> &cl_images)

61

{

SiCong Li

19844f6

2023-05-16 16:46:34 +0100

[diff] [blame^]

62

ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);

63

SiCong Li

f44bbc5

2022-08-29 18:25:51 +0100

[diff] [blame]

64

switch(arg.type)

65

{

66

case GpuKernelArgumentInfo::Type::Scalar:

67

{

68

ARM_COMPUTE_ERROR("Unsupported yet");

break;

}

case GpuKernelArgumentInfo::Type::Vector:

73

{

74

add_1D_tensor_argument(idx, tensor, arg_slice);

break;

}

case GpuKernelArgumentInfo::Type::Image:

79

{

80

add_2D_tensor_argument(idx, tensor, arg_slice);

81

break;

82

}

83

case GpuKernelArgumentInfo::Type::Image_Reinterpret_As_3D:

84

{

85

add_2D_tensor_argument(idx, tensor, arg_slice);

86

const unsigned int total_cross_plane_pad = tensor->info()->padding().top + tensor->info()->padding().bottom;

87

_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(total_cross_plane_pad));

88

break;

89

}

90

case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:

91

{

92

const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));

93

const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];

Gian Marco Iodice

3cce35d

2022-12-30 16:07:45 +0000

[diff] [blame]

94

cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);

SiCong Li

f44bbc5

2022-08-29 18:25:51 +0100

[diff] [blame]

95

cl_images.push_back(tensor_image2d);

96

_kernel.setArg(idx++, tensor_image2d);

break;

}

case GpuKernelArgumentInfo::Type::Image_3D:

101

{

102

add_2D_tensor_argument(idx, tensor, arg_slice);

103

_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));

104

break;

105

}

106

case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:

107

{

108

const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));

109

const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];

Gian Marco Iodice

3cce35d

2022-12-30 16:07:45 +0000

[diff] [blame]

110

cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);

SiCong Li

f44bbc5

2022-08-29 18:25:51 +0100

[diff] [blame]

111

cl_images.push_back(tensor_image2d);

112

_kernel.setArg(idx++, tensor_image2d);

113

_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));

break;

}

case GpuKernelArgumentInfo::Type::Tensor_3D:

118

{

119

add_3D_tensor_argument(idx, tensor, arg_slice);

break;

}

case GpuKernelArgumentInfo::Type::Tensor_4D:

124

{

125

add_4D_tensor_argument(idx, tensor, arg_slice);

126

break;

127

}

128

case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:

129

{

130

add_4d_tensor_nhwc_argument(idx, tensor);

131

break;

132

}

133

case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:

134

{

135

const size_t image_w = tensor->info()->dimension(0) / 4;

136

const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1);

137

const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];

138

139

cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(),

Gian Marco Iodice

3cce35d

2022-12-30 16:07:45 +0000

[diff] [blame]

140

TensorShape(image_w, image_h), tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);

SiCong Li

f44bbc5

2022-08-29 18:25:51 +0100

[diff] [blame]

141

cl_images.push_back(tensor_image2d);

142

143

_kernel.setArg(idx++, tensor_image2d);

144

add_4d_tensor_nhwc_argument(idx, tensor);

145

break;

146

}

SiCong Li

19844f6

2023-05-16 16:46:34 +0100

[diff] [blame^]

147

case GpuKernelArgumentInfo::Type::Tensor_Special_0:

148

{

149

const ITensorInfo *info = tensor->info();

150

const Strides &strides = info->strides_in_bytes();

151

152

_kernel.setArg(idx++, tensor->cl_buffer());

153

const size_t dim1xdim2 = info->tensor_shape()[1] * info->tensor_shape()[2];

154

_kernel.setArg<cl_int>(idx++, static_cast<int32_t>(dim1xdim2));

155

const size_t stride1 = strides[1];

156

_kernel.setArg<cl_int>(idx++, static_cast<int32_t>(stride1));

157

break;

158

}

SiCong Li

f44bbc5

2022-08-29 18:25:51 +0100

[diff] [blame]

159

default:

160

{

161

ARM_COMPUTE_ERROR("Unsupported");

}

}

}

/** Bind every tensor listed in _arguments to the kernel and enqueue it on @p queue.
 *
 * @param[in,out] tensors Tensor pack that the tensors are looked up in, using the ids stored in _arguments.
 * @param[in]     window  Execution window. Must be a valid sub-window of the window the kernel was configured with.
 * @param[in,out] queue   Command queue the kernel is enqueued on.
 */
void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);

    Window slice = window.first_slice_window_3D();
    // Don't slice matrix along the z dimension if matrix has just 2 dimensions and matrix A more than 2
    // This scenario can happen when the matrix multiplication is used to perform a convolution operation
    Window slice_fixed_z = slice;
    slice_fixed_z.set(Window::DimX, Window::Dimension(0, 1, 1));
    slice_fixed_z.set(Window::DimY, Window::Dimension(0, 1, 1));

    /// NOTE: Parameters extracted from old kernels. So far they seem to be constant
    /// but we may need to make them into another configuration passed from GpuWorkloadSourceCode if needed in the future
    constexpr bool slide_along_dimz     = true;
    constexpr bool skip_sliding_window  = false;
    constexpr bool use_dummy_work_items = false;

    do
    {
        // Kernel argument indices restart from 0 for every dispatch, so the counter lives inside the loop.
        // With the constants above the loop body runs exactly once, so this is equivalent to a single
        // counter declared outside; it only matters if more than one slice is ever dispatched.
        unsigned int idx = 0;

        // Set kernel arguments
        Window arg_slice = slice;
        // CLImages created from tensor arguments. Need to be retained until enqueue
        std::vector<cl::Image2D> cl_images;
        // Bind by reference: the entries are only read here, there is no need to copy them on every run
        for(const auto &id_arg : _arguments)
        {
            const auto &arg    = id_arg.second;
            auto        tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first));
            ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
            ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
            if(!slide_along_dimz)
            {
                // The stride_z for matrix must be zero if we do not slice
                ARM_COMPUTE_ERROR_ON(tensor->info()->strides_in_bytes()[3] != 0);
                arg_slice = slice_fixed_z;
            }
            add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, arg_slice, cl_images);
        }

        // Dispatch kernel
        enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
    }
    // NOTE(review): skip_sliding_window is false, so this condition is always false and only the first
    // 3D slice is ever dispatched. Confirm whether `!skip_sliding_window` was intended; the condition is
    // left untouched here because changing it would alter how many times the kernel is enqueued.
    while(skip_sliding_window && window.slide_window_slice_3D(slice));
}

211

212

} // namespace dynamic_fusion

213

} // namespace experimental

214

} // namespace arm_compute