Blame - src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp - ml/ComputeLibrary

std::unique_ptr<arm_compute::cl_direct_conv::IClDirectConvKernelConfig> t = arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(gpu_target);

97

98

return t->configure(src, weights, conv_info);

99

}

100

SiCong Li

81fdadd

2022-11-23 09:58:18 +0000

[diff] [blame]

101

/** Initialize @p dst with the convolution output shape when it has not been configured yet.
 *
 * If @p dst already reports a non-zero total size it is left untouched. Otherwise the output shape is computed
 * from the source shape, the source data layout, the weights shape and the stride/padding held in @p attributes,
 * and @p dst is passed to auto_init_if_empty() together with a clone of @p src carrying that shape.
 *
 * @param[in,out] dst        Destination tensor info, initialized only if empty
 * @param[in]     src        Source tensor info
 * @param[in]     wei        Weights tensor info
 * @param[in]     attributes Convolution attributes providing stride and padding
 */
void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ITensorInfo *wei, const Conv2dAttributes &attributes)
{
    // Nothing to do when the destination has already been configured
    if(dst->total_size() != 0U)
    {
        return;
    }

    // Use the default DimensionRoundingType
    const PadStrideInfo conv_info(attributes.stride().x(), attributes.stride().y(),
                                  attributes.pad().left, attributes.pad().right,
                                  attributes.pad().top, attributes.pad().bottom,
                                  DimensionRoundingType::FLOOR);

    const auto dst_shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), src->data_layout(), wei->tensor_shape(), conv_info);

    auto_init_if_empty(*dst, src->clone()->set_tensor_shape(dst_shape));
}

SiCong Li

2022-11-04 10:11:32 +0000

[diff] [blame]

114

// Operator type tag handed to the operator group's new_operator() whenever this operator is fusion-tested or added
constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;

SiCong Li

f44bbc5

2022-08-29 18:25:51 +0100

[diff] [blame]

115

} // namespace

116

SiCong Li

81fdadd

2022-11-23 09:58:18 +0000

[diff] [blame]

117

/** Check whether a Conv2d configuration is supported for the given workload context.
 *
 * The checks are performed in this order and the first failure is returned:
 *  - @p src, @p wei and @p dst must not be nullptr
 *  - if @p dst is already configured, its shape must match the computed convolution output shape
 *  - @p src must be single-channel F16 or F32
 *  - @p src must use the NHWC data layout
 *  - the context's Gpu language must be OpenCL and it must provide a CL compile context
 *  - ClComponentDirectConv2d::validate() must accept the resulting arguments and settings
 *
 * @param[in] context    Workload context providing the Gpu target, Gpu language and CL compile context
 * @param[in] src        Source tensor info
 * @param[in] wei        Weights tensor info
 * @param[in] bia        Bias tensor info; forwarded to the component as-is
 * @param[in] dst        Destination tensor info; only a local copy is modified
 * @param[in] attributes Convolution attributes providing stride and padding
 *
 * @return An empty Status on success, otherwise the Status of the first failing check
 */
Status GpuConv2d::is_supported_op(const GpuWorkloadContext &context,
                                  const ITensorInfo        *src,
                                  const ITensorInfo        *wei,
                                  const ITensorInfo        *bia,
                                  const ITensorInfo        *dst,
                                  const Conv2dAttributes   &attributes)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);

    // Auto initialize dst tensor info on a local copy so the caller's dst is never modified
    TensorInfo dst_info_to_validate = *dst;
    const auto data_layout          = src->data_layout();

    // Use the default DimensionRoundingType
    const PadStrideInfo conv_info(attributes.stride().x(), attributes.stride().y(),
                                  attributes.pad().left, attributes.pad().right,
                                  attributes.pad().top, attributes.pad().bottom,
                                  DimensionRoundingType::FLOOR);
    const auto expected_dst_shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(), conv_info);

    // Checks performed when dst is configured
    if(dst->total_size() != 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), expected_dst_shape);
    }
    auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(expected_dst_shape));

    // Check support level
    // Data type
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
    // Data layout
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);

    // Check components
    const auto gpu_target = context.gpu_target();
    if(context.gpu_language() != GpuLanguage::OpenCL)
    {
        ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language");
    }

    const auto cl_compile_ctx = context.cl_compile_context();
    ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);

    // Validate Direct Conv2d Component
    const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });

    auto settings = ClComponentDirectConv2d::Settings();
    settings.export_to_cl_image(export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));

    // Fast relaxed math is requested only on Bifrost targets other than G71, and only for F32/F16 destinations
    const bool is_bifrost_except_g71 = (gpu_target != GPUTarget::G71) && ((gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST);
    const bool is_float_dst          = (dst_info_to_validate.data_type() == DataType::F32) || (dst_info_to_validate.data_type() == DataType::F16);
    settings.fast_relaxed_math(is_bifrost_except_g71 && is_float_dst);

    ArgumentPack<ITensorInfo> arguments;
    arguments.add_const_tensor(ACL_SRC_0, src);
    arguments.add_const_tensor(ACL_SRC_1, wei);
    arguments.add_const_tensor(ACL_SRC_2, bia);
    arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
    ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDirectConv2d::validate(properties, arguments, attributes, settings));

    return Status{};
}

SiCong Li

2022-11-23 09:58:18 +0000

[diff] [blame]

182

/** Validate that a Conv2d operator can be added to the given workload sketch.
 *
 * In order, this checks that:
 *  - @p src, @p wei and @p dst are not nullptr
 *  - @p src, @p wei, @p dst (and @p bia when provided) have a valid id, i.e. they were created from a sketch
 *  - the operator passes the fusion test of the sketch's operator group
 *  - the configuration is accepted by is_supported_op() for the sketch's Gpu context
 *
 * The destination is auto-initialized on a local copy, so the caller's @p dst is not modified.
 *
 * @param[in] sketch     Workload sketch the operator would be fused into
 * @param[in] src        Source tensor info
 * @param[in] wei        Weights tensor info
 * @param[in] bia        Bias tensor info; may be nullptr
 * @param[in] dst        Destination tensor info
 * @param[in] attributes Convolution attributes providing stride and padding
 *
 * @return An empty Status on success, otherwise the Status of the first failing check
 */
Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch,
                              const ITensorInfo       *src,
                              const ITensorInfo       *wei,
                              const ITensorInfo       *bia,
                              const ITensorInfo       *dst,
                              const Conv2dAttributes  &attributes)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);

    // Check if tensors have valid id. I.e. they are created from a sketch
    ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id() || !dst->has_valid_id());
    if(bia != nullptr)
    {
        // The bias is optional, so its id is only checked when it is present
        ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id());
    }

    // Auto initialize dst tensor info on a local copy
    TensorInfo dst_info_to_validate = *dst;
    calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes);

    // Perform fusion test
    // Pack the tensor infos that describe the candidate operator
    ArgumentPack<ITensorInfo> op_tensors;
    op_tensors.add_const_tensor(ACL_SRC_0, src);
    op_tensors.add_const_tensor(ACL_SRC_1, wei);
    op_tensors.add_const_tensor(ACL_SRC_2, bia);
    op_tensors.add_const_tensor(ACL_DST_0, &dst_info_to_validate);

    // Check if operator meets fusion constraints
    const auto candidate_op = sketch.implementation().operator_group().new_operator(operator_type, op_tensors);
    const bool can_be_fused = sketch.implementation().operator_group().try_add_operator(candidate_op);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!can_be_fused,
                                    "Operator fusion test failed. This operator cannot be fused into the workload");

    // Check if configuration is supported
    return is_supported_op(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes);
}

217

SiCong Li

f44bbc5

2022-08-29 18:25:51 +0100

[diff] [blame]

218

/** Create a Conv2d operator and add it to the workload sketch.
 *
 * Steps, in order:
 *  - validate the operator with validate_op() and raise an error if validation fails
 *  - build the direct convolution descriptor from @p src, @p wei and the stride/padding in @p attributes
 *  - auto-initialize @p dst if it is empty
 *  - for an OpenCL sketch, add a ClComponentDirectConv2d component to the component graph
 *  - register the operator in the sketch's operator group
 *
 * Validation runs before the descriptor is built so that null or otherwise invalid tensor infos are reported
 * through the validation error rather than being handed to the kernel configuration first.
 *
 * @param[in,out] sketch     Workload sketch the operator is fused into
 * @param[in]     src        Source tensor info
 * @param[in,out] wei        Weights tensor info; its padding is updated when export to CL image is enabled
 * @param[in]     bia        Bias tensor info; forwarded to the component as-is
 * @param[in,out] dst        Destination tensor info; auto-initialized if empty
 * @param[in]     attributes Convolution attributes providing stride and padding
 */
void GpuConv2d::create_op(GpuWorkloadSketch      &sketch,
                          ITensorInfo            *src,
                          ITensorInfo            *wei,
                          ITensorInfo            *bia,
                          ITensorInfo            *dst,
                          const Conv2dAttributes &attributes)
{
    ARM_COMPUTE_LOG_PARAMS(src, wei, bia, dst, attributes);

    // Assert validation before any tensor info is used
    ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, dst, attributes));
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst);

    // Initialize the direct convolution descriptor
    PadStrideInfo conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
                            attributes.pad().right,
                            attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
    const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info);

    // Auto initialize dst tensor
    calculate_and_init_dst_if_empty(dst, src, wei, attributes);

    // Translate into components and add to component graph
    auto &comp_graph = sketch.implementation().component_graph();

    const auto sketch_ctx = sketch.implementation().context();

    const auto data_layout = src->data_layout();
    const auto gpu_target  = sketch_ctx->gpu_target();

    if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
    {
        const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
        ARM_COMPUTE_ERROR_ON(cl_compile_ctx == nullptr);

        // Add Direct Conv2d Component
        {
            auto properties = IGpuKernelComponent::Properties();
            properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });

            auto settings = ClComponentDirectConv2d::Settings();

            settings.export_to_cl_image(
                export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));

            // Fast relaxed math is requested only on Bifrost targets other than G71, and only for F32/F16 destinations
            settings.fast_relaxed_math(
                (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
                && (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));

            // When export to CL image is enabled, the weights' padding is updated accordingly
            if(settings.export_to_cl_image())
            {
                arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
            }

            settings.direct_conv_descriptor(desc);

            ArgumentPack<ITensorInfo> arguments;
            arguments.add_const_tensor(ACL_SRC_0, src);
            arguments.add_const_tensor(ACL_SRC_1, wei);
            arguments.add_const_tensor(ACL_SRC_2, bia);
            arguments.add_const_tensor(ACL_DST_0, dst);
            comp_graph.add_new_component<ClComponentDirectConv2d>(properties, arguments, attributes, settings);
        }
    }
    else
    {
        ARM_COMPUTE_ERROR("Unimplemented Gpu language");
    }

    // Set up fusion test by adding to the Operator Group
    // Note this has to be performed after all the components have been successfully added to the component graph

    // Pack tensor infos
    ArgumentPack<ITensorInfo> tensors;
    tensors.add_const_tensor(ACL_SRC_0, src);
    tensors.add_const_tensor(ACL_SRC_1, wei);
    tensors.add_const_tensor(ACL_SRC_2, bia);
    tensors.add_const_tensor(ACL_DST_0, dst);

    const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
    sketch.implementation().operator_group().add_operator(op);
}

299

300

} // namespace dynamic_fusion

301

} // namespace experimental

302

} // namespace arm_compute