/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/gpu/cl/operators/ClConv2d.h"

#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
#include "src/gpu/cl/operators/ClDirectConv2d.h"
#include "src/gpu/cl/operators/ClGemmConv2d.h"
#include "src/gpu/cl/operators/ClWinogradConv2d.h"

#include "src/common/utils/Log.h"

#include <memory>

namespace
{
/** Get the kernel-size threshold for using the direct convolution method with the NHWC data layout.
 *
 * @note Direct convolution should be executed when both spatial dimensions of the kernel are greater than or equal to the value returned by this function
 *
 * @param[in] gpu_target GPU target
 *
 * @return the kernel-size threshold at or above which direct convolution is preferred with the NHWC data layout
 */
size_t get_direct_conv_kernel_threshold_nhwc(arm_compute::GPUTarget gpu_target)
{
    switch(gpu_target)
    {
        case arm_compute::GPUTarget::G76:
        case arm_compute::GPUTarget::G77:
        case arm_compute::GPUTarget::G78:
            return 5;
        case arm_compute::GPUTarget::G71:
        case arm_compute::GPUTarget::G72:
        case arm_compute::GPUTarget::MIDGARD:
        case arm_compute::GPUTarget::BIFROST:
            return 7;
        default:
            return 5;
    }
}
} // namespace

namespace arm_compute
{
namespace opencl
{
using namespace arm_compute::misc::shape_calculator;

ClConv2d::ClConv2d()
    : _operator()
{
}

ClConv2d::~ClConv2d() = default;
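
// A minimal usage sketch (illustrative only, not part of this file): the operator is
// configured once against tensor metadata and then run with an ITensorPack binding the
// actual tensors. The ACL_SRC_0/1/2 and ACL_DST slots below follow the convention used
// elsewhere in the library for convolution operators; treat the exact pack layout as an
// assumption to be checked against the operator's tests.
//
//     ClConv2d conv;
//     conv.configure(CLKernelLibrary::get().get_compile_context(),
//                    &src_info, &weights_info, &biases_info, &dst_info, conv2d_info, WeightsInfo());
//
//     ITensorPack pack = { { TensorType::ACL_SRC_0, &src },
//                          { TensorType::ACL_SRC_1, &weights },
//                          { TensorType::ACL_SRC_2, &biases },
//                          { TensorType::ACL_DST, &dst } };
//     conv.run(pack);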

void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info,
                         const WeightsInfo &weights_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_ERROR_THROW_ON(ClConv2d::validate(src, weights, biases, dst, conv2d_info, weights_info));
    ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv2d_info, weights_info);

    switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, CLScheduler::get().target()))
    {
        case ConvolutionMethod::WINOGRAD:
        {
            ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);
            ARM_COMPUTE_ERROR_ON(conv2d_info.post_ops.size() > 0);
            auto f = std::make_unique<ClWinogradConv2d>();
            f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math);
            _operator = std::move(f);
            break;
        }
        case ConvolutionMethod::DIRECT:
        {
            ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);
            ARM_COMPUTE_ERROR_ON(conv2d_info.post_ops.size() > 0);
            auto f = std::make_unique<ClDirectConv2d>();
            f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info);
            _operator = std::move(f);
            break;
        }
        case ConvolutionMethod::GEMM:
        {
            auto f = std::make_unique<ClGemmConv2d>();
            f->configure(compile_context, src, weights, biases, dst, conv2d_info, weights_info);
            _operator = std::move(f);
            break;
        }
        default:
            ARM_COMPUTE_ERROR("Not supported.");
            break;
    }
    // Cache the auxiliary memory requirements of the selected operator so that
    // callers can query them through ClConv2d::workspace().
    _aux_mem = _operator->workspace();
}

Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
                          const WeightsInfo &weights_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");

    const GPUTarget gpu_target = CLScheduler::get().target();

    switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, gpu_target))
    {
        case ConvolutionMethod::WINOGRAD:
        {
            // Validate Winograd
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClWinogradConv2d is not supported");
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClWinogradConv2d does not support PostOps");
            ARM_COMPUTE_RETURN_ON_ERROR(ClWinogradConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math));
            break;
        }
        case ConvolutionMethod::DIRECT:
        {
            // Validate direct convolution layer
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClDirectConv2d is not supported");
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClDirectConv2d does not support PostOps");
            ARM_COMPUTE_RETURN_ON_ERROR(ClDirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info));
            break;
        }
        case ConvolutionMethod::GEMM:
        {
            // Validate GEMM-based convolution layer
            ARM_COMPUTE_RETURN_ON_ERROR(ClGemmConv2d::validate(src, weights, biases, dst, conv2d_info, weights_info));
            break;
        }
        default:
            ARM_COMPUTE_ERROR("Not supported.");
            break;
    }

    return Status{};
}

ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
                                                   const WeightsInfo &weights_info, const GPUTarget gpu_target)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_UNUSED(weights_info);

    const PadStrideInfo       conv_info        = conv2d_info.conv_info;
    const ActivationLayerInfo act_info         = conv2d_info.act_info;
    const Size2D              dilation         = conv2d_info.dilation;
    bool                      enable_fast_math = conv2d_info.enable_fast_math;

    const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
    const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);

    /* Input spatial dims, kernel size, IFM/OFM, conv info */
    using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>;
    using ConfigurationMethod      = std::pair<ConvolutionConfiguration, ConvolutionMethod>;

    const std::vector<ConfigurationMethod> known_configs =
    {
        // Alexnet
        ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
        // VGG16 / VGG19
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
        // Mobilenet 224
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
        // Mobilenet 160
        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
        // Mobilenet 224
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
        // Mobilenet 160
        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
    };
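    // The table above maps convolution shapes from popular networks straight to a
    // method; a candidate matches when the input spatial size, kernel size, IFM/OFM,
    // padding, stride and data layout all agree (see find_config below).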

    const auto find_config = [&](ConfigurationMethod c)
    {
        const ConvolutionConfiguration config      = c.first;
        const PadStrideInfo            info        = std::get<3>(config);
        const DataLayout               data_layout = std::get<4>(config);

        return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
               && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
               && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride() && (data_layout == src->data_layout());
    };

    std::vector<ConfigurationMethod>::const_iterator found;
    if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
    {
        return (*found).second;
    }

    if(dilation != Size2D(1U, 1U))
    {
        return ConvolutionMethod::GEMM;
    }
    else
    {
        if(src->data_layout() == DataLayout::NCHW)
        {
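            // NCHW heuristics, checked in priority order: direct convolution for the
            // very large SRGAN-style 9x9 kernels, FFT for large kernels with a shrinking
            // channel count, GEMM for shallow inputs (IFM < 16), otherwise Winograd
            // whenever it validates.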
            // SRGAN
            if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
               && (ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info)))
            {
                return ConvolutionMethod::DIRECT;
            }
            // FFT: considered for kernels taller than 5 when the channel count shrinks (IFM > OFM)
            if((weights->dimension(idx_h) > 5) && (src->dimension(idx_c) > dst->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)))
            {
                return ConvolutionMethod::FFT;
            }
            if(src->dimension(idx_c) < 16)
            {
                return ConvolutionMethod::GEMM;
            }
            return bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
        }
        else
        {
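            // NHWC heuristics: establish up front whether direct and Winograd are valid
            // for this configuration, and fetch the per-GPU kernel-size threshold at or
            // above which direct convolution is preferred.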
            const bool   is_direct_valid           = bool(ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info));
            const bool   is_wino_valid             = bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math));
            const size_t kernel_sz_direct_conv_thr = get_direct_conv_kernel_threshold_nhwc(gpu_target);

            // SRGAN case
            if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
               && is_direct_valid)
            {
                return ConvolutionMethod::DIRECT;
            }

            // Floating-point case: GEMM/Direct/Winograd
            if(is_data_type_float(src->data_type()))
            {
                // Get dst shape
                TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, conv_info);
                const bool  is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr);
                const bool  is_ifm_ge_16       = src->dimension(idx_c) >= 16;
                const bool  is_ofm_lte_8       = weights->dimension(3U) <= 8;
                const bool  workload_gte_8192  = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192;
                const bool  is_ifm_gt_ofm      = src->dimension(idx_c) > weights->dimension(3U);
                const bool  is_m_one           = output_shape[1] * output_shape[2] == 1;
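
                // In the implicit-GEMM view of an NHWC convolution, M is the number of output
                // spatial positions (W * H), N the number of output channels and K the reduction
                // size, so is_m_one flags the degenerate GEMV-like case, while workload_gte_8192
                // is a rough proxy for the amount of vectorised output work.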

                // Run Winograd if valid and IFM >= 16; when OFM <= 8, Winograd is only
                // picked on G71/G72/Midgard, while newer targets fall through to the
                // direct/GEMM heuristics below.
                if(is_wino_valid && is_ifm_ge_16)
                {
                    if(is_ofm_lte_8)
                    {
                        if(gpu_target == arm_compute::GPUTarget::G71 || gpu_target == arm_compute::GPUTarget::G72 || get_arch_from_target(gpu_target) == arm_compute::GPUTarget::MIDGARD)
                        {
                            return ConvolutionMethod::WINOGRAD;
                        }
                    }
                    else
                    {
                        return ConvolutionMethod::WINOGRAD;
                    }
                }

                // Direct convolution case
                if(is_direct_valid)
                {
                    if((gpu_target == arm_compute::GPUTarget::G71 || gpu_target == arm_compute::GPUTarget::G72 || get_arch_from_target(gpu_target) == arm_compute::GPUTarget::MIDGARD))
                    {
                        // Midgard and early Bifrost (G71/G72): direct only for large kernels
                        // with IFM >= 16 and more input than output channels
                        if(is_large_kernel_sz && is_ifm_ge_16 && is_ifm_gt_ofm)
                        {
                            return ConvolutionMethod::DIRECT;
                        }
                    }
                    else if(gpu_target == arm_compute::GPUTarget::G76)
                    {
                        if((is_large_kernel_sz && workload_gte_8192 && is_ifm_ge_16) || (is_ofm_lte_8 && is_ifm_ge_16))
                        {
                            return ConvolutionMethod::DIRECT;
                        }
                    }
                    else
                    {
                        if(((is_large_kernel_sz || is_m_one) && workload_gte_8192) || is_ofm_lte_8)
                        {
                            return ConvolutionMethod::DIRECT;
                        }
                    }
                }

                // Default case
                return ConvolutionMethod::GEMM;
            }

            // Generic case for quantized types: only GEMM is supported
            return ConvolutionMethod::GEMM;
        }
    }
}

void ClConv2d::run(ITensorPack &tensors)
{
    prepare(tensors);
    _operator->run(tensors);
}

void ClConv2d::prepare(ITensorPack &tensors)
{
    _operator->prepare(tensors);
}

experimental::MemoryRequirements ClConv2d::workspace() const
{
    return _aux_mem;
}
} // namespace opencl
} // namespace arm_compute