Blame - src/gpu/cl/operators/ClConv2d.cpp - ml/ComputeLibrary

2021-07-28 11:20:04 +0100

[diff] [blame]

87

88

switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, CLScheduler::get().target()))

89

{

90

case ConvolutionMethod::WINOGRAD:

91

{

92

ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);

93

auto f = std::make_unique<ClWinogradConv2d>();

94

f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math);

95

_operator = std::move(f);

96

break;

97

}

98

case ConvolutionMethod::DIRECT:

99

{

100

ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);

101

auto f = std::make_unique<ClDirectConv2d>();

102

f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info);

103

_operator = std::move(f);

104

break;

105

}

Gian Marco Iodice

2022-12-28 13:53:51 +0000

[diff] [blame]

106

case ConvolutionMethod::INDIRECT:

107

{

108

ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);

Gian Marco Iodice

2022-12-28 13:53:51 +0000

[diff] [blame]

109

auto f = std::make_unique<ClIndirectConv2d>();

110

f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info);

111

_operator = std::move(f);

112

break;

113

}

Sheri Zhang

2021-07-28 11:20:04 +0100

[diff] [blame]

114

case ConvolutionMethod::GEMM:

115

{

Georgios Pinitas

1988463

2021-08-16 12:38:54 +0100

[diff] [blame]

116

auto f = std::make_unique<ClGemmConv2d>();

Sheri Zhang

2021-07-28 11:20:04 +0100

[diff] [blame]

117

f->configure(compile_context, src, weights, biases, dst, conv2d_info, weights_info);

118

_operator = std::move(f);

break;

}

default:

ARM_COMPUTE_ERROR("Not supported.");

123

break;

124

}

125

_aux_mem = _operator->workspace();

126

}

127

128

/** Check whether the given tensors and convolution settings can be run by ClConv2d.
 *
 * The convolution method is picked with ClConv2d::get_convolution_method() for the
 * GPU target reported by the CL scheduler, and the validation is then delegated to
 * the operator implementing that method.
 *
 * @param[in] src          Source tensor info. Must not be nullptr.
 * @param[in] weights      Weights tensor info. Must not be nullptr.
 * @param[in] biases       Biases tensor info. Not null-checked here; forwarded as-is to the selected operator.
 * @param[in] dst          Destination tensor info. Must not be nullptr.
 * @param[in] conv2d_info  Convolution descriptor (conv_info, act_info, enable_fast_math and num_groups are read here).
 * @param[in] weights_info Weights descriptor, forwarded to the method selection and to the GEMM-based operator.
 *
 * @return An empty Status on success, otherwise the error Status describing why the configuration is rejected.
 */
Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
                          const WeightsInfo &weights_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");

    const GPUTarget gpu_target = CLScheduler::get().target();

    switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, gpu_target))
    {
        case ConvolutionMethod::WINOGRAD:
        {
            // Validate Winograd-based convolution layer (grouping is rejected for this method)
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClWinogradConv2d is not supported");
            ARM_COMPUTE_RETURN_ON_ERROR(ClWinogradConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math));
            break;
        }
        case ConvolutionMethod::DIRECT:
        {
            // Validate direct convolution layer (grouping is rejected for this method)
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClDirectConv2d is not supported");
            ARM_COMPUTE_RETURN_ON_ERROR(ClDirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info));
            break;
        }
        case ConvolutionMethod::INDIRECT:
        {
            // Validate indirect convolution layer (grouping is rejected for this method)
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClIndirectConv2d is not supported");
            ARM_COMPUTE_RETURN_ON_ERROR(ClIndirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info));
            break;
        }
        case ConvolutionMethod::GEMM:
        {
            // Validate gemm-based convolution layer; it receives the full descriptor, including num_groups
            ARM_COMPUTE_RETURN_ON_ERROR(ClGemmConv2d::validate(src, weights, biases, dst, conv2d_info, weights_info));
            break;
        }
        default:
        {
            // Any other method the selection heuristic may pick (e.g. ConvolutionMethod::FFT) has no
            // operator here. Report it through the returned Status rather than raising a hard error,
            // so that validate() keeps its "query without failing" contract.
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(true, "Not supported.");
            break;
        }
    }

    return Status{};
}

ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,

174

const WeightsInfo &weights_info, const GPUTarget gpu_target)

175

{

176

ARM_COMPUTE_ERROR_ON_NULLPTR(src);

177

ARM_COMPUTE_ERROR_ON_NULLPTR(dst);

178

ARM_COMPUTE_ERROR_ON_NULLPTR(weights);

179

ARM_COMPUTE_UNUSED(weights_info);

Sheri Zhang

2021-07-28 11:20:04 +0100

[diff] [blame]

180

181

const PadStrideInfo conv_info = conv2d_info.conv_info;

182

const ActivationLayerInfo act_info = conv2d_info.act_info;

183

const Size2D dilation = conv2d_info.dilation;

184

bool enable_fast_math = conv2d_info.enable_fast_math;

185

186

const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);

187

const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);

188

const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);

189

190

/* Input spatial dims, kernel size, IFM/OFM, conv info*/

191

using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>;

192

using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>;

193

194

const std::vector<ConfigurationMethod> known_configs =

195

{

196

// Alexnet

197

ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT),

198

// VGG16 / VGG19

199

ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT),

200

// Mobilenet 224

201

ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),

202

// Mobilenet 160

203

ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),

204

// Mobilenet 224

205

ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),

206

// Mobilenet 160

207

ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),

208

};

209

210

const auto find_config = [&](ConfigurationMethod c)

211

{

212

const ConvolutionConfiguration config = c.first;

213

const PadStrideInfo info = std::get<3>(config);

214

const DataLayout data_layout = std::get<4>(config);

215

216

return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))

217

&& std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()

218

&& info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride() && (data_layout == src->data_layout());

219

};

220

221

std::vector<ConfigurationMethod>::const_iterator found;

222

if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())

223

{

224

return (*found).second;

225

}

226

227

if(dilation != Size2D(1U, 1U))

228

{

229

return ConvolutionMethod::GEMM;

}

else

{

if(src->data_layout() == DataLayout::NCHW)

234

{

235

// SRGAN

236

if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)

237

&& (ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info)))

238

{

239

return ConvolutionMethod::DIRECT;

240

}

241

if((weights->dimension(idx_h) > 5) && (src->dimension(idx_c) > dst->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)))

242

{

243

return ConvolutionMethod::FFT;

244

}

245

if(src->dimension(idx_c) < 16)

246

{

247

return ConvolutionMethod::GEMM;

248

}

249

return bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;

250

}

251

else

252

{

Gian Marco Iodice

ebbd529

2021-08-17 16:25:37 +0100

[diff] [blame]

253

const bool is_direct_valid = bool(ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info));

254

const bool is_wino_valid = bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math));

255

const size_t kernel_sz_direct_conv_thr = get_direct_conv_kernel_threshold_nhwc(gpu_target);

Sheri Zhang

2021-07-28 11:20:04 +0100

[diff] [blame]

256

257

// SRGAN case

258

if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)

259

&& is_direct_valid)

260

{

261

return ConvolutionMethod::DIRECT;

262

}

263

264

// Floating-point case: GeMM/Direct/Winograd

265

if(is_data_type_float(src->data_type()))

266

{

Gian Marco Iodice

2022-01-20 16:33:29 +0000

[diff] [blame]

267

// Get dst shape

Jakub Sujak

0d27b2e

2023-08-24 14:01:20 +0100

[diff] [blame]

268

TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, conv_info);

269

const bool is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr);

270

const bool is_ifm_ge_8 = src->dimension(idx_c) >= 8;

271

const bool is_ifm_ge_16 = src->dimension(idx_c) >= 16;

272

const bool is_ofm_lte_8 = weights->dimension(3U) <= 8;

273

const bool is_ofm_lt_64 = weights->dimension(3U) < 64;

274

const bool workload_gte_8192 = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192;

275

const bool is_ifm_gt_ofm = src->dimension(idx_c) > weights->dimension(3U);

276

const bool is_m_one = output_shape[1] * output_shape[2] == 1;

277

const bool is_unit_stride = (conv2d_info.conv_info.stride().first == 1) && (conv2d_info.conv_info.stride().second == 1);

278

const int32_t kernel_sz = weights->dimension(idx_w) * weights->dimension(idx_h);

Sheri Zhang

2021-07-28 11:20:04 +0100

[diff] [blame]

279

Gian Marco Iodice

1257131

2022-08-25 12:25:44 +0100

[diff] [blame]

280

// Run Winograd if valid and IFM >= 8

281

if(is_wino_valid && is_ifm_ge_8)

Sheri Zhang

2021-07-28 11:20:04 +0100

[diff] [blame]

282

{

Adnan AlSinan

2022-07-21 16:34:49 +0100

[diff] [blame]

283

if(is_ofm_lte_8)

284

{

285

if(gpu_target == arm_compute::GPUTarget::G71 || gpu_target == arm_compute::GPUTarget::G72 || get_arch_from_target(gpu_target) == arm_compute::GPUTarget::MIDGARD)

286

{

287

return ConvolutionMethod::WINOGRAD;

}

}

else

{

return ConvolutionMethod::WINOGRAD;

293

}

Sheri Zhang

2021-07-28 11:20:04 +0100

[diff] [blame]

294

}

Gian Marco Iodice

78baa48

2021-12-01 09:26:14 +0000

[diff] [blame]

295

Gian Marco Iodice

2022-01-20 16:33:29 +0000

[diff] [blame]

296

// Direct convolution case

Adnan AlSinan

cec1af5

2022-08-10 18:14:33 +0100

[diff] [blame]

297

if(is_direct_valid)

Sheri Zhang

2021-07-28 11:20:04 +0100

[diff] [blame]

298

{

Adnan AlSinan

2022-07-21 16:34:49 +0100

[diff] [blame]

299

if((gpu_target == arm_compute::GPUTarget::G71 || gpu_target == arm_compute::GPUTarget::G72 || get_arch_from_target(gpu_target) == arm_compute::GPUTarget::MIDGARD))

Gian Marco Iodice

2022-01-20 16:33:29 +0000

[diff] [blame]

300

{

Gian Marco Iodice

51d7119

2022-02-16 14:41:28 +0000

[diff] [blame]

301

if(is_large_kernel_sz && is_ifm_ge_16 && is_ifm_gt_ofm)

302

{

303

return ConvolutionMethod::DIRECT;

304

}

305

}

Adnan AlSinan

2022-07-21 16:34:49 +0100

[diff] [blame]

306

else if(gpu_target == arm_compute::GPUTarget::G76)

Gian Marco Iodice

51d7119

2022-02-16 14:41:28 +0000

[diff] [blame]

307

{

Adnan AlSinan

cec1af5

2022-08-10 18:14:33 +0100

[diff] [blame]

308

if((is_large_kernel_sz && workload_gte_8192 && is_ifm_ge_16) || (is_ofm_lte_8 && is_ifm_ge_16))

Gian Marco Iodice

51d7119

2022-02-16 14:41:28 +0000

[diff] [blame]

309

{

310

return ConvolutionMethod::DIRECT;

311

}

Gian Marco Iodice

2022-01-20 16:33:29 +0000

[diff] [blame]

312

}

Adnan AlSinan

2022-07-21 16:34:49 +0100

[diff] [blame]

313

else

314

{

Gian Marco Iodice

2022-12-28 13:53:51 +0000

[diff] [blame]

315

ConvolutionMethod preferred_conv_method = ConvolutionMethod::DIRECT;

316

317

const bool is_indirect_valid = bool(ClIndirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info));

318

319

// indirect conv2d should be called when:

320

// 1- When the kernel size is greater than 1x1 and less than or equal to 9x9 (81)

321

// 2- When the kernel size is odd

322

// 3- When the Gpu target is Arm Mali-G77

323

if(is_indirect_valid)

324

{

325

const bool is_kernel_sz_odd = kernel_sz % 2;

326

const bool is_g77 = gpu_target == GPUTarget::G77;

Jakub Sujak

0d27b2e

2023-08-24 14:01:20 +0100

[diff] [blame]

327

preferred_conv_method = (kernel_sz > 1) && (kernel_sz <= 81) && is_kernel_sz_odd && is_g77 ? ConvolutionMethod::INDIRECT : ConvolutionMethod::DIRECT;

Gian Marco Iodice

2022-12-28 13:53:51 +0000

[diff] [blame]

328

}

329

330

// Direct/indirect convolution used for the first layer of the network

Gian Marco Iodice

4478e1c

2022-09-06 15:06:40 +0100

[diff] [blame]

331

if(workload_gte_8192 && !is_ifm_ge_16 && !is_unit_stride && is_ofm_lt_64)

332

{

333

// In general, the question we should ask for the first convolution layer of a model is:

334

// when the execution time of im2col + gemm < direct?. Since im2col does not depend on the OFM, it means that

335

// when OFM is big enough, the contribution of im2col is small and the GEMM approach is preferable.

336

// From internal experiments, the OFM threshold is 64 (is_ofm_lt_64)

Gian Marco Iodice

2022-12-28 13:53:51 +0000

[diff] [blame]

337

return preferred_conv_method;

Gian Marco Iodice

4478e1c

2022-09-06 15:06:40 +0100

[diff] [blame]

338

}

339

340

if((is_large_kernel_sz || is_m_one) && workload_gte_8192 && is_ifm_ge_16)

341

{

Gian Marco Iodice

2022-12-28 13:53:51 +0000

[diff] [blame]

342

return preferred_conv_method;

Gian Marco Iodice

4478e1c

2022-09-06 15:06:40 +0100

[diff] [blame]

343

}

344

345

// Direct convolution used for the last layer of the network

346

if(is_ofm_lte_8)

Adnan AlSinan

2022-07-21 16:34:49 +0100

[diff] [blame]

347

{

Gian Marco Iodice

2022-12-28 13:53:51 +0000

[diff] [blame]

348

return preferred_conv_method;

Adnan AlSinan

2022-07-21 16:34:49 +0100

[diff] [blame]

349

}

350

}

Sheri Zhang