Blame - src/runtime/CL/functions/CLGEMM.cpp - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

66

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

67

GEMMType gemm_type = GEMMType::RESHAPED_V1;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

68

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

69

if(gpu_target_is_in(gpu_target, GPUTarget::G52, GPUTarget::G52LIT, GPUTarget::G71, GPUTarget::G72, GPUTarget::G76))

70

{

71

if((m > 1) && (n < 16))

72

{

73

gemm_type = GEMMType::RESHAPED_V1;

74

}

75

else if((m == 1) && (data_type == DataType::F32))

76

{

77

gemm_type = GEMMType::RESHAPED_ONLY_RHS;

}

else

{

// COMPMID-852

if((k > 256) && (m > 4) && is_data_type_float(data_type) && reshape_b_only_on_first_run)

83

{

84

constexpr float alpha = 3.2f;

85

constexpr float fact0 = 1.51f;

86

constexpr float fact1 = 1.66f;

87

constexpr float ops = 12.0f;

88

const float scale = k > 1024 ? 1.07f : 1.0f;

89

gemm_type = (alpha + ((n * fact0) / ops) < ((fact1 * n * scale) / ops)) ? GEMMType::RESHAPED_V1 : GEMMType::NATIVE;

}

else

{

gemm_type = GEMMType::NATIVE;

94

}

95

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

96

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

97

const auto workload = static_cast<float>((m * n) / 20.0f);

Gian Marco Iodice

1246b63

2017-08-16 18:38:32 +0100

[diff] [blame]

98

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

99

gemm_type = ((workload > 1600.0f) && (gemm_type == GEMMType::RESHAPED_V1) && (data_type == DataType::F32)) ? GEMMType::RESHAPED_V2 : gemm_type;

}

else

{

// We reshape the matrices only if we do not have the vector-by-matrix case and we reshape the matrix B only once

104

gemm_type = ((m != 1) && reshape_b_only_on_first_run) ? GEMMType::RESHAPED_V1 : GEMMType::NATIVE;

105

}

Gian Marco Iodice

edfa9f4

2017-08-15 11:45:22 +0100

[diff] [blame]

106

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

return gemm_type;

}

void CLGEMM::configure_native(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

111

{

112

const unsigned int m = gemm_info.reinterpret_input_as_3d() ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

113

const unsigned int n = b->info()->dimension(0);

114

const unsigned int k = a->info()->dimension(0);

115

const GPUTarget gpu_target = CLScheduler::get().target();

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

116

117

// Set the target for the kernels

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

118

_mm_kernel.set_target(gpu_target);

119

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

120

GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d());

121

122

// Configure and tune matrix multiply kernel

123

_mm_kernel.configure(a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision());

124

125

// Tune kernel statically

126

CLScheduler::get().tune_kernel_static(_mm_kernel);

127

}

128

129

void CLGEMM::configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

130

{

Gian Marco Iodice

2018-12-12 10:18:04 +0000

[diff] [blame]

131

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

132

const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

133

const unsigned int n = b->info()->dimension(0);

134

const unsigned int k = a->info()->dimension(0);

135

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

136

const GPUTarget gpu_target = CLScheduler::get().target();

Gian Marco Iodice

2018-12-12 10:18:04 +0000

[diff] [blame]

137

int mult_transpose1xW_width = 1;

138

int mult_interleave4x4_height = 1;

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

139

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

140

// Set the target for the kernels

141

_reshape_lhs_kernel.set_target(gpu_target);

142

_mm_kernel.set_target(gpu_target);

143

Gian Marco Iodice

2018-05-08 12:01:57 +0100

[diff] [blame]

144

if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST)

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

145

{

146

mult_transpose1xW_width = 4;

147

mult_interleave4x4_height = 2;

148

}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

149

giuros01

8b6b4a9

2018-12-18 19:01:33 +0000

[diff] [blame]

150

GEMMRHSMatrixInfo rhs_info;

151

rhs_info.n0 = 16 / b->info()->element_size();

152

rhs_info.k0 = 1;

153

rhs_info.h0 = mult_transpose1xW_width;

154

rhs_info.interleave = false;

155

rhs_info.transpose = false;

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

156

giuros01

1c9efeb

2019-01-11 14:04:43 +0000

[diff] [blame]

157

GEMMLHSMatrixInfo lhs_info;

158

lhs_info.m0 = 4;

159

lhs_info.k0 = 4;

160

lhs_info.v0 = mult_interleave4x4_height;

161

lhs_info.interleave = true;

162

lhs_info.transpose = true;

163

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

164

GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false);

Gian Marco

b5311a6

2017-12-13 12:48:03 +0000

[diff] [blame]

165

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

166

_memory_group.manage(&_tmp_a);

167

if(!_reshape_b_only_on_first_run)

Gian Marco Iodice

68a3f56

2018-07-26 11:44:03 +0100

[diff] [blame]

168

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

169

_memory_group.manage(&_tmp_b);

Gian Marco Iodice

edfa9f4

2017-08-15 11:45:22 +0100

[diff] [blame]

170

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

171

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

172

// Configure interleave kernel

173

_reshape_lhs_kernel.configure(a, &_tmp_a, lhs_info, reinterpret_input_as_3d);

Gian Marco Iodice

edfa9f4

2017-08-15 11:45:22 +0100

[diff] [blame]

174

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

175

// Configure transpose kernel

176

_reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

177

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

178

// Configure and tune matrix multiply kernel

179

_mm_kernel.configure(&_tmp_a, &_tmp_b, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision());

180

181

CLScheduler::get().tune_kernel_static(_mm_kernel);

182

183

// Allocate intermediate tensors

184

_tmp_a.allocator()->allocate();

185

if(!_reshape_b_only_on_first_run)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

186

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

187

_tmp_b.allocator()->allocate();

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

}

}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

191

void CLGEMM::configure_reshaped_v2(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

192

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

193

DataType data_type = a->info()->data_type();

194

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

195

const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

196

const unsigned int n = b->info()->dimension(0);

197

const unsigned int k = a->info()->dimension(0);

198

const unsigned int batch_size = reinterpret_input_as_3d ? a->info()->dimension(3) : a->info()->dimension(2);

199

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

200

const GPUTarget gpu_target = CLScheduler::get().target();

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

201

bool broadcast_bias = gemm_info.broadcast_bias();

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

202

203

// Set the target for the kernels

204

_reshape_lhs_kernel.set_target(gpu_target);

205

_mm_kernel.set_target(gpu_target);

206

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

207

GEMMReshapeInfo reshape_info(m, n, k, 1, 1, depth_output_gemm3d, false, broadcast_bias);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

208

209

// Manage intermediate buffers

210

_memory_group.manage(&_tmp_a);

211

if(!_reshape_b_only_on_first_run)

212

{

213

_memory_group.manage(&_tmp_b);

214

}

215

// _tmp_a and _tmp_b will be auto configured in _interleave_kernel and in _transpose_kernel

216

217

GEMMLHSMatrixInfo lhs_info{};

218

GEMMRHSMatrixInfo rhs_info{};

219

220

// Pick up the GEMM configuration

221

std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedKernelConfigurationFactory::create(gpu_target);

222

ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_config.get());

223

224

// Configure lhs_info and rhs_info

225

std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);

226

227

_reshape_lhs_kernel.configure(a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());

228

_reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);

229

230

// Configure and tune matrix multiply kernel

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

231

_mm_reshaped_kernel.configure(&_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, reshape_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

232

233

// Allocate intermediate tensors

234

_tmp_a.allocator()->allocate();

235

if(!_reshape_b_only_on_first_run)

236

{

237

_tmp_b.allocator()->allocate();

}

}

void CLGEMM::configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

242

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

243

DataType data_type = a->info()->data_type();

244

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

245

const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

246

const unsigned int n = b->info()->dimension(0);

247

const unsigned int k = a->info()->dimension(0);

248

const unsigned int batch_size = reinterpret_input_as_3d ? a->info()->dimension(3) : a->info()->dimension(2);

249

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

250

const GPUTarget gpu_target = CLScheduler::get().target();

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

251

bool broadcast_bias = gemm_info.broadcast_bias();

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

252

253

// Set the target for the kernels

254

_mm_kernel.set_target(gpu_target);

255

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

256

GEMMReshapeInfo reshape_info(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d, broadcast_bias);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

257

258

// Manage intermediate buffers

259

if(!_reshape_b_only_on_first_run)

260

{

261

_memory_group.manage(&_tmp_b);

262

}

263

264

GEMMLHSMatrixInfo lhs_info{};

265

GEMMRHSMatrixInfo rhs_info{};

266

267

// Pick up the GEMM configuration

268

std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target);

269

ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_config.get());

270

271

// Configure lhs_info and rhs_info

272

std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);

273

274

_reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);

275

276

// Configure and tune matrix multiply kernel

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

277

_mm_reshaped_only_rhs_kernel.configure(a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, reshape_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

278

279

if(!_reshape_b_only_on_first_run)

280

{

281

_tmp_b.allocator()->allocate();

}

}

/** Validate the NATIVE path: the matrix multiply kernel on a and b directly, plus the matrix addition
 *  kernel when c has to be added but cannot be fused into the multiply.
 *
 * @param[in] a         First input tensor info. Dimension 0 is read as K; dimension 1 (multiplied by
 *                      dimension 2 when gemm_info.reinterpret_input_as_3d() is set) is read as M.
 * @param[in] b         Second input tensor info. Dimension 0 is read as N.
 * @param[in] c         Third tensor info; may be nullptr.
 * @param[in] output    Destination tensor info.
 * @param[in] alpha     Forwarded to the multiply kernel validation.
 * @param[in] beta      Weight of c; c is ignored when beta is exactly 0.
 * @param[in] gemm_info Supplies the 3D reinterpretation flag, the output depth and the mixed-precision flag.
 *
 * @return The first error reported by a kernel validation, or an empty Status.
 */
Status CLGEMM::validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // NOTE(review): alpha and output are both used below; these markers look like leftovers.
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    // Get the GPU target
    const GPUTarget target = CLScheduler::get().target();

    const bool         input_as_3d  = gemm_info.reinterpret_input_as_3d();
    const unsigned int gemm_m       = input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int gemm_n       = b->dimension(0);
    const unsigned int gemm_k       = a->dimension(0);
    const int          output_depth = gemm_info.depth_output_gemm3d();

    // c is added only when present and beta is non-zero; it is fused into the multiply only when
    // beta is (almost) 1 and c is one-dimensional.
    const bool has_c       = (c != nullptr);
    const bool add_c       = has_c && (beta != 0.f);
    const bool is_beta_one = std::abs(1.0f - beta) < 0.00001f;
    const bool fuse_add    = has_c && is_beta_one && (c->num_dimensions() == 1);

    const ITensorInfo    *c_for_multiply = (add_c && fuse_add) ? c : nullptr;
    const GEMMReshapeInfo reshape_info(gemm_m, gemm_n, gemm_k, 1, 1, output_depth, input_as_3d);

    // Validate matrix multiply
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyKernel::validate(a, b, c_for_multiply, output, alpha, beta,
                                                                     false, reshape_info, target, gemm_info.fp_mixed_precision()));

    // Validate matrix addition kernel (only needed when the addition is not fused)
    if(add_c && !fuse_add)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixAdditionKernel::validate(c, output, beta));
    }

    return Status{};
}

/** Validate the RESHAPED_V1 path: LHS reshape kernel, RHS reshape kernel, the matrix multiply kernel on the
 *  reshaped tensor infos, and the matrix addition kernel when c cannot be fused into the multiply.
 *
 * The reshape parameters mirror the ones used by configure_reshaped_v1().
 *
 * @param[in] a         First input tensor info. Dimension 0 is read as K; dimension 1 (multiplied by
 *                      dimension 2 when gemm_info.reinterpret_input_as_3d() is set) is read as M.
 * @param[in] b         Second input tensor info. Dimension 0 is read as N.
 * @param[in] c         Third tensor info; may be nullptr.
 * @param[in] output    Destination tensor info.
 * @param[in] alpha     Forwarded to the multiply kernel validation.
 * @param[in] beta      Weight of c; c is ignored when beta is exactly 0.
 * @param[in] gemm_info Supplies the 3D reinterpretation flag, the output depth and the mixed-precision flag.
 *
 * @return The first error reported by a kernel validation, or an empty Status.
 */
Status CLGEMM::validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // NOTE(review): alpha and output are both used below; these markers look like leftovers.
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    const bool         input_as_3d         = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                   = input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                   = b->dimension(0);
    const unsigned int k                   = a->dimension(0);
    const int          depth_output_gemm3d = gemm_info.depth_output_gemm3d();

    // c is added only when present and beta is non-zero; it is fused into the multiply only when
    // beta is (almost) 1 and c is one-dimensional.
    const bool has_c       = (c != nullptr);
    const bool add_c       = has_c && (beta != 0.f);
    const bool is_beta_one = std::abs(1.0f - beta) < 0.00001f;
    const bool fuse_add    = has_c && is_beta_one && (c->num_dimensions() == 1);

    // Bifrost targets use larger reshape multipliers; every other architecture keeps 1/1
    const bool is_bifrost                = (get_arch_from_target(gpu_target) == GPUTarget::BIFROST);
    const int  mult_transpose1xW_width   = is_bifrost ? 4 : 1;
    const int  mult_interleave4x4_height = is_bifrost ? 2 : 1;

    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0         = 4;
    lhs_info.k0         = 4;
    lhs_info.v0         = mult_interleave4x4_height;
    lhs_info.interleave = true;
    lhs_info.transpose  = true;

    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0         = 16 / b->element_size();
    rhs_info.k0         = 1;
    rhs_info.h0         = mult_transpose1xW_width;
    rhs_info.interleave = false;
    rhs_info.transpose  = false;

    // The 3D reinterpretation flag goes to the LHS reshape kernel; the info for the multiply kernel carries 'false'
    const GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false);

    // Validate interleave kernel
    TensorInfo tmp_a_info{};
    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, input_as_3d)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeLHSMatrixKernel::validate(a, &tmp_a_info, lhs_info, input_as_3d));

    // Validate transpose kernel
    TensorInfo tmp_b_info{};
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply
    const ITensorInfo *c_for_multiply = (add_c && fuse_add) ? c : nullptr;
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyKernel::validate(&tmp_a_info, &tmp_b_info, c_for_multiply, output, alpha, beta,
                                                                     true, reshape_info, gpu_target, gemm_info.fp_mixed_precision()));

    // Validate matrix addition kernel (only needed when the addition is not fused)
    if(add_c && !fuse_add)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixAdditionKernel::validate(c, output, beta));
    }

    return Status{};
}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

379

/** Validate the RESHAPED_V2 path: LHS reshape kernel, RHS reshape kernel and the reshaped matrix multiply
 *  kernel on the reshaped tensor infos.
 *
 * The block sizes are obtained from the kernel configuration object created by
 * CLGEMMReshapedKernelConfigurationFactory for the current GPU target.
 *
 * @param[in] a         First input tensor info. Dimension 0 is read as K; dimension 1 (multiplied by
 *                      dimension 2 when gemm_info.reinterpret_input_as_3d() is set) is read as M.
 * @param[in] b         Second input tensor info. Dimension 0 is read as N.
 * @param[in] c         Third tensor info, forwarded unchanged to the multiply kernel validation.
 * @param[in] output    Destination tensor info.
 * @param[in] alpha     Forwarded to the multiply kernel validation.
 * @param[in] beta      Forwarded to the multiply kernel validation.
 * @param[in] gemm_info Supplies the 3D reinterpretation flag, the output depth and the broadcast-bias flag.
 *
 * @return An error Status if no kernel configuration is available or a kernel validation fails,
 *         otherwise an empty Status.
 */
Status CLGEMM::validate_reshaped_v2(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // NOTE(review): alpha and output are both used below; these markers look like leftovers.
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    const DataType     data_type   = a->data_type();
    const bool         input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m           = input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n           = b->dimension(0);
    const unsigned int k           = a->dimension(0);
    // The batch dimension moves up by one when dimensions 1 and 2 have been folded into M
    const unsigned int batch_size = input_as_3d ? a->dimension(3) : a->dimension(2);

    // The 3D reinterpretation flag goes to the LHS reshape kernel; the info for the multiply kernel carries 'false'
    const GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), false, gemm_info.broadcast_bias());

    // Pick up the GEMM configuration
    std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedKernelConfigurationFactory::create(gpu_target);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(gemm_config.get());

    // Configure lhs_info and rhs_info
    GEMMLHSMatrixInfo lhs_info;
    GEMMRHSMatrixInfo rhs_info;
    std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);

    // Validate the LHS reshape
    TensorInfo tmp_a_info{};
    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, input_as_3d)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeLHSMatrixKernel::validate(a, &tmp_a_info, lhs_info, input_as_3d));

    // Validate the RHS reshape
    TensorInfo tmp_b_info{};
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, reshape_info));

    return Status{};
}

/** Validate the RESHAPED_ONLY_RHS path: RHS reshape kernel and the multiply kernel that takes a as it is
 *  together with the reshaped b.
 *
 * The block sizes are obtained from the kernel configuration object created by
 * CLGEMMReshapedOnlyRHSKernelConfigurationFactory for the current GPU target.
 *
 * @param[in] a         First input tensor info. Dimension 0 is read as K; dimension 1 (multiplied by
 *                      dimension 2 when gemm_info.reinterpret_input_as_3d() is set) is read as M.
 * @param[in] b         Second input tensor info. Dimension 0 is read as N.
 * @param[in] c         Third tensor info, forwarded unchanged to the multiply kernel validation.
 * @param[in] output    Destination tensor info.
 * @param[in] alpha     Forwarded to the multiply kernel validation.
 * @param[in] beta      Forwarded to the multiply kernel validation.
 * @param[in] gemm_info Supplies the 3D reinterpretation flag, the output depth and the broadcast-bias flag.
 *
 * @return An error Status if no kernel configuration is available or a kernel validation fails,
 *         otherwise an empty Status.
 */
Status CLGEMM::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // NOTE(review): alpha and output are both used below; these markers look like leftovers.
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();

    const DataType     data_type   = a->data_type();
    const bool         input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m           = input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n           = b->dimension(0);
    const unsigned int k           = a->dimension(0);
    // The batch dimension moves up by one when dimensions 1 and 2 have been folded into M
    const unsigned int batch_size = input_as_3d ? a->dimension(3) : a->dimension(2);

    // a is not reshaped on this path, so the 3D reinterpretation flag is handed to the multiply kernel itself
    const GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), input_as_3d, gemm_info.broadcast_bias());

    // Pick up the GEMM configuration
    std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(gemm_config.get());

    // Configure lhs_info and rhs_info
    GEMMLHSMatrixInfo lhs_info;
    GEMMRHSMatrixInfo rhs_info;
    std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);

    // Validate the RHS reshape
    TensorInfo tmp_b_info{};
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, reshape_info));

    return Status{};
}

void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

461

{

462

ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);

463

464

// Perform validation step

465

ARM_COMPUTE_ERROR_THROW_ON(validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), alpha, beta, gemm_info));

466

467

// Check if we need to reshape the matrix B only on the first run

468

_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();

469

_is_prepared = gemm_info.retain_internal_weights();

470

_original_b = b;

471

472

// Get the GPU target

473

const GPUTarget gpu_target = CLScheduler::get().target();

474

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

475

const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

476

const unsigned int n = b->info()->dimension(0);

477

const unsigned int k = a->info()->dimension(0);

478

479

// Select GEMMType

480

_gemm_type = select_gemm_type(m, n, k, a->info()->data_type(), _reshape_b_only_on_first_run, gpu_target);

481

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

482

const bool is_fuse_add_c_supported = (_gemm_type == GEMMType::RESHAPED_V2) || (_gemm_type == GEMMType::RESHAPED_ONLY_RHS);

483

const bool add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);

484

const bool fuse_add_c = add_c && is_fuse_add_c_supported;

485

486

const ICLTensor *c_to_use = fuse_add_c ? c : nullptr;

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

switch(_gemm_type)

{

case GEMMType::NATIVE:

491

{

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

492

configure_native(a, b, c_to_use, output, alpha, beta, gemm_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

493

break;

494

}

495

case GEMMType::RESHAPED_V1:

496

{

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

497

configure_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

498

break;

499

}

500

case GEMMType::RESHAPED_V2:

501

{

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

502

configure_reshaped_v2(a, b, c_to_use, output, alpha, beta, gemm_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

503

break;

504

}

505

case GEMMType::RESHAPED_ONLY_RHS:

506

{

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

507

configure_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

break;

}

default:

{

ARM_COMPUTE_ERROR("GEMMType not supported");

}

}

// Configure matrix addition kernel

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame^]

517

if(add_c && !fuse_add_c)

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

518

{

519

_ma_kernel.configure(c, output, beta);

520

_run_addition = true;

}

}

/** Static validation entry point: checks whether CLGEMM::configure() would accept the given arguments.
 *
 * Selects the same GEMM variant configure() would select, validates that variant with c passed only when
 * the addition can be fused into the multiply kernel (RESHAPED_V2 and RESHAPED_ONLY_RHS), and otherwise
 * validates the separate matrix addition kernel.
 *
 * @param[in] a         First input tensor info; must not be nullptr.
 * @param[in] b         Second input tensor info; must not be nullptr.
 * @param[in] c         Third tensor info; may be nullptr. Ignored when beta is zero.
 * @param[in] output    Destination tensor info; must not be nullptr.
 * @param[in] alpha     Forwarded to the variant validation.
 * @param[in] beta      Weight of c.
 * @param[in] gemm_info GEMM meta-data.
 *
 * @return An error Status on a null mandatory argument, an unsupported GEMM type or a failed kernel
 *         validation; otherwise an empty Status.
 */
Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // a and b are dereferenced right below, so report null arguments as an error Status instead of crashing
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(a, b, output);

    // Get the GPU target
    const GPUTarget    gpu_target              = CLScheduler::get().target();
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);

    // Select GEMMType
    GEMMType gemm_type = select_gemm_type(m, n, k, a->data_type(), gemm_info.reshape_b_only_on_first_run(), gpu_target);

    const bool is_fuse_add_c_supported = (gemm_type == GEMMType::RESHAPED_V2) || (gemm_type == GEMMType::RESHAPED_ONLY_RHS);
    const bool add_c                   = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);
    const bool fuse_add_c              = add_c && is_fuse_add_c_supported;

    // The multiply kernel only sees c when the addition is fused into it
    const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;

    switch(gemm_type)
    {
        case GEMMType::NATIVE:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_native(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case GEMMType::RESHAPED_V1:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case GEMMType::RESHAPED_V2:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v2(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case GEMMType::RESHAPED_ONLY_RHS:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        default:
        {
            ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported");
        }
    }

    // Validate matrix addition kernel
    if(add_c && !fuse_add_c)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixAdditionKernel::validate(c, output, beta));
    }

    return Status{};
}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

579

void CLGEMM::run()

580

{

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

581

prepare();

582

Georgios Pinitas

da953f2

2019-04-02 17:27:03 +0100

[diff] [blame]

583

MemoryGroupResourceScope scope_mg(_memory_group);

Georgios Pinitas

8a94e7c

2017-09-15 19:06:47 +0100

[diff] [blame]

584

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

585

// Run matrix multiply kernel

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

586

switch(_gemm_type)

Gian Marco Iodice

2018-12-12 10:18:04 +0000

[diff] [blame]

587

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

588

case GEMMType::NATIVE:

589

{

590

CLScheduler::get().enqueue(_mm_kernel, !_run_addition);

591

break;

592

}

593

case GEMMType::RESHAPED_V1:

594

{

595

// Run interleave kernel

596

CLScheduler::get().enqueue(_reshape_lhs_kernel, false);

597

598

if(!_reshape_b_only_on_first_run)

599

{

600

// Run transpose kernel

601

CLScheduler::get().enqueue(_reshape_rhs_kernel, false);

602

}

603

604

CLScheduler::get().enqueue(_mm_kernel, !_run_addition);

605

break;

606

}

607

case GEMMType::RESHAPED_V2:

608

{

609

// Run interleave kernel

610

CLScheduler::get().enqueue(_reshape_lhs_kernel, false);

611

612

if(!_reshape_b_only_on_first_run)

613

{

614

// Run transpose kernel

615

CLScheduler::get().enqueue(_reshape_rhs_kernel, false);

616

}

617

618

CLScheduler::get().enqueue(_mm_reshaped_kernel, !_run_addition);

619

break;

620

}

621

case GEMMType::RESHAPED_ONLY_RHS:

622

{

623

if(!_reshape_b_only_on_first_run)

624

{

625

// Run transpose kernel

626

CLScheduler::get().enqueue(_reshape_rhs_kernel, false);

627

}

628

629

CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, !_run_addition);

break;

}

default:

{

ARM_COMPUTE_ERROR("GEMMType not supported");

635

}

Gian Marco Iodice

2018-12-12 10:18:04 +0000

[diff] [blame]

636

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

637

638

// Run matrix addition kernel

639

if(_run_addition)

640

{

641

CLScheduler::get().enqueue(_ma_kernel);

642

}

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

643

}

Georgios Pinitas

82b5148

2018-04-24 15:14:12 +0100

[diff] [blame]

644

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

645

void CLGEMM::prepare()

646

{

647

if(!_is_prepared)

648

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

649

if(_gemm_type != GEMMType::NATIVE && _reshape_b_only_on_first_run)

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

650

{

Georgios Pinitas

7221933

2018-06-05 14:56:06 +0100

[diff] [blame]

651

// Run transpose kernel and mark original weights tensor as unused

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

652

_tmp_b.allocator()->allocate();

giuros01

8b6b4a9

2018-12-18 19:01:33 +0000

[diff] [blame]

653

CLScheduler::get().enqueue(_reshape_rhs_kernel, false);

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

654

_original_b->mark_as_unused();

655

}

656

CLScheduler::get().queue().finish();

657

_is_prepared = true;

658

}

Anthony Barbier