Blame - src/gpu/cl/operators/ClGemm.cpp - ml/ComputeLibrary

2021-10-15 10:23:24 +0100

[diff] [blame]

189

: _reshape_lhs_kernel(std::make_unique<ClGemmReshapeLhsMatrixKernel>()),

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

190

_reshape_rhs_kernel(std::make_unique<ClGemmReshapeRhsMatrixKernel>()),

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

191

_mm_native_kernel(std::make_unique<ClGemmMatrixMultiplyNativeKernel>()),

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

192

_mm_reshaped_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedKernel>()),

193

_mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),

Gunes Bayir

2021-12-10 16:17:56 +0000

[diff] [blame]

194

_mm_reshaped_only_rhs_mmul_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel>()),

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

195

_tmp_a(),

196

_tmp_b(),

197

_reshape_b_only_on_first_run(false),

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

198

_gemm_kernel_type(CLGEMMKernelType::NATIVE),

Manuel Bottini

2021-07-16 10:23:31 +0100

[diff] [blame]

199

_is_prepared(false),

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

200

_aux_mem(AuxTensorIdx::Count)

{

}

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

204

void ClGemm::configure_native(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,

205

const GEMMInfo &gemm_info)

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

206

{

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

207

DataType data_type = a->data_type();

208

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

209

const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);

210

const unsigned int n = b->dimension(0);

211

const unsigned int k = a->dimension(0);

212

const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

213

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

214

const GPUTarget gpu_target = CLScheduler::get().target();

215

bool broadcast_bias = gemm_info.broadcast_bias();

216

217

GEMMKernelInfo kernel_info;

kernel_info.m = m;

kernel_info.n = n;

kernel_info.k = k;

kernel_info.depth_output_gemm3d = depth_output_gemm3d;

222

kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;

223

kernel_info.broadcast_bias = broadcast_bias;

224

kernel_info.activation_info = gemm_info.activation_info();

SiCongLi

2021-10-24 19:12:33 +0100

[diff] [blame]

225

kernel_info.post_ops = gemm_info.post_ops();

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

226

227

// Set the target for the kernels

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

228

_mm_native_kernel->set_target(gpu_target);

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

229

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

230

auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

231

232

// Configure and tune matrix multiply kernel

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

233

_mm_native_kernel->configure(compile_context, a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info, kernel_info);

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

234

}

235

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

236

void ClGemm::configure_reshaped(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,

237

const GEMMInfo &gemm_info)

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

238

{

239

DataType data_type = a->data_type();

240

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

241

const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);

242

const unsigned int n = b->dimension(0);

243

const unsigned int k = a->dimension(0);

244

const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

245

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

246

const GPUTarget gpu_target = CLScheduler::get().target();

247

bool broadcast_bias = gemm_info.broadcast_bias();

248

249

GEMMKernelInfo kernel_info;

kernel_info.m = m;

kernel_info.n = n;

kernel_info.k = k;

kernel_info.depth_output_gemm3d = depth_output_gemm3d;

254

kernel_info.reinterpret_input_as_3d = false;

255

kernel_info.broadcast_bias = broadcast_bias;

256

kernel_info.activation_info = gemm_info.activation_info();

SiCongLi

579ca84

2021-10-18 09:38:33 +0100

[diff] [blame]

257

kernel_info.post_ops = gemm_info.post_ops();

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

258

259

// Set the target for the kernels

260

_reshape_lhs_kernel->set_target(gpu_target);

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

261

_mm_reshaped_kernel->set_target(gpu_target);

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

262

263

GEMMLHSMatrixInfo lhs_info{};

264

GEMMRHSMatrixInfo rhs_info{};

265

266

// Pick up the GEMM configuration

267

std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b,

268

c, output, gemm_info.reinterpret_input_as_3d());

269

270

_reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());

271

_reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);

272

273

// Configure and tune matrix multiply kernel

274

_mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

275

276

// Request memory for LHS and RHS reshape matrix

277

_aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size());

278

_aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());

279

}

280

281

void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,

282

const GEMMInfo &gemm_info)

283

{

284

DataType data_type = a->data_type();

285

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

286

const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);

287

const unsigned int n = b->dimension(0);

288

const unsigned int k = a->dimension(0);

289

const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

290

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

291

const GPUTarget gpu_target = CLScheduler::get().target();

292

bool broadcast_bias = gemm_info.broadcast_bias();

293

294

GEMMKernelInfo kernel_info;

kernel_info.m = m;

kernel_info.n = n;

kernel_info.k = k;

kernel_info.depth_output_gemm3d = depth_output_gemm3d;

299

kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;

300

kernel_info.broadcast_bias = broadcast_bias;

301

kernel_info.activation_info = gemm_info.activation_info();

SiCongLi

2021-10-24 19:12:33 +0100

[diff] [blame]

302

kernel_info.post_ops = gemm_info.post_ops();

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

303

304

// Set the target for the kernels

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

305

_mm_reshaped_only_rhs_kernel->set_target(gpu_target);

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

306

307

GEMMLHSMatrixInfo lhs_info{};

308

GEMMRHSMatrixInfo rhs_info{};

309

310

// Pick up the GEMM configuration

311

std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, c, output);

312

313

// Transpose matrix

314

_reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);

315

316

// Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true)

317

// During the prepare stage we check the padding requirement for the lhs and dst tensors. If they do not have

318

// pad y, we dispatch CLGEMMMatrixMultiplyReshapedOnlyRHSKernel with has_pad_y = false

319

320

// Configure matrix multiply kernel with no y padding support

321

kernel_info.has_pad_y = false;

322

_mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

323

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

324

// Request memory for RHS reshape matrix

325

_aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());

326

}

327

Gunes Bayir

2021-12-10 16:17:56 +0000

[diff] [blame]

328

void ClGemm::configure_reshaped_only_rhs_mmul(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,

329

const GEMMInfo &gemm_info)

330

{

331

DataType data_type = a->data_type();

332

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

333

const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);

334

const unsigned int n = b->dimension(0);

335

const unsigned int k = a->dimension(0);

336

const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

337

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

338

const GPUTarget gpu_target = CLScheduler::get().target();

339

bool broadcast_bias = gemm_info.broadcast_bias();

340

341

GEMMKernelInfo kernel_info;

kernel_info.m = m;

kernel_info.n = n;

kernel_info.k = k;

kernel_info.depth_output_gemm3d = depth_output_gemm3d;

346

kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;

347

kernel_info.broadcast_bias = broadcast_bias;

348

kernel_info.activation_info = gemm_info.activation_info();

349

kernel_info.post_ops = gemm_info.post_ops();

350

351

// Set the target for the kernels

352

_mm_reshaped_only_rhs_mmul_kernel->set_target(gpu_target);

353

354

GEMMLHSMatrixInfo lhs_info{};

355

GEMMRHSMatrixInfo rhs_info{};

356

357

// Pick up the GEMM configuration

358

auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });

359

lhs_info = gemm_config.lhs_info;

360

rhs_info = gemm_config.rhs_info;

361

// Force H0 to 4 in order to use the MMUL extension

362

rhs_info.h0 = 4;

363

364

// Reshape Rhs matrix

365

_reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);

366

367

// Configure matrix multiply kernel with no y padding support

368

kernel_info.has_pad_y = false;

369

_mm_reshaped_only_rhs_mmul_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

370

371

// Request memory for RHS reshape matrix

372

_aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());

373

}

374

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

375

/** Check whether the native matrix multiply kernel accepts the given GEMM.
 *
 * Builds the same GEMMKernelInfo and heuristic query as the native configure path and forwards them to
 * ClGemmMatrixMultiplyNativeKernel::validate().
 *
 * @return The kernel's error status if its validation fails, an empty Status otherwise.
 */
Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    const GPUTarget target    = CLScheduler::get().target();
    const DataType  dt        = a->data_type();
    const bool      lhs_is_3d = gemm_info.reinterpret_input_as_3d();

    // When the LHS is reinterpreted as 3D, dimensions 1 and 2 are multiplied into M and the batch is read from dimension 3
    const unsigned int rows    = lhs_is_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int cols    = b->dimension(0);
    const unsigned int depth   = a->dimension(0);
    const unsigned int batches = lhs_is_3d ? a->dimension(3) : a->dimension(2);

    GEMMKernelInfo mm_info;
    mm_info.m                       = rows;
    mm_info.n                       = cols;
    mm_info.k                       = depth;
    mm_info.depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
    mm_info.reinterpret_input_as_3d = lhs_is_3d;
    mm_info.broadcast_bias          = gemm_info.broadcast_bias();
    mm_info.activation_info         = gemm_info.activation_info();
    mm_info.post_ops                = gemm_info.post_ops();

    const auto heuristic = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ target, dt, rows, cols, depth, batches });

    // Let the kernel validate the operands against the selected configuration
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyNativeKernel::validate(a, b, c, output, alpha, beta, heuristic.lhs_info, heuristic.rhs_info, mm_info));

    return Status{};
}

/** Check whether the fully reshaped GEMM path (LHS reshape, RHS reshape, reshaped matrix multiply) accepts the given GEMM.
 *
 * Temporary tensor infos for the reshaped operands are derived from the default heuristic and each of the
 * three kernels is validated in turn.
 *
 * @return The first failing kernel's error status, or an empty Status if all validations pass.
 */
Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    const GPUTarget target    = CLScheduler::get().target();
    const DataType  dt        = a->data_type();
    const bool      lhs_is_3d = gemm_info.reinterpret_input_as_3d();

    // When the LHS is reinterpreted as 3D, dimensions 1 and 2 are multiplied into M and the batch is read from dimension 3
    const unsigned int rows    = lhs_is_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int cols    = b->dimension(0);
    const unsigned int depth   = a->dimension(0);
    const unsigned int batches = lhs_is_3d ? a->dimension(3) : a->dimension(2);

    GEMMKernelInfo mm_info;
    mm_info.m                   = rows;
    mm_info.n                   = cols;
    mm_info.k                   = depth;
    mm_info.depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    // The 3D flag is handed to the LHS reshape kernel below; the matrix multiply kernel is always told "false"
    mm_info.reinterpret_input_as_3d = false;
    mm_info.broadcast_bias          = gemm_info.broadcast_bias();
    mm_info.activation_info         = gemm_info.activation_info();
    mm_info.post_ops                = gemm_info.post_ops();

    // Pick up the GEMM configuration
    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
    const auto        heuristic = select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{ target, dt, rows, cols, depth, batches });
    GEMMLHSMatrixInfo lhs_info  = heuristic.lhs_info;
    GEMMRHSMatrixInfo rhs_info  = heuristic.rhs_info;

    // LHS reshape
    TensorInfo reshaped_lhs{};
    auto_init_if_empty(reshaped_lhs, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, lhs_is_3d)));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &reshaped_lhs, lhs_info, lhs_is_3d));

    // RHS reshape
    TensorInfo reshaped_rhs{};
    auto_init_if_empty(reshaped_rhs, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &reshaped_rhs, rhs_info));

    // Matrix multiply on the reshaped operands
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&reshaped_lhs, &reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, mm_info));

    return Status{};
}

/** Check whether the reshaped-only-RHS GEMM path accepts the given GEMM.
 *
 * The RHS reshape kernel is validated first, then the matrix multiply kernel is validated twice:
 * once with has_pad_y = false and once with has_pad_y = true.
 *
 * @return The first failing kernel's error status, or an empty Status if all validations pass.
 */
Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    const GPUTarget target    = CLScheduler::get().target();
    const DataType  dt        = a->data_type();
    const bool      lhs_is_3d = gemm_info.reinterpret_input_as_3d();

    // When the LHS is reinterpreted as 3D, dimensions 1 and 2 are multiplied into M and the batch is read from dimension 3
    const unsigned int rows    = lhs_is_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int cols    = b->dimension(0);
    const unsigned int depth   = a->dimension(0);
    const unsigned int batches = lhs_is_3d ? a->dimension(3) : a->dimension(2);

    GEMMKernelInfo mm_info;
    mm_info.m                       = rows;
    mm_info.n                       = cols;
    mm_info.k                       = depth;
    mm_info.depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
    mm_info.reinterpret_input_as_3d = lhs_is_3d;
    mm_info.broadcast_bias          = gemm_info.broadcast_bias();
    mm_info.activation_info         = gemm_info.activation_info();
    mm_info.post_ops                = gemm_info.post_ops();

    // Pick up the GEMM configuration
    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
    const auto        heuristic = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ target, dt, rows, cols, depth, batches });
    GEMMLHSMatrixInfo lhs_info  = heuristic.lhs_info;
    GEMMRHSMatrixInfo rhs_info  = heuristic.rhs_info;

    // RHS reshape
    TensorInfo reshaped_rhs{};
    auto_init_if_empty(reshaped_rhs, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &reshaped_rhs, rhs_info));

    // Matrix multiply: validate first without, then with, y padding
    const bool pad_y_variants[] = { false, true };
    for(const bool pad_y : pad_y_variants)
    {
        mm_info.has_pad_y = pad_y;
        ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, mm_info));
    }

    return Status{};
}

Gunes Bayir

2021-12-10 16:17:56 +0000

[diff] [blame]

509

/** Check whether the reshaped-only-RHS MMUL GEMM path accepts the given GEMM.
 *
 * Uses the default reshaped-only-RHS heuristic with rhs_info.h0 overridden to 4, then validates the RHS
 * reshape kernel and the MMUL matrix multiply kernel (has_pad_y = false).
 *
 * @return The first failing kernel's error status, or an empty Status if all validations pass.
 */
Status ClGemm::validate_reshaped_only_rhs_mmul(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    const GPUTarget target    = CLScheduler::get().target();
    const DataType  dt        = a->data_type();
    const bool      lhs_is_3d = gemm_info.reinterpret_input_as_3d();

    // When the LHS is reinterpreted as 3D, dimensions 1 and 2 are multiplied into M and the batch is read from dimension 3
    const unsigned int rows    = lhs_is_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int cols    = b->dimension(0);
    const unsigned int depth   = a->dimension(0);
    const unsigned int batches = lhs_is_3d ? a->dimension(3) : a->dimension(2);

    GEMMKernelInfo mm_info;
    mm_info.m                       = rows;
    mm_info.n                       = cols;
    mm_info.k                       = depth;
    mm_info.depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
    mm_info.reinterpret_input_as_3d = lhs_is_3d;
    mm_info.broadcast_bias          = gemm_info.broadcast_bias();
    mm_info.activation_info         = gemm_info.activation_info();
    mm_info.post_ops                = gemm_info.post_ops();

    // Pick up the GEMM configuration
    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
    const auto        heuristic = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ target, dt, rows, cols, depth, batches });
    GEMMLHSMatrixInfo lhs_info  = heuristic.lhs_info;
    GEMMRHSMatrixInfo rhs_info  = heuristic.rhs_info;

    // Force H0 to 4 in order to use the MMUL extension
    rhs_info.h0 = 4;

    // RHS reshape
    TensorInfo reshaped_rhs{};
    auto_init_if_empty(reshaped_rhs, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &reshaped_rhs, rhs_info));

    // Matrix multiply, without y padding support
    mm_info.has_pad_y = false;
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel::validate(a, &reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, mm_info));

    return Status{};
}

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

557

void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)

558

{

559

ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);

560

561

// Perform validation step

562

ARM_COMPUTE_ERROR_THROW_ON(validate(a, b, c, output, alpha, beta, gemm_info));

ramelg01

2e53f17

2021-09-22 10:48:25 +0100

[diff] [blame]

563

ARM_COMPUTE_LOG_PARAMS(a, b, c, output, alpha, beta, gemm_info);

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

564

565

// Check if we need to reshape the matrix B only on the first run

566

_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();

Georgios Pinitas

f5d51f3

2021-08-17 16:09:10 +0100

[diff] [blame]

567

_is_prepared = gemm_info.retain_internal_weights();

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

568

569

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

570

const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);

571

const unsigned int n = b->dimension(0);

572

const unsigned int k = a->dimension(0);

573

const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

574

575

// Select GEMMType

Giorgio Arena

4403ed3

2021-05-17 13:03:50 +0100

[diff] [blame]

576

_gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery{ CLScheduler::get().target(), a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run,

Giorgio Arena

63e0beb

2021-09-24 14:04:27 +0100

[diff] [blame]

577

b->are_values_constant());

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

578

579

const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);

580

581

ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;

582

583

switch(_gemm_kernel_type)

584

{

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

585

case CLGEMMKernelType::NATIVE:

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

586

{

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

587

configure_native(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

588

break;

589

}

590

case CLGEMMKernelType::RESHAPED:

591

{

Gian Marco Iodice

2021-10-15 10:23:24 +0100

[diff] [blame]

592

configure_reshaped(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

593

break;

594

}

595

case CLGEMMKernelType::RESHAPED_ONLY_RHS:

596

{

597

configure_reshaped_only_rhs(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);

598

break;

599

}

Gunes Bayir

2021-12-10 16:17:56 +0000

[diff] [blame]

600

case CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL:

601

{

602

configure_reshaped_only_rhs_mmul(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);

603

break;

604

}

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

605

default:

606

{

607

ARM_COMPUTE_ERROR("GEMMType not supported");

}

}

}

/** Check whether ClGemm can be configured with the given tensors and GEMM settings.
 *
 * Selects the kernel type with the same heuristic query that configure() builds and delegates to the
 * matching validate_* helper. @p c is only forwarded when it is non-null and @p beta is not zero.
 *
 * @return An error Status if @p a or @p b is null, if the data type of @p a is neither F32 nor F16,
 *         if the selected kernel type is unsupported, or if the delegated validation fails;
 *         an empty Status otherwise.
 */
Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // a and b are dereferenced immediately below, so report null operands through the Status
    // instead of invoking undefined behaviour
    if(a == nullptr || b == nullptr)
    {
        ARM_COMPUTE_RETURN_ERROR_MSG("ClGemm: input tensor infos a and b must not be nullptr");
    }

    // Derive the GEMM dimensions. When the LHS is reinterpreted as 3D, dimensions 1 and 2 are
    // multiplied into M and the batch is read from dimension 3
    const bool         reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

    // Check data type early because the auto_select_gemm_kernel has assertions on supported data types
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::F16);

    // Select GEMMType
    CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery
    {
        CLScheduler::get().target(), a->data_type(), m, n, k, batch_size,
    },
    gemm_info.reshape_b_only_on_first_run(), b->are_values_constant());

    // The addition of C is fused only when C exists and beta is non-zero
    const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);

    const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;

    switch(gemm_kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_native(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case CLGEMMKernelType::RESHAPED:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs_mmul(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        default:
        {
            ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported");
        }
    }

    return Status{};
}

/** Execute the GEMM that was set up by configure().
 *
 * Reads the LHS (ACL_SRC_0), RHS (ACL_SRC_1) and destination (ACL_DST) from @p tensors, calls prepare(),
 * enqueues the reshape kernels required by the selected kernel type and finally the matrix multiply kernel.
 *
 * For the RESHAPED_ONLY_RHS and RESHAPED_ONLY_RHS_MMUL paths an error is raised if the LHS or the destination
 * has top/bottom padding. This error is raised regardless of whether asserts are enabled, so the matrix
 * multiply is never silently skipped.
 */
void ClGemm::run(ITensorPack &tensors)
{
    const ITensor *lhs = tensors.get_const_tensor(ACL_SRC_0);
    const ITensor *rhs = tensors.get_const_tensor(ACL_SRC_1);
    ITensor       *dst = tensors.get_tensor(ACL_DST);

    // rhs is deliberately not checked here: it is only read when B is reshaped on every run
    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, dst);

    CLAuxTensorHandler lhs_reshaped(offset_int_vec(LhsReshape), _tmp_a, tensors, true);
    CLAuxTensorHandler rhs_reshaped(offset_int_vec(RhsReshape), _tmp_b, tensors, true);

    // Prepare the consts if needed
    prepare(tensors);

    // Enqueue the RHS reshape kernel unless B is reshaped only on the first run
    const auto reshape_rhs_if_needed = [&]()
    {
        if(!_reshape_b_only_on_first_run)
        {
            ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
            CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
        }
    };

    // True when the LHS or the destination tensor has top/bottom padding
    const auto lhs_or_dst_has_pad_y = [&]() -> bool
    {
        const unsigned int cross_plane_pad_lhs = lhs->info()->padding().top + lhs->info()->padding().bottom;
        const unsigned int cross_plane_pad_dst = dst->info()->padding().top + dst->info()->padding().bottom;
        return (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0);
    };

    // Run matrix multiply kernel
    switch(_gemm_kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
        {
            CLScheduler::get().enqueue_op(*_mm_native_kernel, tensors, true);
            break;
        }
        case CLGEMMKernelType::RESHAPED:
        {
            // Reshape the LHS on every run
            ITensorPack reshape_lhs_pack{ { ACL_SRC, lhs }, { ACL_DST, lhs_reshaped.get() } };
            CLScheduler::get().enqueue_op(*_reshape_lhs_kernel, reshape_lhs_pack, false);

            reshape_rhs_if_needed();

            // Copy original tensor pack and overwrite lhs and rhs with reshaped counterparts
            ITensorPack gemm_reshaped_pack(tensors);
            gemm_reshaped_pack.add_const_tensor(ACL_SRC_0, lhs_reshaped.get());
            gemm_reshaped_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());

            CLScheduler::get().enqueue_op(*_mm_reshaped_kernel, gemm_reshaped_pack, true);
            break;
        }
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
        {
            reshape_rhs_if_needed();

            // Copy original tensor pack and overwrite rhs with reshaped counterpart
            ITensorPack gemm_reshaped_onlyrhs_pack(tensors);
            gemm_reshaped_onlyrhs_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());

            // The kernel is configured with has_pad_y = false, so y padding cannot be handled
            if(lhs_or_dst_has_pad_y())
            {
                ARM_COMPUTE_ERROR("Y padding on the LHS or destination tensor is not supported by the reshaped only RHS kernel");
            }
            else
            {
                CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_kernel, gemm_reshaped_onlyrhs_pack, true);
            }
            break;
        }
        case CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL:
        {
            reshape_rhs_if_needed();

            // Copy original tensor pack and overwrite rhs with reshaped counterpart
            ITensorPack gemm_reshaped_onlyrhs_pack(tensors);
            gemm_reshaped_onlyrhs_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());

            // The kernel is configured with has_pad_y = false, so y padding cannot be handled
            if(lhs_or_dst_has_pad_y())
            {
                ARM_COMPUTE_ERROR("Y padding on the LHS or destination tensor is not supported by the reshaped only RHS MMUL kernel");
            }
            else
            {
                CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_mmul_kernel, gemm_reshaped_onlyrhs_pack, true);
            }
            break;
        }
        default:
        {
            ARM_COMPUTE_ERROR("GEMMType not supported");
        }
    }
}

void ClGemm::prepare(ITensorPack &constants)

775

{

Manuel Bottini

2021-07-16 10:23:31 +0100

[diff] [blame]

776

if(!_is_prepared)

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

777

{

Manuel Bottini

2021-07-16 10:23:31 +0100

[diff] [blame]

778

const ITensor *src1 = constants.get_const_tensor(ACL_SRC_1);

779

ICLTensor *rhs_aux = utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape)));

Georgios Pinitas

2b147ee

2021-07-08 18:14:45 +0100

[diff] [blame]

780

Manuel Bottini

2021-07-16 10:23:31 +0100

[diff] [blame]

781

// If memory for RHS is persistent and src1 is provided re-transform else assume that RHS is transformed

782

if((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) && (src1 != nullptr && rhs_aux != nullptr) && rhs_aux)

783

{

784

ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Transforming RHS Matrix!");

Georgios Pinitas

2021-04-22 21:13:21 +0100

[diff] [blame]

785

Manuel Bottini

2021-07-16 10:23:31 +0100

[diff] [blame]

786

CLAuxTensorHandler rhs_reshaped(_tmp_b, *rhs_aux);

787

ARM_COMPUTE_ERROR_ON(rhs_reshaped.get()->cl_buffer().get() == nullptr);

788

789

ITensorPack reshape_rhs_pack{ { ACL_SRC, src1 }, { ACL_DST, rhs_reshaped.get() } };

790

CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, true);

791

}

792

_is_prepared = true;

Georgios Pinitas