/*
 * Copyright (c) 2017-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/gpu/cl/operators/ClGemm.h"

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Log.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"

#include "arm_compute/core/experimental/IPostOp.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/core/utils/helpers/float_ops.h"
#include "src/gpu/cl/IClKernel.h"
#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"

#include "src/common/utils/Log.h"
#include "support/Cast.h"
#include "utils/TypePrinter.h"

namespace arm_compute
{
namespace opencl
{
using namespace arm_compute::misc::shape_calculator;
using namespace arm_compute::cl_gemm;
using namespace arm_compute::experimental;
using namespace arm_compute::utils::cast;
using namespace arm_compute::opencl::kernels;

namespace
{
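// The helpers in this anonymous namespace select the GEMM kernel type and the LHS/RHS block
// configurations. The MLGO heuristics are queried first and their suggestion is validated against
// the kernels; if the suggestion is rejected, the built-in default heuristics are used instead.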
inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type)
{
    return kernel_type != CLGEMMKernelType::NATIVE;
}
// Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type
inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run, bool constant_weights)
{
    if(!constant_weights)
    {
        return CLGEMMKernelType::NATIVE;
    }

    auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run);
    if(bool(gemm_kernel))
    {
        if(validate_gemm_kernel(gemm_kernel.gemm_type))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
            return gemm_kernel.gemm_type;
        }
    }
    gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
    return gemm_kernel.gemm_type;
}
// Validate lhs_info and rhs_info for reshaped only rhs kernel
inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
                                                    const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info)
{
    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel
    TensorInfo tmp_b_info{};
    // Validate reshape RHS kernel
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
    {
        return false;
    }
    // Validate mm kernel
    gemm_kernel_info.lhs_info = lhs_info;
    gemm_kernel_info.rhs_info = rhs_info;
    gemm_kernel_info.has_pad_y = false;
    if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
    {
        return false;
    }
    gemm_kernel_info.has_pad_y = true;
    if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
    {
        return false;
    }
    return true;
}

// Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs
inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a,
                                                                                                 const ITensorInfo *b,
                                                                                                 const ITensorInfo *c, const ITensorInfo *output)
{
    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
    if(config)
    {
        if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
            return { config.lhs_info, config.rhs_info };
        }
    }
    config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
    return { config.lhs_info, config.rhs_info };
}

// Validate lhs_info and rhs_info for reshaped kernel
inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
                                           const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info, bool reinterpret_input_as_3d)
{
    // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped kernel
    TensorInfo tmp_a_info{};
    TensorInfo tmp_b_info{};

    // Validate reshape LHS kernel
    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, reinterpret_input_as_3d)));
    if(!bool(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, reinterpret_input_as_3d)))
    {
        return false;
    }

    // Validate reshape RHS kernel
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
    {
        return false;
    }
    // Validate mm kernel
    gemm_kernel_info.lhs_info = lhs_info;
    gemm_kernel_info.rhs_info = rhs_info;
    if(!bool(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
    {
        return false;
    }
    return true;
}

// Automatically select between mlgo (prioritized) and default heuristics for reshaped kernel configs
inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, const ITensorInfo *b,
                                                                                        const ITensorInfo *c, const ITensorInfo *output, bool reinterpret_input_as_3d)
{
    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped(query);
    if(config)
    {
        if(validate_lhs_rhs_info_reshaped(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info, reinterpret_input_as_3d))
        {
            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
            return { config.lhs_info, config.rhs_info };
        }
    }
    config = auto_heuristics::select_default_gemm_config_reshaped(query);
    ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
    return { config.lhs_info, config.rhs_info };
}
} // namespace

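// ClGemm owns one kernel object per supported GEMM variant (LHS/RHS reshape, native, reshaped,
// reshaped-only-RHS) and decides at configure time which of them are actually used.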
ClGemm::ClGemm()
    : _reshape_lhs_kernel(std::make_unique<ClGemmReshapeLhsMatrixKernel>()),
      _reshape_rhs_kernel(std::make_unique<ClGemmReshapeRhsMatrixKernel>()),
      _mm_native_kernel(std::make_unique<ClGemmMatrixMultiplyNativeKernel>()),
      _mm_reshaped_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedKernel>()),
      _mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),
      _tmp_a(),
      _tmp_b(),
      _reshape_b_only_on_first_run(false),
      _gemm_kernel_type(CLGEMMKernelType::NATIVE),
      _is_prepared(false),
      _aux_mem(AuxTensorIdx::Count)
{
}

void ClGemm::configure_native(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
                              const GEMMInfo &gemm_info)
{
    DataType data_type = a->data_type();
    bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);
    const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    const GPUTarget gpu_target = CLScheduler::get().target();
    bool broadcast_bias = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m = m;
    kernel_info.n = n;
    kernel_info.k = k;
    kernel_info.depth_output_gemm3d = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    kernel_info.broadcast_bias = broadcast_bias;
    kernel_info.activation_info = gemm_info.activation_info();
    kernel_info.post_ops = gemm_info.post_ops();

    // Set the target for the kernels
    _mm_native_kernel->set_target(gpu_target);

    // Note: the native kernel takes its lhs/rhs block configuration from the reshaped-only-RHS heuristics
    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });

    // Configure and tune matrix multiply kernel
    _mm_native_kernel->configure(compile_context, a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info, kernel_info);
}

void ClGemm::configure_reshaped(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
                                const GEMMInfo &gemm_info)
{
    DataType data_type = a->data_type();
    bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);
    const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    const GPUTarget gpu_target = CLScheduler::get().target();
    bool broadcast_bias = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m = m;
    kernel_info.n = n;
    kernel_info.k = k;
    kernel_info.depth_output_gemm3d = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = false;
    kernel_info.broadcast_bias = broadcast_bias;
    kernel_info.activation_info = gemm_info.activation_info();
    kernel_info.post_ops = gemm_info.post_ops();

    // Set the target for the kernels
    _reshape_lhs_kernel->set_target(gpu_target);
    _mm_reshaped_kernel->set_target(gpu_target);

    GEMMLHSMatrixInfo lhs_info{};
    GEMMRHSMatrixInfo rhs_info{};

    // Pick up the GEMM configuration
    std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b,
                                                                    c, output, gemm_info.reinterpret_input_as_3d());

    _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
    _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);

    // Configure and tune matrix multiply kernel
    _mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

    // Request memory for LHS and RHS reshape matrix
    _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size());
    _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
}

void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
                                         const GEMMInfo &gemm_info)
{
    DataType data_type = a->data_type();
    bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);
    const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    const GPUTarget gpu_target = CLScheduler::get().target();
    bool broadcast_bias = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m = m;
    kernel_info.n = n;
    kernel_info.k = k;
    kernel_info.depth_output_gemm3d = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    kernel_info.broadcast_bias = broadcast_bias;
    kernel_info.activation_info = gemm_info.activation_info();
    kernel_info.post_ops = gemm_info.post_ops();

    // Set the target for the kernels
    _mm_reshaped_only_rhs_kernel->set_target(gpu_target);

    GEMMLHSMatrixInfo lhs_info{};
    GEMMRHSMatrixInfo rhs_info{};

    // Pick up the GEMM configuration
    std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, c, output);

    // Transpose matrix
    _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);

    // Configure the matrix multiply kernel with no y padding support (has_pad_y = false).
    // The padding requirement for the lhs and dst tensors is checked at run time; dispatching with
    // has_pad_y = true is currently not supported (see ClGemm::run()).
    kernel_info.has_pad_y = false;
    _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

    // Request memory for RHS reshape matrix
    _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
}

Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();
    DataType data_type = a->data_type();
    bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);
    const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    const bool broadcast_bias = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m = m;
    kernel_info.n = n;
    kernel_info.k = k;
    kernel_info.depth_output_gemm3d = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    kernel_info.broadcast_bias = broadcast_bias;
    kernel_info.activation_info = gemm_info.activation_info();
    kernel_info.post_ops = gemm_info.post_ops();

    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });

    // Validate matrix multiply
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyNativeKernel::validate(a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info, kernel_info));

    return Status{};
}

Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    TensorInfo tmp_a_info{};
    TensorInfo tmp_b_info{};

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();
    DataType data_type = a->data_type();
    bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);
    const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    const bool broadcast_bias = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m = m;
    kernel_info.n = n;
    kernel_info.k = k;
    kernel_info.depth_output_gemm3d = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = false;
    kernel_info.broadcast_bias = broadcast_bias;
    kernel_info.activation_info = gemm_info.activation_info();
    kernel_info.post_ops = gemm_info.post_ops();

    GEMMLHSMatrixInfo lhs_info;
    GEMMRHSMatrixInfo rhs_info;

    // Pick up the GEMM configuration
    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
    const auto gemm_config = select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
    lhs_info = gemm_config.lhs_info;
    rhs_info = gemm_config.rhs_info;

    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d())));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d()));

    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));

    return Status{};
}

Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    TensorInfo tmp_b_info{};

    // Get the GPU target
    const GPUTarget gpu_target = CLScheduler::get().target();
    const DataType data_type = a->data_type();
    bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);
    const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    const bool broadcast_bias = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m = m;
    kernel_info.n = n;
    kernel_info.k = k;
    kernel_info.depth_output_gemm3d = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    kernel_info.broadcast_bias = broadcast_bias;
    kernel_info.activation_info = gemm_info.activation_info();
    kernel_info.post_ops = gemm_info.post_ops();

    GEMMLHSMatrixInfo lhs_info;
    GEMMRHSMatrixInfo rhs_info;

    // Pick up the GEMM configuration
    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
    const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
    lhs_info = gemm_config.lhs_info;
    rhs_info = gemm_config.rhs_info;

    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply
    kernel_info.has_pad_y = false;
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));

    kernel_info.has_pad_y = true;
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));

    return Status{};
}

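// Public entry point: validates the arguments, selects a kernel type through the heuristics and
// dispatches to the matching configure_* method above.
//
// A minimal usage sketch (illustrative only; the CL context, tensor allocation and GEMMInfo setup
// follow the usual library conventions and are assumed here rather than shown):
//
//   ClGemm gemm;
//   gemm.configure(CLKernelLibrary::get().get_compile_context(), &a_info, &b_info, nullptr, &dst_info, 1.f, 0.f, GEMMInfo());
//   ITensorPack pack{ { ACL_SRC_0, &a }, { ACL_SRC_1, &b }, { ACL_DST, &dst } };
//   gemm.run(pack);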
void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);

    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(validate(a, b, c, output, alpha, beta, gemm_info));
    ARM_COMPUTE_LOG_PARAMS(a, b, c, output, alpha, beta, gemm_info);

    // Check if we need to reshape the matrix B only on the first run
    _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
    _is_prepared = gemm_info.retain_internal_weights();

    bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);
    const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

    // Select GEMMType
    _gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery{ CLScheduler::get().target(), a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run,
                                                b->are_values_constant());

    const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);

    ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;

    switch(_gemm_kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
        {
            configure_native(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
            break;
        }
        case CLGEMMKernelType::RESHAPED:
        {
            configure_reshaped(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
            break;
        }
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
        {
            configure_reshaped_only_rhs(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
            break;
        }
        default:
        {
            ARM_COMPUTE_ERROR("GEMMType not supported");
        }
    }
}

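// Pre-flight check that mirrors configure(): it selects the same kernel type and runs the
// corresponding validate_* method, without configuring any kernels or requesting memory.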
Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // Gather the problem dimensions
    bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);
    const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

    // Select GEMMType
    CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery
    {
        CLScheduler::get().target(), a->data_type(), m, n, k, batch_size,
    },
    gemm_info.reshape_b_only_on_first_run(), b->are_values_constant());

    const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);

    const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;

    switch(gemm_kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_native(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case CLGEMMKernelType::RESHAPED:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        default:
        {
            ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported");
        }
    }

    return Status{};
}

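// Executes the kernels selected at configure time. The pack is expected to bind the operand tensors
// (ACL_SRC_0 = lhs, ACL_SRC_1 = rhs, ACL_DST = dst) and, for the reshape-based variants, the auxiliary
// reshape tensors under the offset_int_vec(LhsReshape/RhsReshape) ids reported by workspace().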
void ClGemm::run(ITensorPack &tensors)
{
    const ITensor *lhs = tensors.get_const_tensor(ACL_SRC_0);
    const ITensor *rhs = tensors.get_const_tensor(ACL_SRC_1);
    ITensor *dst = tensors.get_tensor(ACL_DST);

    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, dst);

    CLAuxTensorHandler lhs_reshaped(offset_int_vec(LhsReshape), _tmp_a, tensors, true);
    CLAuxTensorHandler rhs_reshaped(offset_int_vec(RhsReshape), _tmp_b, tensors, true);

    // Prepare the consts if needed
    prepare(tensors);

    // Run matrix multiply kernel
    switch(_gemm_kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
        {
            CLScheduler::get().enqueue_op(*_mm_native_kernel, tensors, true);
            break;
        }
        case CLGEMMKernelType::RESHAPED:
        {
            // Run interleave kernel
            ITensorPack reshape_lhs_pack{ { ACL_SRC, lhs }, { ACL_DST, lhs_reshaped.get() } };
            CLScheduler::get().enqueue_op(*_reshape_lhs_kernel, reshape_lhs_pack, false);

            if(!_reshape_b_only_on_first_run)
            {
                // Run transpose kernel
                ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
                CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
            }
            // Copy original tensor pack and overwrite lhs and rhs with reshaped counterparts
            ITensorPack gemm_reshaped_pack(tensors);
            gemm_reshaped_pack.add_const_tensor(ACL_SRC_0, lhs_reshaped.get());
            gemm_reshaped_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());

            CLScheduler::get().enqueue_op(*_mm_reshaped_kernel, gemm_reshaped_pack, true);
            break;
        }
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
        {
            if(!_reshape_b_only_on_first_run)
            {
                // Run transpose kernel
                ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
                CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
            }
            // In case of RESHAPED_ONLY_RHS, we need to check the padding requirement
            // Check if the lhs or dst tensors have padding
            const unsigned int cross_plane_pad_lhs = lhs->info()->padding().top + lhs->info()->padding().bottom;
            const unsigned int cross_plane_pad_dst = dst->info()->padding().top + dst->info()->padding().bottom;
            bool has_pad_y = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0);

            // Copy original tensor pack and overwrite rhs with reshaped counterpart
            ITensorPack gemm_reshaped_onlyrhs_pack(tensors);
            gemm_reshaped_onlyrhs_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());

            if(has_pad_y)
            {
                // The has_pad_y = true kernel variant is not configured, so y padding on lhs/dst is not supported here
                ARM_COMPUTE_ERROR_ON(has_pad_y);
            }
            else
            {
                CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_kernel, gemm_reshaped_onlyrhs_pack, true);
            }
            break;
        }
        default:
        {
            ARM_COMPUTE_ERROR("GEMMType not supported");
        }
    }
}

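// One-off preparation: when the reshaped RHS is persistent (constant B reshaped only on the first
// run), the RHS provided in the pack is transformed here once so that run() can skip the reshape
// on subsequent executions.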
void ClGemm::prepare(ITensorPack &constants)
{
    if(!_is_prepared)
    {
        const ITensor *src1 = constants.get_const_tensor(ACL_SRC_1);
        ICLTensor *rhs_aux = utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape)));

        // If memory for RHS is persistent and src1 is provided, re-transform; otherwise assume that RHS is already transformed
        if((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) && (src1 != nullptr && rhs_aux != nullptr))
        {
            ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Transforming RHS Matrix!");

            CLAuxTensorHandler rhs_reshaped(_tmp_b, *rhs_aux);
            ARM_COMPUTE_ERROR_ON(rhs_reshaped.get()->cl_buffer().get() == nullptr);

            ITensorPack reshape_rhs_pack{ { ACL_SRC, src1 }, { ACL_DST, rhs_reshaped.get() } };
            CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, true);
        }
        _is_prepared = true;
    }
}

experimental::MemoryRequirements ClGemm::workspace() const
{
    return _aux_mem;
}
} // namespace opencl
} // namespace arm_compute