blob: 50ecb214e3b0971838b994d633dfc144e84e5217 [file] [log] [blame]
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001/*
2 * Copyright (c) 2017-2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/gpu/cl/operators/ClGemm.h"
Georgios Pinitas856f66e2021-04-22 21:13:21 +010025
26#include "arm_compute/core/CL/CLKernelLibrary.h"
27#include "arm_compute/core/CL/ICLTensor.h"
28#include "arm_compute/core/Error.h"
29#include "arm_compute/core/GPUTarget.h"
30#include "arm_compute/core/Helpers.h"
31#include "arm_compute/core/KernelDescriptors.h"
32#include "arm_compute/core/Log.h"
33#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Types.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/utils/misc/ShapeCalculator.h"
38#include "arm_compute/runtime/CL/CLScheduler.h"
39#include "arm_compute/runtime/ITensorAllocator.h"
Georgios Pinitas2b147ee2021-07-08 18:14:45 +010040
SiCongLi579ca842021-10-18 09:38:33 +010041#include "arm_compute/core/experimental/IPostOp.h"
Georgios Pinitas856f66e2021-04-22 21:13:21 +010042#include "src/core/helpers/AutoConfiguration.h"
43#include "src/core/helpers/MemoryHelpers.h"
44#include "src/core/utils/helpers/float_ops.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010045#include "src/gpu/cl/IClKernel.h"
46#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
Georgios Pinitas856f66e2021-04-22 21:13:21 +010047#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
48#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
Georgios Pinitas856f66e2021-04-22 21:13:21 +010049
ramelg012e53f172021-09-22 10:48:25 +010050#include "src/common/utils/Log.h"
Georgios Pinitas856f66e2021-04-22 21:13:21 +010051#include "support/Cast.h"
52#include "utils/TypePrinter.h"
53
54namespace arm_compute
55{
56namespace opencl
57{
58using namespace arm_compute::misc::shape_calculator;
59using namespace arm_compute::cl_gemm;
60using namespace arm_compute::experimental;
61using namespace arm_compute::utils::cast;
62using namespace arm_compute::opencl::kernels;
63
64namespace
65{
66inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type)
67{
SiCongLi579ca842021-10-18 09:38:33 +010068 return kernel_type == CLGEMMKernelType::NATIVE ? false : true;
Georgios Pinitas856f66e2021-04-22 21:13:21 +010069}
70//Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type
Giorgio Arena4403ed32021-05-17 13:03:50 +010071inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run, bool constant_weights)
Georgios Pinitas856f66e2021-04-22 21:13:21 +010072{
Giorgio Arena4403ed32021-05-17 13:03:50 +010073 if(!constant_weights)
74 {
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +010075 return CLGEMMKernelType::NATIVE;
Giorgio Arena4403ed32021-05-17 13:03:50 +010076 }
77
Georgios Pinitas856f66e2021-04-22 21:13:21 +010078 auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run);
79 if(bool(gemm_kernel))
80 {
81 if(validate_gemm_kernel(gemm_kernel.gemm_type))
82 {
83 ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
84 return gemm_kernel.gemm_type;
85 }
86 }
87 gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run);
88 ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
89 return gemm_kernel.gemm_type;
90}
91// Validate lhs_info and rhs_info for reshaped only rhs kernel
92inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
93 const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info)
94{
95 // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel
96 TensorInfo tmp_b_info{};
97 // Validate reshape RHS kernel
98 auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
99 if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
100 {
101 return false;
102 }
103 // Validate mm kernel
104 gemm_kernel_info.lhs_info = lhs_info;
105 gemm_kernel_info.rhs_info = rhs_info;
106 gemm_kernel_info.has_pad_y = false;
107 if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
108 {
109 return false;
110 }
111 gemm_kernel_info.has_pad_y = true;
112 if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
113 {
114 return false;
115 }
116 return true;
117}
118
119//Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs
120inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a,
121 const ITensorInfo *b,
122 const ITensorInfo *c, const ITensorInfo *output)
123{
124 auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
125 if(config)
126 {
127 if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info))
128 {
129 ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
130 return { config.lhs_info, config.rhs_info };
131 }
132 }
133 config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
134 ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
135 return { config.lhs_info, config.rhs_info };
136}
137
138// Validate lhs_info and rhs_info for reshaped kernel
139inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
140 const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info, bool reinterpret_input_as_3d)
141{
142 // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped kernel
143 TensorInfo tmp_a_info{};
144 TensorInfo tmp_b_info{};
145
146 // Validate reshape LHS kernel
147 auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, reinterpret_input_as_3d)));
148 if(!bool(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, reinterpret_input_as_3d)))
149 {
150 return false;
151 }
152
153 // Validate reshape RHS kernel
154 auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
155 if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
156 {
157 return false;
158 }
159 // Validate mm kernel
160 gemm_kernel_info.lhs_info = lhs_info;
161 gemm_kernel_info.rhs_info = rhs_info;
162 if(!bool(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
163 {
164 return false;
165 }
166 return true;
167}
168
169//Automatically select between mlgo (prioritized) and default heuristics for reshaped kernel configs
170inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, const ITensorInfo *b,
171 const ITensorInfo *c, const ITensorInfo *output, bool reinterpret_input_as_3d)
172{
173 auto config = auto_heuristics::select_mlgo_gemm_config_reshaped(query);
174 if(config)
175 {
176 if(validate_lhs_rhs_info_reshaped(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info, reinterpret_input_as_3d))
177 {
178 ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
179 return { config.lhs_info, config.rhs_info };
180 }
181 }
182 config = auto_heuristics::select_default_gemm_config_reshaped(query);
183 ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
184 return { config.lhs_info, config.rhs_info };
185}
186} // namespace
187
// Default-construct all candidate kernels and bookkeeping state.
// The actual kernel selection and configuration happens later in configure().
ClGemm::ClGemm()
    : _reshape_lhs_kernel(std::make_unique<ClGemmReshapeLhsMatrixKernel>()),
      _reshape_rhs_kernel(std::make_unique<ClGemmReshapeRhsMatrixKernel>()),
      _mm_native_kernel(std::make_unique<ClGemmMatrixMultiplyNativeKernel>()),
      _mm_reshaped_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedKernel>()),
      _mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),
      _mm_reshaped_only_rhs_fallback_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()), // y-padding-aware variant
      _tmp_a(),
      _tmp_b(),
      _reshape_b_only_on_first_run(false),
      _gemm_kernel_type(CLGEMMKernelType::NATIVE),
      _is_prepared(false),
      _aux_mem(AuxTensorIdx::Count) // one slot per auxiliary tensor (reshaped LHS/RHS)
{
}
203
// Configure the native (non-reshaped) matrix multiply path: a single kernel,
// no auxiliary reshape tensors are requested.
void ClGemm::configure_native(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
                              const GEMMInfo &gemm_info)
{
    // Derive the GEMM problem dimensions. With reinterpret_input_as_3d the M
    // dimension is the product of the two inner dimensions of 'a' and the batch
    // index moves up to dimension 3.
    DataType           data_type               = a->data_type();
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
    const GPUTarget    gpu_target              = CLScheduler::get().target();
    bool               broadcast_bias          = gemm_info.broadcast_bias();

    // Descriptor consumed by the matrix multiply kernel
    GEMMKernelInfo kernel_info;
    kernel_info.m                       = m;
    kernel_info.n                       = n;
    kernel_info.k                       = k;
    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    kernel_info.broadcast_bias          = broadcast_bias;
    kernel_info.activation_info         = gemm_info.activation_info();
    kernel_info.post_ops                = gemm_info.post_ops();

    // Set the target for the kernels
    _mm_native_kernel->set_target(gpu_target);

    // NOTE(review): the lhs/rhs tile configuration is taken from the
    // reshaped_only_rhs mlgo heuristics even though this is the native kernel —
    // confirm this reuse is intentional and not a copy/paste of the
    // reshaped_only_rhs path.
    auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });

    // Configure and tune matrix multiply kernel
    _mm_native_kernel->configure(compile_context, a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info, kernel_info);
}
235
// Configure the fully-reshaped path: both LHS and RHS are reshaped into
// auxiliary tensors (_tmp_a/_tmp_b) before the matrix multiply kernel runs.
void ClGemm::configure_reshaped(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
                                const GEMMInfo &gemm_info)
{
    // Derive the GEMM problem dimensions. With reinterpret_input_as_3d the M
    // dimension is the product of the two inner dimensions of 'a'.
    DataType           data_type               = a->data_type();
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
    const GPUTarget    gpu_target              = CLScheduler::get().target();
    bool               broadcast_bias          = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m                       = m;
    kernel_info.n                       = n;
    kernel_info.k                       = k;
    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    // The LHS reshape kernel consumes the 3D reinterpretation (it is configured
    // with gemm_info.reinterpret_input_as_3d() below), so the mm kernel sees a 2D LHS.
    kernel_info.reinterpret_input_as_3d = false;
    kernel_info.broadcast_bias          = broadcast_bias;
    kernel_info.activation_info         = gemm_info.activation_info();
    kernel_info.post_ops                = gemm_info.post_ops();

    // Set the target for the kernels
    _reshape_lhs_kernel->set_target(gpu_target);
    _mm_reshaped_kernel->set_target(gpu_target);

    GEMMLHSMatrixInfo lhs_info{};
    GEMMRHSMatrixInfo rhs_info{};

    // Pick up the GEMM configuration (mlgo heuristics first, default heuristics as fallback)
    std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b,
                                                                    c, output, gemm_info.reinterpret_input_as_3d());

    // Configure the reshape kernels; their outputs land in the auxiliary tensors _tmp_a/_tmp_b
    _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
    _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);

    // Configure and tune matrix multiply kernel
    _mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

    // Request memory for LHS and RHS reshape matrix. The reshaped RHS is kept
    // persistent when B is only reshaped on the first run so prepare() can reuse it.
    _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size());
    _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
}
280
// Configure the reshaped-only-RHS path: only the RHS matrix is reshaped into an
// auxiliary tensor; two mm kernel variants (with/without y padding) are prepared
// and the choice between them is deferred to run().
void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
                                         const GEMMInfo &gemm_info)
{
    // Derive the GEMM problem dimensions. With reinterpret_input_as_3d the M
    // dimension is the product of the two inner dimensions of 'a'.
    DataType           data_type               = a->data_type();
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
    const GPUTarget    gpu_target              = CLScheduler::get().target();
    bool               broadcast_bias          = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m                       = m;
    kernel_info.n                       = n;
    kernel_info.k                       = k;
    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    kernel_info.broadcast_bias          = broadcast_bias;
    kernel_info.activation_info         = gemm_info.activation_info();
    kernel_info.post_ops                = gemm_info.post_ops();

    // Set the target for the kernels
    _mm_reshaped_only_rhs_kernel->set_target(gpu_target);
    _mm_reshaped_only_rhs_fallback_kernel->set_target(gpu_target);

    GEMMLHSMatrixInfo lhs_info{};
    GEMMRHSMatrixInfo rhs_info{};

    // Pick up the GEMM configuration (mlgo heuristics first, default heuristics as fallback)
    std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, c, output);

    // Transpose matrix: only the RHS is reshaped in this variant
    _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);

    // Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true)
    // During the prepare stage we check the padding requirement for the lhs and dst tensors. If they do not have
    // pad y, we dispatch CLGEMMMatrixMultiplyReshapedOnlyRHSKernel with has_pad_y = false

    // Configure matrix multiply kernel with no y padding support
    kernel_info.has_pad_y = false;
    _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

    // Configure matrix multiply kernel with y padding support
    kernel_info.has_pad_y = true;
    _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

    // Request memory for RHS reshape matrix; kept persistent when B is only
    // reshaped on the first run so prepare() can reuse it.
    _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
}
332
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100333Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100334{
335 ARM_COMPUTE_UNUSED(alpha);
336 ARM_COMPUTE_UNUSED(output);
337
338 // Get the GPU target
339 const GPUTarget gpu_target = CLScheduler::get().target();
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100340 DataType data_type = a->data_type();
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100341 bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
342 const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
343 const unsigned int n = b->dimension(0);
344 const unsigned int k = a->dimension(0);
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100345 const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100346 const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100347 const bool broadcast_bias = gemm_info.broadcast_bias();
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100348
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100349 GEMMKernelInfo kernel_info;
350 kernel_info.m = m;
351 kernel_info.n = n;
352 kernel_info.k = k;
353 kernel_info.depth_output_gemm3d = depth_output_gemm3d;
354 kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
355 kernel_info.broadcast_bias = broadcast_bias;
356 kernel_info.activation_info = gemm_info.activation_info();
SiCongLiafa19722021-10-24 19:12:33 +0100357 kernel_info.post_ops = gemm_info.post_ops();
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100358
359 auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100360
361 // Validate matrix multiply
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100362 ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyNativeKernel::validate(a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info, kernel_info));
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100363
364 return Status{};
365}
366
// Static validation of the fully-reshaped path: checks the LHS/RHS reshape
// kernels and the reshaped matrix multiply kernel against the shapes the
// reshape kernels would produce.
Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // NOTE(review): alpha and output are in fact forwarded to the kernel
    // validation calls below, so these UNUSED annotations look stale.
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    TensorInfo tmp_a_info{};
    TensorInfo tmp_b_info{};

    // Get the GPU target
    const GPUTarget    gpu_target              = CLScheduler::get().target();
    DataType           data_type               = a->data_type();
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
    const bool         broadcast_bias          = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m                       = m;
    kernel_info.n                       = n;
    kernel_info.k                       = k;
    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    // The LHS reshape consumes the 3D reinterpretation, so the mm kernel is validated with a 2D LHS
    kernel_info.reinterpret_input_as_3d = false;
    kernel_info.broadcast_bias          = broadcast_bias;
    kernel_info.activation_info         = gemm_info.activation_info();
    kernel_info.post_ops                = gemm_info.post_ops();

    GEMMLHSMatrixInfo lhs_info;
    GEMMRHSMatrixInfo rhs_info;

    // Pick up the GEMM configuration
    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
    const auto gemm_config = select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
    lhs_info               = gemm_config.lhs_info;
    rhs_info               = gemm_config.rhs_info;

    // Validate the LHS reshape kernel against the shape it would produce
    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d())));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d()));

    // Validate the RHS reshape kernel against the shape it would produce
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));

    return Status{};
}
416
// Static validation of the reshaped-only-RHS path: checks the RHS reshape kernel
// and both mm kernel variants (has_pad_y = false and true), since the runtime
// chooses between them at execution time.
Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // NOTE(review): alpha and output are in fact forwarded to the kernel
    // validation calls below, so these UNUSED annotations look stale.
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    TensorInfo tmp_b_info{};

    // Get the GPU target
    const GPUTarget    gpu_target              = CLScheduler::get().target();
    const DataType     data_type               = a->data_type();
    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
    const int          depth_output_gemm3d     = gemm_info.depth_output_gemm3d();
    const bool         broadcast_bias          = gemm_info.broadcast_bias();

    GEMMKernelInfo kernel_info;
    kernel_info.m                       = m;
    kernel_info.n                       = n;
    kernel_info.k                       = k;
    kernel_info.depth_output_gemm3d     = depth_output_gemm3d;
    kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
    kernel_info.broadcast_bias          = broadcast_bias;
    kernel_info.activation_info         = gemm_info.activation_info();
    kernel_info.post_ops                = gemm_info.post_ops();

    GEMMLHSMatrixInfo lhs_info;
    GEMMRHSMatrixInfo rhs_info;

    // Pick up the GEMM configuration
    // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
    const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
    lhs_info               = gemm_config.lhs_info;
    rhs_info               = gemm_config.rhs_info;

    // Validate the RHS reshape kernel against the shape it would produce
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply for both padding variants
    kernel_info.has_pad_y = false;
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));

    kernel_info.has_pad_y = true;
    ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));

    return Status{};
}
466
// Entry point: validate the arguments, select a GEMM kernel type via the
// heuristics, then dispatch to the kernel-type-specific configure_* helper.
void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);

    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(validate(a, b, c, output, alpha, beta, gemm_info));
    ARM_COMPUTE_LOG_PARAMS(a, b, c, output, alpha, beta, gemm_info);

    // Check if we need to reshape the matrix B only on the first run
    _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
    // When internal weights are retained the RHS is treated as already
    // transformed, which makes prepare() a no-op (it checks _is_prepared).
    _is_prepared = gemm_info.retain_internal_weights();

    bool               reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);
    const unsigned int batch_size              = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);

    // Select GEMMType (mlgo heuristics prioritized over default heuristics)
    _gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery{ CLScheduler::get().target(), a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run,
                                                b->are_values_constant());

    // Fuse the bias addition only when beta != 0 and a bias tensor is provided
    const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);

    ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;

    switch(_gemm_kernel_type)
    {
        case CLGEMMKernelType::NATIVE:
        {
            configure_native(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
            break;
        }
        case CLGEMMKernelType::RESHAPED:
        {
            configure_reshaped(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
            break;
        }
        case CLGEMMKernelType::RESHAPED_ONLY_RHS:
        {
            configure_reshaped_only_rhs(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
            break;
        }
        default:
        {
            ARM_COMPUTE_ERROR("GEMMType not supported");
        }
    }
}
516
517Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
518{
519 // Get the GPU target
520 bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
521 const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
522 const unsigned int n = b->dimension(0);
523 const unsigned int k = a->dimension(0);
524 const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
525
526 // Select GEMMType
527 CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery
528 {
529 CLScheduler::get().target(), a->data_type(), m, n, k, batch_size,
530 },
Giorgio Arena63e0beb2021-09-24 14:04:27 +0100531 gemm_info.reshape_b_only_on_first_run(), b->are_values_constant());
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100532
533 const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);
534
535 const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;
536
537 switch(gemm_kernel_type)
538 {
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100539 case CLGEMMKernelType::NATIVE:
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100540 {
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100541 ARM_COMPUTE_RETURN_ON_ERROR(validate_native(a, b, c_to_use, output, alpha, beta, gemm_info));
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100542 break;
543 }
544 case CLGEMMKernelType::RESHAPED:
545 {
546 ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info));
547 break;
548 }
549 case CLGEMMKernelType::RESHAPED_ONLY_RHS:
550 {
551 ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info));
552 break;
553 }
554 default:
555 {
556 ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported");
557 }
558 }
559
560 return Status{};
561}
562
563void ClGemm::run(ITensorPack &tensors)
564{
SiCongLiafa19722021-10-24 19:12:33 +0100565 const ITensor *lhs = tensors.get_const_tensor(ACL_SRC_0);
566 const ITensor *rhs = tensors.get_const_tensor(ACL_SRC_1);
567 ITensor *dst = tensors.get_tensor(ACL_DST);
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100568
569 ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, dst);
570
571 CLAuxTensorHandler lhs_reshaped(offset_int_vec(LhsReshape), _tmp_a, tensors, true);
572 CLAuxTensorHandler rhs_reshaped(offset_int_vec(RhsReshape), _tmp_b, tensors, true);
573
574 // Prepare the consts if needed
575 prepare(tensors);
576
577 // Run matrix multiply kernel
578 switch(_gemm_kernel_type)
579 {
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100580 case CLGEMMKernelType::NATIVE:
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100581 {
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100582 CLScheduler::get().enqueue_op(*_mm_native_kernel, tensors, true);
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100583 break;
584 }
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100585 case CLGEMMKernelType::RESHAPED:
586 {
587 // Run interleave kernel
588 ITensorPack reshape_lhs_pack{ { ACL_SRC, lhs }, { ACL_DST, lhs_reshaped.get() } };
589 CLScheduler::get().enqueue_op(*_reshape_lhs_kernel, reshape_lhs_pack, false);
590
591 if(!_reshape_b_only_on_first_run)
592 {
593 // Run transpose kernel
594 ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
595 CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
596 }
SiCongLi579ca842021-10-18 09:38:33 +0100597 // Copy original tensor pack and overwrite lhs and rhs with reshaped counterparts
598 ITensorPack gemm_reshaped_pack(tensors);
599 gemm_reshaped_pack.add_const_tensor(ACL_SRC_0, lhs_reshaped.get());
600 gemm_reshaped_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());
Manuel Bottinid87aded2021-07-16 10:23:31 +0100601
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100602 if(_gemm_kernel_type == CLGEMMKernelType::RESHAPED)
603 {
604 CLScheduler::get().enqueue_op(*_mm_reshaped_kernel, gemm_reshaped_pack, true);
605 }
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100606 break;
607 }
608 case CLGEMMKernelType::RESHAPED_ONLY_RHS:
609 {
610 if(!_reshape_b_only_on_first_run)
611 {
612 // Run transpose kernel
613 ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
614 CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
615 }
616 // In case of RESHAPED_ONLY_RHS, we need to check the padding requirement
617 // Check if the lhs or dst tensors have padding
618 const unsigned int cross_plane_pad_lhs = lhs->info()->padding().top + lhs->info()->padding().bottom;
619 const unsigned int cross_plane_pad_dst = dst->info()->padding().top + dst->info()->padding().bottom;
620 bool has_pad_y = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0);
621
SiCongLiafa19722021-10-24 19:12:33 +0100622 // Copy original tensor pack and overwrite rhs with reshaped counterpart
623 ITensorPack gemm_reshaped_onlyrhs_pack(tensors);
624 gemm_reshaped_onlyrhs_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());
625
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100626 if(has_pad_y)
627 {
628 CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_fallback_kernel, gemm_reshaped_onlyrhs_pack, true);
629 }
630 else
631 {
632 CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_kernel, gemm_reshaped_onlyrhs_pack, true);
633 }
634 break;
635 }
636 default:
637 {
638 ARM_COMPUTE_ERROR("GEMMType not supported");
639 }
640 }
641}
642
643void ClGemm::prepare(ITensorPack &constants)
644{
Manuel Bottinid87aded2021-07-16 10:23:31 +0100645 if(!_is_prepared)
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100646 {
Manuel Bottinid87aded2021-07-16 10:23:31 +0100647 const ITensor *src1 = constants.get_const_tensor(ACL_SRC_1);
648 ICLTensor *rhs_aux = utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape)));
Georgios Pinitas2b147ee2021-07-08 18:14:45 +0100649
Manuel Bottinid87aded2021-07-16 10:23:31 +0100650 // If memory for RHS is persistent and src1 is provided re-transform else assume that RHS is transformed
651 if((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) && (src1 != nullptr && rhs_aux != nullptr) && rhs_aux)
652 {
653 ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Transforming RHS Matrix!");
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100654
Manuel Bottinid87aded2021-07-16 10:23:31 +0100655 CLAuxTensorHandler rhs_reshaped(_tmp_b, *rhs_aux);
656 ARM_COMPUTE_ERROR_ON(rhs_reshaped.get()->cl_buffer().get() == nullptr);
657
658 ITensorPack reshape_rhs_pack{ { ACL_SRC, src1 }, { ACL_DST, rhs_reshaped.get() } };
659 CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, true);
660 }
661 _is_prepared = true;
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100662 }
663}
664
// Report the auxiliary (workspace) tensor requirements recorded during
// configure() — i.e. the memory needed for the reshaped LHS/RHS matrices.
experimental::MemoryRequirements ClGemm::workspace() const
{
    return _aux_mem;
}
669} // namespace opencl
670} // namespace arm_compute