/*
 * Copyright (c) 2016-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_ICLKERNEL_H
#define ARM_COMPUTE_ICLKERNEL_H

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/IKernel.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLTuningParams.h"

#include "src/core/CL/DefaultLWSHeuristics.h"

#include <string>

namespace arm_compute
{
Giorgio Arena4a95bba2021-06-28 11:00:27 +010042namespace
43{
44bool is_same_lws(cl::NDRange lws0, cl::NDRange lws1)
45{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010046 if (lws0.dimensions() != lws1.dimensions())
Giorgio Arena4a95bba2021-06-28 11:00:27 +010047 {
48 return false;
49 }
50
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010051 for (size_t i = 0; i < lws0.dimensions(); ++i)
Giorgio Arena4a95bba2021-06-28 11:00:27 +010052 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010053 if (lws0.get()[i] != lws1.get()[i])
Giorgio Arena4a95bba2021-06-28 11:00:27 +010054 {
55 return false;
56 }
57 }
58
59 return true;
60}
61} // namespace
template <typename T>
class ICLArray;
class ICLTensor;
class Window;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010066/** Common interface for all the OpenCL kernels */
67class ICLKernel : public IKernel
68{
Diego Lopez Recas0021d752017-12-18 14:42:56 +000069private:
70 /** Returns the number of arguments enqueued per array object.
71 *
72 * @return The number of arguments enqueued per array object.
73 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010074 template <unsigned int dimension_size>
Diego Lopez Recas0021d752017-12-18 14:42:56 +000075 constexpr static unsigned int num_arguments_per_array()
76 {
77 return num_arguments_per_tensor<dimension_size>();
78 }
79 /** Returns the number of arguments enqueued per tensor object.
80 *
81 * @return The number of arguments enqueued per tensor object.
82 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010083 template <unsigned int dimension_size>
Diego Lopez Recas0021d752017-12-18 14:42:56 +000084 constexpr static unsigned int num_arguments_per_tensor()
85 {
86 return 2 + 2 * dimension_size;
87 }
Giorgio Arena4a95bba2021-06-28 11:00:27 +010088
SiCong Li47f177e2023-02-22 17:24:09 +000089 /** Get default lws for the kernel
90 *
91 * @param[in] window Execution window used by the kernel
92 * @param[in] use_dummy_work_items If the kernel uses dummy workloads
93 *
94 * @return cl::NDRange
95 */
96 cl::NDRange default_lws_tune(const Window &window, bool use_dummy_work_items)
Giorgio Arena4a95bba2021-06-28 11:00:27 +010097 {
SiCong Li47f177e2023-02-22 17:24:09 +000098 return get_default_lws_for_type(_type, gws_from_window(window, use_dummy_work_items));
Giorgio Arena4a95bba2021-06-28 11:00:27 +010099 }
100
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100101 using IKernel::configure; //Prevent children from calling IKernel::configure() directly
Anthony Barbier5a65cfd2018-08-10 14:10:08 +0100102protected:
103 /** Configure the kernel's window and local workgroup size hint.
104 *
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000105 * @param[in] window The maximum window which will be returned by window()
106 * @param[in] lws_hint Local-Workgroup-Size to use.
107 * @param[in] wbsm_hint (Optional) Workgroup-Batch-Size-Modifier to use.
Anthony Barbier5a65cfd2018-08-10 14:10:08 +0100108 */
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000109 void configure_internal(const Window &window, cl::NDRange lws_hint, cl_int wbsm_hint = 0)
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100110 {
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000111 configure_internal(window, CLTuningParams(lws_hint, wbsm_hint));
112 }
113
114 /** Configure the kernel's window and tuning parameters hints.
115 *
116 * @param[in] window The maximum window which will be returned by window()
117 * @param[in] tuning_params_hint (Optional) Tuning parameters to use.
118 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100119 void configure_internal(const Window &window,
120 CLTuningParams tuning_params_hint = CLTuningParams(CLKernelLibrary::get().default_ndrange(),
121 0))
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000122 {
123 _tuning_params_hint = tuning_params_hint;
Giorgio Arena4a95bba2021-06-28 11:00:27 +0100124
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100125 if (is_same_lws(_tuning_params_hint.get_lws(), CLKernelLibrary::get().default_ndrange()))
Giorgio Arena4a95bba2021-06-28 11:00:27 +0100126 {
SiCong Li47f177e2023-02-22 17:24:09 +0000127 // Disable use_dummy_work_items at configure time. Because dummy work items only affect gws size, which
128 // will be recalculated with use_dummy_work_items flag at run time again anyway.
129 _tuning_params_hint.set_lws(default_lws_tune(window, false /* use_dummy_work_items */));
Giorgio Arena4a95bba2021-06-28 11:00:27 +0100130 }
131
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100132 IKernel::configure(window);
133 }
134
Anthony Barbier5a65cfd2018-08-10 14:10:08 +0100135public:
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100136 /** Constructor */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000137 ICLKernel()
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100138 : _kernel(nullptr),
139 _target(GPUTarget::MIDGARD),
140 _config_id(arm_compute::default_config_id),
141 _max_workgroup_size(0),
142 _type(CLKernelType::UNKNOWN),
143 _tuning_params_hint(),
144 _cached_gws(cl::NullRange)
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000145 {
146 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100147 /** Returns a reference to the OpenCL kernel of this object.
148 *
149 * @return A reference to the OpenCL kernel of this object.
150 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000151 cl::Kernel &kernel()
152 {
153 return _kernel;
154 }
Giorgio Arena4a95bba2021-06-28 11:00:27 +0100155 /** Returns the CL kernel type
156 *
157 * @return The CL kernel type
158 */
159 CLKernelType type() const
160 {
161 return _type;
162 }
SiCong Li3e363692017-07-04 15:02:10 +0100163 /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
164 *
165 * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
166 * @param[in] array Array to set as an argument of the object's kernel.
167 * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
168 * @param[in] num_dimensions Number of dimensions of the @p array.
169 * @param[in] window Window the kernel will be executed on.
170 */
171 template <typename T>
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100172 void add_1D_array_argument(unsigned int &idx,
173 const ICLArray<T> *array,
174 const Strides &strides,
175 unsigned int num_dimensions,
176 const Window &window)
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000177 {
178 add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
179 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100180 /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
181 *
182 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
183 * @param[in] tensor Tensor to set as an argument of the object's kernel.
184 * @param[in] window Window the kernel will be executed on.
185 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000186 void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
187 {
188 add_tensor_argument<1>(idx, tensor, window);
189 }
Michalis Spyroue1651a52019-07-11 15:00:49 +0100190 /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
191 *
192 * @param[in] cond Condition to check
193 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
194 * @param[in] tensor Tensor to set as an argument of the object's kernel.
195 * @param[in] window Window the kernel will be executed on.
196 */
197 void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
198 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100199 if (cond)
Michalis Spyroue1651a52019-07-11 15:00:49 +0100200 {
201 add_1D_tensor_argument(idx, tensor, window);
202 }
203 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100204 /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
205 *
206 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
207 * @param[in] tensor Tensor to set as an argument of the object's kernel.
208 * @param[in] window Window the kernel will be executed on.
209 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000210 void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
211 {
212 add_tensor_argument<2>(idx, tensor, window);
213 }
Michalis Spyroue1651a52019-07-11 15:00:49 +0100214 /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
215 *
216 * @param[in] cond Condition to check
217 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
218 * @param[in] tensor Tensor to set as an argument of the object's kernel.
219 * @param[in] window Window the kernel will be executed on.
220 */
221 void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
222 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100223 if (cond)
Michalis Spyroue1651a52019-07-11 15:00:49 +0100224 {
225 add_2D_tensor_argument(idx, tensor, window);
226 }
227 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100228 /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
229 *
230 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
231 * @param[in] tensor Tensor to set as an argument of the object's kernel.
232 * @param[in] window Window the kernel will be executed on.
233 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000234 void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
235 {
236 add_tensor_argument<3>(idx, tensor, window);
237 }
steniu01868e5412017-07-17 23:16:00 +0100238 /** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx.
239 *
240 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
241 * @param[in] tensor Tensor to set as an argument of the object's kernel.
242 * @param[in] window Window the kernel will be executed on.
243 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000244 void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
245 {
246 add_tensor_argument<4>(idx, tensor, window);
247 }
ramelg0137515692022-02-26 22:06:20 +0000248 /** Add the passed 5D tensor's parameters to the object's kernel's arguments starting from the index idx.
249 *
250 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
251 * @param[in] tensor Tensor to set as an argument of the object's kernel.
252 * @param[in] window Window the kernel will be executed on.
253 */
254 void add_5D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
255 {
256 add_tensor_argument<5>(idx, tensor, window);
257 }
Adnan AlSinan17975a62021-11-08 17:46:39 +0000258
Gian Marco Iodice4fb56702021-11-10 11:18:50 +0000259 /** Add the passed NHW 3D tensor's parameters to the object's kernel's arguments by passing strides, dimensions and the offset to the first valid element in bytes.
260 *
261 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
262 * @param[in] tensor Tensor to set as an argument of the object's kernel.
263 */
264 void add_3d_tensor_nhw_argument(unsigned int &idx, const ICLTensor *tensor);
265
266 /** Returns the number of arguments enqueued per NHW 3D Tensor object.
267 *
268 * @return The number of arguments enqueued per NHW 3D Tensor object.
269 */
270 constexpr static unsigned int num_arguments_per_3d_tensor_nhw()
271 {
272 constexpr unsigned int no_args_per_3d_tensor_nhw = 7u;
273 return no_args_per_3d_tensor_nhw;
274 }
275
Adnan AlSinan17975a62021-11-08 17:46:39 +0000276 /** Add the passed NHWC 4D tensor's parameters to the object's kernel's arguments by passing strides, dimensions and the offset to the first valid element in bytes.
277 *
278 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
279 * @param[in] tensor Tensor to set as an argument of the object's kernel.
280 */
281 void add_4d_tensor_nhwc_argument(unsigned int &idx, const ICLTensor *tensor);
282
283 /** Returns the number of arguments enqueued per NHWC 4D Tensor object.
284 *
285 * @return The number of arguments enqueued per NHWC 4D Tensor object.
286 */
287 constexpr static unsigned int num_arguments_per_4d_tensor_nhwc()
288 {
289 constexpr unsigned int no_args_per_4d_tensor_nhwc = 9u;
290 return no_args_per_4d_tensor_nhwc;
291 }
292
SiCong Li3e363692017-07-04 15:02:10 +0100293 /** Returns the number of arguments enqueued per 1D array object.
294 *
295 * @return The number of arguments enqueues per 1D array object.
296 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000297 constexpr static unsigned int num_arguments_per_1D_array()
298 {
299 return num_arguments_per_array<1>();
300 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100301 /** Returns the number of arguments enqueued per 1D tensor object.
302 *
303 * @return The number of arguments enqueues per 1D tensor object.
304 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000305 constexpr static unsigned int num_arguments_per_1D_tensor()
306 {
307 return num_arguments_per_tensor<1>();
308 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100309 /** Returns the number of arguments enqueued per 2D tensor object.
310 *
311 * @return The number of arguments enqueues per 2D tensor object.
312 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000313 constexpr static unsigned int num_arguments_per_2D_tensor()
314 {
315 return num_arguments_per_tensor<2>();
316 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100317 /** Returns the number of arguments enqueued per 3D tensor object.
318 *
319 * @return The number of arguments enqueues per 3D tensor object.
320 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000321 constexpr static unsigned int num_arguments_per_3D_tensor()
322 {
323 return num_arguments_per_tensor<3>();
324 }
steniu01868e5412017-07-17 23:16:00 +0100325 /** Returns the number of arguments enqueued per 4D tensor object.
326 *
327 * @return The number of arguments enqueues per 4D tensor object.
328 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000329 constexpr static unsigned int num_arguments_per_4D_tensor()
330 {
331 return num_arguments_per_tensor<4>();
332 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100333 /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
334 *
335 * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
336 *
337 * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
338 * @param[in,out] queue Command queue on which to enqueue the kernel.
339 */
Michalis Spyrou2aad21a2020-07-02 12:43:53 +0100340 virtual void run(const Window &window, cl::CommandQueue &queue)
341 {
342 ARM_COMPUTE_UNUSED(window, queue);
343 }
344 /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
345 *
346 * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
347 *
Georgios Pinitas0499dff2020-07-31 22:21:38 +0100348 * @param[in] tensors A vector containing the tensors to operato on.
Michalis Spyrou2aad21a2020-07-02 12:43:53 +0100349 * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
350 * @param[in,out] queue Command queue on which to enqueue the kernel.
351 */
Georgios Pinitas0499dff2020-07-31 22:21:38 +0100352 virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
Michalis Spyrou2aad21a2020-07-02 12:43:53 +0100353 {
Georgios Pinitas0499dff2020-07-31 22:21:38 +0100354 ARM_COMPUTE_UNUSED(tensors, window, queue);
Michalis Spyrou2aad21a2020-07-02 12:43:53 +0100355 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100356 /** Add the passed parameters to the object's kernel's arguments starting from the index idx.
357 *
358 * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set.
359 * @param[in] value Value to set as an argument of the object's kernel.
360 */
361 template <typename T>
362 void add_argument(unsigned int &idx, T value)
363 {
364 _kernel.setArg(idx++, value);
365 }
366
Gian Marco Iodice9331aeb2017-08-10 17:11:08 +0100367 /** Set the Local-Workgroup-Size hint
368 *
369 * @note This method should be called after the configuration of the kernel
370 *
371 * @param[in] lws_hint Local-Workgroup-Size to use
372 */
Anthony Barbierd727e852018-04-20 11:05:29 +0100373 void set_lws_hint(const cl::NDRange &lws_hint)
Gian Marco Iodice9331aeb2017-08-10 17:11:08 +0100374 {
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100375 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000376 _tuning_params_hint.set_lws(lws_hint);
Gian Marco Iodice9331aeb2017-08-10 17:11:08 +0100377 }
378
Georgios Pinitasc0d1c862018-03-23 15:13:15 +0000379 /** Return the Local-Workgroup-Size hint
380 *
381 * @return Current lws hint
382 */
383 cl::NDRange lws_hint() const
384 {
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000385 return _tuning_params_hint.get_lws();
386 }
387
388 /** Set the workgroup batch size modifier hint
389 *
390 * @note This method should be called after the configuration of the kernel
391 *
392 * @param[in] wbsm_hint workgroup batch size modifier value
393 */
394 void set_wbsm_hint(const cl_int &wbsm_hint)
395 {
396 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // wbsm_hint will be overwritten by configure()
397 _tuning_params_hint.set_wbsm(wbsm_hint);
398 }
399
400 /** Return the workgroup batch size modifier hint
401 *
402 * @return Current wbsm hint
403 */
404 cl_int wbsm_hint() const
405 {
406 return _tuning_params_hint.get_wbsm();
Georgios Pinitasc0d1c862018-03-23 15:13:15 +0000407 }
408
Gian Marcode691f02017-09-08 16:13:11 +0100409 /** Get the configuration ID
410 *
411 * @note The configuration ID can be used by the caller to distinguish different calls of the same OpenCL kernel
412 * In particular, this method can be used by CLScheduler to keep track of the best LWS for each configuration of the same kernel.
413 * The configuration ID should be provided only for the kernels potentially affected by the LWS geometry
414 *
415 * @note This method should be called after the configuration of the kernel
416 *
417 * @return configuration id string
418 */
419 const std::string &config_id() const
420 {
421 return _config_id;
422 }
423
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100424 /** Set the targeted GPU architecture
425 *
426 * @param[in] target The targeted GPU architecture
427 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000428 void set_target(GPUTarget target)
429 {
430 _target = target;
431 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100432
433 /** Set the targeted GPU architecture according to the CL device
434 *
435 * @param[in] device A CL device
436 */
437 void set_target(cl::Device &device);
438
439 /** Get the targeted GPU architecture
440 *
441 * @return The targeted GPU architecture.
442 */
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000443 GPUTarget get_target() const
444 {
445 return _target;
446 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100447
Abel Bernabeu5a6e0532017-09-28 09:53:45 +0100448 /** Get the maximum workgroup size for the device the CLKernelLibrary uses.
449 *
450 * @return The maximum workgroup size value.
451 */
452 size_t get_max_workgroup_size();
Georgios Pinitas1f378ee2017-10-27 13:37:16 +0100453 /** Get the global work size given an execution window
454 *
SiCong Li47f177e2023-02-22 17:24:09 +0000455 * @param[in] window Execution window
456 * @param[in] use_dummy_work_items If the kernel uses dummy work items
Georgios Pinitas1f378ee2017-10-27 13:37:16 +0100457 *
458 * @return Global work size of the given execution window
459 */
SiCong Li47f177e2023-02-22 17:24:09 +0000460 static cl::NDRange gws_from_window(const Window &window, bool use_dummy_work_items);
461
462 /** Get the cached gws used to enqueue this kernel
463 *
464 * @return Latest global work size of the kernel
465 */
466 cl::NDRange get_cached_gws() const;
467
468 /** Cache the latest gws used to enqueue this kernel
469 *
470 * @param[in] gws Latest global work size of the kernel
471 */
472 void cache_gws(const cl::NDRange &gws);
Abel Bernabeu5a6e0532017-09-28 09:53:45 +0100473
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100474private:
SiCong Li3e363692017-07-04 15:02:10 +0100475 /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
476 *
477 * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
478 * @param[in] array Array to set as an argument of the object's kernel.
479 * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
480 * @param[in] num_dimensions Number of dimensions of the @p array.
481 * @param[in] window Window the kernel will be executed on.
482 */
483 template <typename T, unsigned int dimension_size>
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100484 void add_array_argument(unsigned int &idx,
485 const ICLArray<T> *array,
486 const Strides &strides,
487 unsigned int num_dimensions,
488 const Window &window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100489 /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
490 *
491 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
492 * @param[in] tensor Tensor to set as an argument of the object's kernel.
493 * @param[in] window Window the kernel will be executed on.
494 */
495 template <unsigned int dimension_size>
496 void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100497
498protected:
Giorgio Arena4a95bba2021-06-28 11:00:27 +0100499 cl::Kernel _kernel; /**< OpenCL kernel to run */
500 GPUTarget _target; /**< The targeted GPU */
501 std::string _config_id; /**< Configuration ID */
502 size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
503 CLKernelType _type; /**< The CL kernel type */
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100504private:
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000505 CLTuningParams _tuning_params_hint; /**< Tuning parameters hint for the OpenCL kernel */
SiCong Li47f177e2023-02-22 17:24:09 +0000506 cl::NDRange _cached_gws; /**< Latest GWS used to enqueue this kernel */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100507};
508
509/** Add the kernel to the command queue with the given window.
510 *
511 * @note Depending on the size of the window, this might translate into several jobs being enqueued.
512 *
513 * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
514 *
Gian Marco Iodiceb0c50372019-03-15 10:13:05 +0000515 * @param[in,out] queue OpenCL command queue.
516 * @param[in] kernel Kernel to enqueue
517 * @param[in] window Window the kernel has to process.
518 * @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target.
519 * @param[in] use_dummy_work_items (Optional) Use dummy work items in order to have two dimensional power of two NDRange. Default is false
520 * Note: it is kernel responsibility to check if the work-item is out-of-range
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100521 *
522 * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
523 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100524void enqueue(cl::CommandQueue &queue,
525 ICLKernel &kernel,
526 const Window &window,
527 const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(),
528 bool use_dummy_work_items = false);
SiCong Li3e363692017-07-04 15:02:10 +0100529
Alex Gildayc357c472018-03-21 13:54:09 +0000530/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
531 *
532 * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
533 * @param[in] array Array to set as an argument of the object's kernel.
534 * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
535 * @param[in] num_dimensions Number of dimensions of the @p array.
536 * @param[in] window Window the kernel will be executed on.
537 */
SiCong Li3e363692017-07-04 15:02:10 +0100538template <typename T, unsigned int dimension_size>
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100539void ICLKernel::add_array_argument(
540 unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
SiCong Li3e363692017-07-04 15:02:10 +0100541{
Diego Lopez Recas0021d752017-12-18 14:42:56 +0000542 ARM_COMPUTE_ERROR_ON(array == nullptr);
543
SiCong Li3e363692017-07-04 15:02:10 +0100544 // Calculate offset to the start of the window
545 unsigned int offset_first_element = 0;
546
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100547 for (unsigned int n = 0; n < num_dimensions; ++n)
SiCong Li3e363692017-07-04 15:02:10 +0100548 {
549 offset_first_element += window[n].start() * strides[n];
550 }
551
552 unsigned int idx_start = idx;
553 _kernel.setArg(idx++, array->cl_buffer());
554
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100555 for (unsigned int dimension = 0; dimension < dimension_size; dimension++)
SiCong Li3e363692017-07-04 15:02:10 +0100556 {
557 _kernel.setArg<cl_uint>(idx++, strides[dimension]);
558 _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step());
559 }
560
561 _kernel.setArg<cl_uint>(idx++, offset_first_element);
562
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100563 ARM_COMPUTE_ERROR_ON_MSG_VAR(idx_start + num_arguments_per_array<dimension_size>() != idx,
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100564 "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel",
565 dimension_size, num_arguments_per_array<dimension_size>());
SiCong Li3e363692017-07-04 15:02:10 +0100566 ARM_COMPUTE_UNUSED(idx_start);
567}
} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLKERNEL_H */