Blame - src/cpu/operators/CpuWinogradConv2d.cpp - ml/ComputeLibrary

2023-09-27 17:46:17 +0100

[diff] [blame]

269

if (_data_layout == DataLayout::NCHW)

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

270

{

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

271

// configure and allocate dst tensor to be used to convert from winograd domain to spatial domain when calling to reshape_output()

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

272

TensorInfo info(TensorShape(dst->dimension(2), dst->dimension(0), dst->dimension(1), dst->dimension(3)), 1,

273

dst->data_type());

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

274

_output_nhwc = info;

275

_permute_output->configure(&_output_nhwc, dst, PermutationVector(1U, 2U, 0U));

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

276

}

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

277

Viet-Hoa Do

2022-08-03 16:39:23 +0100

[diff] [blame]

278

// Configure input transform kernel

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

279

_transform_input_kernel =

280

std::make_unique<CpuWinogradConv2dTransformInputKernel>(_winograd_impl, *_conv_args, nthreads);

Viet-Hoa Do

2022-08-03 16:39:23 +0100

[diff] [blame]

281

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

282

// Configure GEMM function

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

283

_gemm_function->configure(&_winograd_transformed_input, &_winograd_transformed_weights, nullptr,

284

&_winograd_transformed_output, 1.0f, 0.f);

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

285

Viet-Hoa Do

2022-08-03 16:39:23 +0100

[diff] [blame]

286

// Configure output transform kernel

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

287

_transform_output_kernel =

288

std::make_unique<CpuWinogradConv2dTransformOutputKernel>(_winograd_impl, *_conv_args, nthreads);

Viet-Hoa Do

2022-08-03 16:39:23 +0100

[diff] [blame]

289

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

290

//Configure Activation Layer

291

_run_activation = act_info.enabled() && !fuse_function_supported(act_info);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

292

if (_run_activation)

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

293

{

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

294

_activation_func->configure(dst, nullptr, act_info);

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

295

}

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

296

SiCong Li

c5ab4df

2023-10-17 17:38:57 +0100

[diff] [blame]

297

const auto mm_mem_req = _gemm_function->workspace();

298

for (unsigned int slot = 0; slot < mm_mem_req.size(); ++slot)

299

{

300

_aux_mem[slot] = mm_mem_req[slot];

301

}

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

302

303

// Request temporary memory. Overlap memory needed for Input/Output transformations as they run on different non-overlapping time-steps.

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

304

_aux_mem[TransformedInput] = MemoryInfo(offset_int_vec(TransformedInput), MemoryLifetime::Temporary,

305

wds.input_matrix_size_bytes, storage_alignment);

306

_aux_mem[TransformedOutput] = MemoryInfo(offset_int_vec(TransformedOutput), MemoryLifetime::Temporary,

307

wds.output_matrix_size_bytes, storage_alignment);

308

_aux_mem[WorkspaceIO] = MemoryInfo(offset_int_vec(WorkspaceIO), MemoryLifetime::Temporary,

309

std::max(input_workspace_size, output_workspace_size));

310

_aux_mem[PermutedWeights] =

311

MemoryInfo(offset_int_vec(PermutedWeights), MemoryLifetime::Prepare, _weights_hwio.total_size());

312

_aux_mem[TransformedWeights] = MemoryInfo(offset_int_vec(TransformedWeights), MemoryLifetime::Persistent,

313

wds.weight_matrix_size_bytes, storage_alignment);

314

if (_data_layout == DataLayout::NCHW)

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

315

{

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

316

_aux_mem[PermutedInput].merge(offset_int_vec(PermutedInput), src->total_size());

317

_aux_mem[PermutedOutput].merge(offset_int_vec(PermutedOutput), dst->total_size());

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

318

}

Georgios Pinitas

2021-08-20 17:26:45 +0100

[diff] [blame]

319

}

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

320

}

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

321

// Static validation of a Winograd convolution configuration.
//
// Returns an error Status when src, weights or dst is null, when validate_arguments() rejects the
// arguments, when fast math is disabled and src is not single-channel F32, or when
// get_winograd_kernel_implementation() cannot provide an implementation. Otherwise logs the names of
// the selected input, weight and output transforms and returns an empty (successful) Status.
Status CpuWinogradConv2d::validate(const ITensorInfo         *src,
                                   const ITensorInfo         *weights,
                                   const ITensorInfo         *biases,
                                   const ITensorInfo         *dst,
                                   const PadStrideInfo       &conv_info,
                                   const ActivationLayerInfo &act_info,
                                   bool                       enable_fast_math)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info));

    // Disable winograd for fp16 if fast math is false.
    if (!enable_fast_math)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32);
    }

    // Only used to report the kernel size in the error message below.
    const Tensor4DShape              kernel_shape{internal_get_shape(weights)};
    arm_conv::winograd::WinogradImpl winograd_impl{};

    std::unique_ptr<arm_conv::ConvolutionArgs> conv_args;
    const bool success = get_winograd_kernel_implementation(src, weights, dst, conv_info, act_info, enable_fast_math,
                                                            &winograd_impl, conv_args);

    ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(success == false, "Unsupported kernel size: %d x %d.\n", kernel_shape.n_rows,
                                        kernel_shape.n_cols);

    // Report each selected transform by its own name (the weight/output messages previously
    // printed the input transform's name by mistake).
    ARM_COMPUTE_LOG_MSG_WITH_FORMAT_ACL(arm_compute::logging::LogLevel::INFO, "Using input transform: %s\n",
                                        winograd_impl.input_transform->get_name().c_str());
    ARM_COMPUTE_LOG_MSG_WITH_FORMAT_ACL(arm_compute::logging::LogLevel::INFO, "Using weight transform: %s\n",
                                        winograd_impl.weight_transform->get_name().c_str());
    ARM_COMPUTE_LOG_MSG_WITH_FORMAT_ACL(arm_compute::logging::LogLevel::INFO, "Using output transform: %s\n",
                                        winograd_impl.output_transform->get_name().c_str());
    return Status{};
}

355

356

// Executes the convolution: prepare() is invoked first, then (for NCHW) the input is permuted,
// followed by the input transform kernel, the GEMM function, the output transform kernel,
// an optional permute of the result back into the destination (NCHW) and an optional
// in-place activation on the destination tensor.
void CpuWinogradConv2d::run(ITensorPack &tensors)
{
    prepare(tensors);

    auto input_tensor  = tensors.get_const_tensor(ACL_SRC_0);
    auto bias_tensor   = tensors.get_const_tensor(ACL_SRC_2);
    auto output_tensor = tensors.get_tensor(ACL_DST);

    const uint32_t nthreads = NEScheduler::num_threads();

    // The transforms do their own fine-grained threading, so the window only enumerates
    // thread ids in [0, nthreads).
    Window thread_window;
    thread_window.set(Window::DimX, Window::Dimension(0, nthreads, 1));

    // Auxiliary tensors backing the TensorInfos created at configuration time.
    CpuAuxTensorHandler input_nhwc(offset_int_vec(PermutedInput), _input_nhwc, tensors, true);
    CpuAuxTensorHandler winograd_input_transformed(offset_int_vec(TransformedInput), _winograd_transformed_input,
                                                   tensors, true);
    CpuAuxTensorHandler input_workspace(offset_int_vec(WorkspaceIO), _input_workspace, tensors, true);

    const bool is_nchw = (_data_layout == DataLayout::NCHW);
    if (is_nchw)
    {
        // The Winograd code expects NHWC, so bring the channels to the front first.
        ITensorPack permute_in_pack{{ACL_SRC, input_tensor}, {ACL_DST, input_nhwc.get()}};
        _permute_input->run(permute_in_pack);
    }

    CpuAuxTensorHandler winograd_output_transformed(offset_int_vec(TransformedOutput), _winograd_transformed_output,
                                                    tensors, true);
    // Uses the same WorkspaceIO slot as input_workspace.
    CpuAuxTensorHandler output_workspace(offset_int_vec(WorkspaceIO), _output_workspace, tensors, true);
    CpuAuxTensorHandler output_nhwc(offset_int_vec(PermutedOutput), _output_nhwc, tensors, true);

    // Input transform
    auto        transform_src = is_nchw ? input_nhwc.get() : input_tensor;
    ITensorPack transform_input_pack{{ACL_SRC, transform_src},
                                     {ACL_DST, winograd_input_transformed.get()},
                                     {ACL_INT, input_workspace.get()}};
    NEScheduler::get().schedule_op(_transform_input_kernel.get(), Window::DimX, thread_window, transform_input_pack);

    CpuAuxTensorHandler winograd_weights_transformed(offset_int_vec(TransformedWeights), _winograd_transformed_weights,
                                                     tensors, true);

    // GEMM on the transformed input and weights; no bias is passed at this stage
    // (the biases are handed to the output transform below).
    ITensorPack gemm_pack = tensors;
    gemm_pack.add_const_tensor(ACL_SRC, winograd_input_transformed.get());
    gemm_pack.add_const_tensor(ACL_SRC_1, winograd_weights_transformed.get());
    gemm_pack.add_const_tensor(ACL_BIAS, nullptr);
    gemm_pack.add_tensor(ACL_DST, winograd_output_transformed.get());
    _gemm_function->run(gemm_pack);

    // Output transform
    auto        transform_dst = is_nchw ? output_nhwc.get() : output_tensor;
    ITensorPack transform_output_pack{{ACL_SRC_0, winograd_output_transformed.get()},
                                      {ACL_DST, transform_dst},
                                      {ACL_SRC_1, bias_tensor},
                                      {ACL_INT, output_workspace.get()}};
    NEScheduler::get().schedule_op(_transform_output_kernel.get(), Window::DimX, thread_window,
                                   transform_output_pack);

    if (is_nchw)
    {
        // Reorder the convolved output back to ACL's NCHW ordering.
        ITensorPack permute_out_pack{{ACL_SRC, output_nhwc.get()}, {ACL_DST, output_tensor}};
        _permute_output->run(permute_out_pack);
    }

    if (_run_activation)
    {
        // Activation is applied in place on the destination tensor.
        ITensorPack activation_pack{{ACL_SRC, output_tensor}, {ACL_DST, output_tensor}};
        _activation_func->run(activation_pack);
    }
}

// One-off weight preparation; does nothing once _is_prepared is set.
//
// Permutes the weights (ACL_SRC_1) into the PermutedWeights auxiliary tensor, runs the Winograd
// weight transform into the TransformedWeights auxiliary tensor, and then lets the GEMM function
// prepare itself using the transformed weights as ACL_SRC_1.
void CpuWinogradConv2d::prepare(ITensorPack &tensors)
{
    if (!_is_prepared)
    {
        const ITensor *weights = tensors.get_const_tensor(ACL_SRC_1);
        ITensor       *weights_aux =
            utils::cast::polymorphic_cast<ITensor *>(tensors.get_tensor(offset_int_vec(PermutedWeights)));
        // Checked before being dereferenced below, mirroring the check on weights_transf.
        ARM_COMPUTE_ERROR_ON_NULLPTR(weights_aux);

        CpuAuxTensorHandler permuted_weights(_weights_hwio, *weights_aux);
        ITensorPack         permute_tensors{{ACL_SRC, weights}, {ACL_DST, permuted_weights.get()}};
        _permute_weights->run(permute_tensors);

        const int element_size_in_bytes = permuted_weights.get()->info()->element_size();
        // Weights were in OHWI format, before being permuted "permuted_weights" to be in HWIO format.
        const unsigned int height_idx  = 3; // H in HWIO
        const unsigned int width_idx   = 2; // W in HWIO
        const unsigned int channel_idx = 1; // I in HWIO

        // Strides are converted from bytes to elements before being handed to the weight transform.
        const int permuted_weight_row_stride =
            permuted_weights.get()->info()->strides_in_bytes()[height_idx] / element_size_in_bytes;
        const int permuted_weight_col_stride =
            permuted_weights.get()->info()->strides_in_bytes()[width_idx] / element_size_in_bytes;
        const int permuted_weight_channel_stride =
            permuted_weights.get()->info()->strides_in_bytes()[channel_idx] / element_size_in_bytes;

        // Wrap the winograd-domain transformed weight TensorInfo in Auxiliary tensor and allocate the required memory.
        ITensor *weights_transf =
            utils::cast::polymorphic_cast<ITensor *>(tensors.get_tensor(offset_int_vec(TransformedWeights)));
        ARM_COMPUTE_ERROR_ON_NULLPTR(weights_transf);
        CpuAuxTensorHandler winograd_transformed_weights(_winograd_transformed_weights, *weights_transf);

        // Raw pointers to the first element of each buffer, initialised at the point of declaration.
        const void *permuted_weights_ptr = reinterpret_cast<const void *>(
            permuted_weights.get()->buffer() + permuted_weights.get()->info()->offset_first_element_in_bytes());
        void *win_wght_transf_ptr =
            reinterpret_cast<void *>(winograd_transformed_weights.get()->buffer() +
                                     winograd_transformed_weights.get()->info()->offset_first_element_in_bytes());

        // Prepare Weights
        _winograd_impl.weight_transform->execute(
            *_conv_args, permuted_weights_ptr, permuted_weight_row_stride, permuted_weight_col_stride,
            permuted_weight_channel_stride, win_wght_transf_ptr, _winograd_impl.winograd_spec, 0, 1 // Thread 1 of 1
        );

        ITensorPack gemm_pack = tensors;
        gemm_pack.add_const_tensor(ACL_SRC_1, winograd_transformed_weights.get());
        _gemm_function->prepare(gemm_pack);

        _is_prepared = true;
    }
}

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

472

// Returns the auxiliary memory requirements stored in _aux_mem.
experimental::MemoryRequirements CpuWinogradConv2d::workspace() const
{
    return _aux_mem;
}

ramelg01

2022-06-29 16:28:10 +0100

[diff] [blame]

476

Michalis Spyrou

2021-07-01 12:20:56 +0100

[diff] [blame]

477

} // namespace cpu

ramelg01