Blame - src/core/NEON/kernels/NEPoolingLayerKernel.cpp - ml/ComputeLibrary

2017-06-26 17:17:42 +0100

[diff] [blame]

213

else if(input->info()->data_type() == DataType::F16)

214

{

215

_func = (PoolingType::AVG == pool_type) ? &NEPoolingLayerKernel::pooling2_f16<PoolingType::AVG> : &NEPoolingLayerKernel::pooling2_f16<PoolingType::MAX>;

216

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

217

else if(input->info()->data_type() == DataType::F32)

218

{

219

_func = (PoolingType::AVG == pool_type) ? &NEPoolingLayerKernel::pooling2_f32<PoolingType::AVG> : &NEPoolingLayerKernel::pooling2_f32<PoolingType::MAX>;

}

break;

case 3:

if(input->info()->data_type() == DataType::QS8)

224

{

225

_func = (PoolingType::AVG == pool_type) ? &NEPoolingLayerKernel::pooling3_q8<PoolingType::AVG> : &NEPoolingLayerKernel::pooling3_q8<PoolingType::MAX>;

226

}

Pablo Tello

2017-06-26 17:17:42 +0100

[diff] [blame]

227

else if(input->info()->data_type() == DataType::F16)

228

{

229

_func = (PoolingType::AVG == pool_type) ? &NEPoolingLayerKernel::pooling3_f16<PoolingType::AVG> : &NEPoolingLayerKernel::pooling3_f16<PoolingType::MAX>;

230

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

231

else if(input->info()->data_type() == DataType::F32)

232

{

233

_func = (PoolingType::AVG == pool_type) ? &NEPoolingLayerKernel::pooling3_f32<PoolingType::AVG> : &NEPoolingLayerKernel::pooling3_f32<PoolingType::MAX>;

234

}

235

break;

Michele Di Giorgio

2017-06-19 15:19:29 +0100

[diff] [blame]

236

case 7:

237

_func = (PoolingType::AVG == pool_type) ? &NEPoolingLayerKernel::pooling7_f32<PoolingType::AVG> : &NEPoolingLayerKernel::pooling7_f32<PoolingType::MAX>;

238

break;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

239

default:

240

ARM_COMPUTE_ERROR("Unsupported pooling size");

break;

}

// Configure kernel window

245

Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));

246

AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);

247

AccessWindowHorizontal output_access(output->info(), 0, num_elems_horizontal_window);

248

update_window_and_padding(win, input_access, output_access);

249

output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));

250

INEKernel::configure(win);

251

}

252

253

template <PoolingType pooling_type>

254

void NEPoolingLayerKernel::pooling2_q8(const Window &window_input, const Window &window)

255

{

256

Iterator input(_input, window_input);

257

Iterator output(_output, window);

258

259

const int fixed_point_position = _input->info()->fixed_point_position();

260

constexpr int pool_size = 2;

261

int pool_pad_x = 0;

262

int pool_pad_y = 0;

263

int pool_stride_x = 0;

264

int pool_stride_y = 0;

265

std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();

266

std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();

267

const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;

268

const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;

269

270

const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));

271

const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));

272

273

execute_window_loop(window, [&](const Coordinates & id)

274

{

275

const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));

276

const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));

277

qint8x8_t res = {};

278

if(pooling_type == PoolingType::AVG)

279

{

280

// Calculate scale

281

const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);

282

const qint8x8_t scale_vec = vdup_n_qs8(scale);

283

284

// Perform pooling

285

const qint8x16_t sum_data = vqaddq_qs8(top_data, bottom_data);

286

res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data), vget_high_s8(sum_data)), scale_vec, fixed_point_position);

}

else

{

const qint8x16_t max_data = vmaxq_s8(top_data, bottom_data);

291

res = vpmax_s8(vget_low_s8(max_data), vget_high_s8(max_data));

292

}

293

vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);

},

input, output);

}

template <PoolingType pooling_type>

Pablo Tello

2017-06-26 17:17:42 +0100

[diff] [blame]

299

void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window &window)

300

{

301

#ifdef ARM_COMPUTE_ENABLE_FP16

302

Iterator input(_input, window_input);

303

Iterator output(_output, window);

304

305

constexpr const int pool_size = 3;

306

int pool_pad_x = 0;

307

int pool_pad_y = 0;

308

int pool_stride_x = 0;

309

int pool_stride_y = 0;

310

std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();

311

std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();

312

const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;

313

const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;

314

315

const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));

316

const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));

317

const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));

318

319

execute_window_loop(window, [&](const Coordinates & id)

320

{

321

const float16x4_t top_data = vld1_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));

322

const float16x4_t middle_data = vld1_f16(reinterpret_cast<const float16_t *>(input_middle_ptr + input.offset()));

323

const float16x4_t bottom_data = vld1_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));

324

float16x4_t res = {};

325

if(pooling_type == PoolingType::AVG)

326

{

327

// Calculate scale

328

const float scale = calculate_avg_scale(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);

329

const float16x4_t scale_v = vdup_n_f16(scale);

330

// Perform pooling

331

const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data);

332

res = vpadd_f16(vset_lane_f16(0.f, sum_data, 3), sum_data);

333

res = vmul_f16(vpadd_f16(res, res), scale_v);

}

else

{

const float16x4_t max_data = vmax_f16(vmax_f16(top_data, bottom_data), middle_data);

338

res = vpmax_f16(vset_lane_f16(-std::numeric_limits<float>::max(), max_data, 3), max_data);

339

res = vpmax_f16(res, res);

340

}

341

*(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);

342

},

343

input, output);

344

#else /* ARM_COMPUTE_ENABLE_FP16 */

345

ARM_COMPUTE_UNUSED(window_input);

346

ARM_COMPUTE_UNUSED(window);

347

ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");

348

#endif /* ARM_COMPUTE_ENABLE_FP16 */

349

}

350

351

template <PoolingType pooling_type>

352

void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window &window)

353

{

354

#ifdef ARM_COMPUTE_ENABLE_FP16

355

Iterator input(_input, window_input);

356

Iterator output(_output, window);

357

constexpr int pool_size = 2;

358

int pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y = 0;

359

std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();

360

std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();

361

const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;

362

const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;

363

364

const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));

365

const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));

366

367

execute_window_loop(window, [&](const Coordinates & id)

368

{

369

const auto top_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_top_ptr + input.offset()));

370

const auto bottom_data = vld2q_f16(reinterpret_cast<const float16_t *>(input_bottom_ptr + input.offset()));

371

float16x8_t res = {};

372

373

if(pooling_type == PoolingType::AVG)

374

{

375

const float scale = calculate_avg_scale(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);

376

const float16x8_t scale_v = vdupq_n_f16(scale);

377

res = vmulq_f16(scale_v, vaddq_f16(bottom_data.val[1], vaddq_f16(bottom_data.val[0], vaddq_f16(top_data.val[0], top_data.val[1]))));

}

else

{

res = vmaxq_f16(bottom_data.val[1], vmaxq_f16(bottom_data.val[0], vmaxq_f16(top_data.val[0], top_data.val[1])));

382

}

383

vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res);

384

},

385

input, output);

386

#else /* ARM_COMPUTE_ENABLE_FP16 */

387

ARM_COMPUTE_UNUSED(window_input);

388

ARM_COMPUTE_UNUSED(window);

389

ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");

390

#endif /* ARM_COMPUTE_ENABLE_FP16 */

391

}

392

393

template <PoolingType pooling_type>

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

394

void NEPoolingLayerKernel::pooling2_f32(const Window &window_input, const Window &window)

395

{

396

Iterator input(_input, window_input);

397

Iterator output(_output, window);

398

Michele Di Giorgio

2017-06-19 15:19:29 +0100

[diff] [blame]

399

constexpr int pool_size = 2;

400

int pool_pad_x = 0;

401

int pool_pad_y = 0;

402

int pool_stride_x = 0;

403

int pool_stride_y = 0;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

404

std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();

405

std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();

406

const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;

407

const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;

408

Michele Di Giorgio

2017-06-19 15:19:29 +0100

[diff] [blame]

409

const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));

410

const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

411

412

execute_window_loop(window, [&](const Coordinates & id)

413

{

414

const float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));

415

const float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));

416

float32x2_t res = {};

417

if(pooling_type == PoolingType::AVG)

418

{

419

// Calculate scale

420

float scale = calculate_avg_scale(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);

421

const float32x2_t scale_v = vdup_n_f32(scale);

422

423

// Perform pooling

424

const float32x2_t sum_data = vadd_f32(top_data, bottom_data);

425

res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);

}

else

{

const float32x2_t max_data = vmax_f32(top_data, bottom_data);

430

res = vpmax_f32(max_data, max_data);

431

}

432

*(reinterpret_cast<float *>(output.ptr())) = vget_lane_f32(res, 0);

},

input, output);

}

template <PoolingType pooling_type>

438

void NEPoolingLayerKernel::pooling3_q8(const Window &window_input, const Window &window)

439

{

440

Iterator input(_input, window_input);

441

Iterator output(_output, window);

442

443

const int fixed_point_position = _input->info()->fixed_point_position();

444

constexpr int pool_size = 3;

445

int pool_pad_x = 0;

446

int pool_pad_y = 0;

447

int pool_stride_x = 0;

448

int pool_stride_y = 0;

449

std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();

450

std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();

451

const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;

452

const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;

453

454

const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));

455

const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));

456

const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));

457

458

execute_window_loop(window, [&](const Coordinates & id)

459

{

460

const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));

461

const auto middle_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_middle_ptr + input.offset()));

462

const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));

463

qint8x8_t res = {};

464

if(pooling_type == PoolingType::AVG)

465

{

466

// Calculate scale

467

const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y, fixed_point_position);

468

const qint8x8_t scale_vec = vdup_n_qs8(scale);

469

470

// Perform pooling for stride 2

471

const qint8x16_t sum_data = vqaddq_qs8(vqaddq_qs8(top_data, bottom_data), middle_data);

472

const qint8x16_t sum_data2 = vextq_s8(sum_data, sum_data, 1);

473

const qint8x16_t sum_data3 = vextq_s8(sum_data, sum_data, 2);

474

const qint8x16_t final_sum = vqaddq_qs8(vqaddq_qs8(sum_data, sum_data2), sum_data3);

475

if(pool_stride_x == 2)

476

{

477

const qint8x8x2_t table = { { vget_low_s8(final_sum), vget_high_s8(final_sum) } };

478

static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };

479

res = vtbl2_s8(table, lookup_val);

}

else

{

res = vget_low_s8(final_sum);

484

}

485

res = vqmul_qs8(res, scale_vec, fixed_point_position);

}

else

{

const qint8x16_t max_data = vmaxq_s8(vmaxq_s8(top_data, bottom_data), middle_data);

490

const qint8x16_t max_data2 = vextq_s8(max_data, max_data, 1);

491

const qint8x16_t max_data3 = vextq_s8(max_data, max_data, 2);

492

const qint8x16_t final_max = vmaxq_s8(vmaxq_s8(max_data, max_data2), max_data3);

493

494

if(pool_stride_x == 2)

495

{

496

const qint8x8x2_t table = { { vget_low_s8(final_max), vget_high_s8(final_max) } };

497

static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };

498

res = vtbl2_s8(table, lookup_val);

}

else

{

res = vget_low_s8(final_max);

503

}

504

}

505

vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);

},

input, output);

}

template <PoolingType pooling_type>

511

void NEPoolingLayerKernel::pooling3_f32(const Window &window_input, const Window &window)

512

{

513

Iterator input(_input, window_input);

514

Iterator output(_output, window);

515

Michele Di Giorgio

2017-06-19 15:19:29 +0100

[diff] [blame]

516

constexpr const int pool_size = 3;

517

int pool_pad_x = 0;

518

int pool_pad_y = 0;

519

int pool_stride_x = 0;

520

int pool_stride_y = 0;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

521

std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();

522

std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();

523

const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;

524

const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;

525

Michele Di Giorgio

2017-06-19 15:19:29 +0100

[diff] [blame]

526

const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));

527

const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));

528

const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

529

530

execute_window_loop(window, [&](const Coordinates & id)

531

{

532

const float32x4_t top_data = vld1q_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));

533

const float32x4_t middle_data = vld1q_f32(reinterpret_cast<const float *>(input_middle_ptr + input.offset()));

534

const float32x4_t bottom_data = vld1q_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));

535

float32x2_t res = {};

536

if(pooling_type == PoolingType::AVG)

537

{

538

// Calculate scale

539

float scale = calculate_avg_scale(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);

540

const float32x2_t scale_v = vdup_n_f32(scale);

541

542

// Perform pooling

543

const float32x4_t sum_data = vaddq_f32(vaddq_f32(top_data, bottom_data), middle_data);

544

res = vpadd_f32(vget_high_f32(vsetq_lane_f32(0.f, sum_data, 3)), vget_low_f32(sum_data));

545

res = vmul_f32(vpadd_f32(res, res), scale_v);

}

else

{

const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data);

550

res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data, 3)), vget_low_f32(max_data));

551

res = vpmax_f32(res, res);

552

}

553

*(reinterpret_cast<float *>(output.ptr())) = vget_lane_f32(res, 0);

},

input, output);

}

Michele Di Giorgio

2017-06-19 15:19:29 +0100

[diff] [blame]

558

template <PoolingType pooling_type>

559

void NEPoolingLayerKernel::pooling7_f32(const Window &window_input, const Window &window)

560

{

561

Iterator input(_input, window_input);

562

Iterator output(_output, window);

563

564

constexpr const int pool_size = 7;

565

int pool_pad_x = 0;

566

int pool_pad_y = 0;

567

int pool_stride_x = 0;

568

int pool_stride_y = 0;

569

std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad();

570

std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();

571

const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x;

572

const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y;

573

574

std::array<const uint8_t *, pool_size> input_ptrs{ {} };

575

for(int i = 0; i < pool_size; ++i)

576

{

577

input_ptrs[i] = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + i));

578

}

579

580

execute_window_loop(window, [&](const Coordinates & id)

581

{

582

float32x2_t res = {};

583

if(pooling_type == PoolingType::AVG)

584

{

585

// Calculate scale

586

float scale = calculate_avg_scale(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);

587

const float32x2_t scale_v = vdup_n_f32(scale);

588

589

// Perform pooling

590

float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));

591

float32x4_t sum_data = vaddq_f32(data.val[0], vsetq_lane_f32(0.f, data.val[1], 3));

592

for(int i = 1; i < pool_size; ++i)

593

{

594

data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));

595

sum_data = vaddq_f32(sum_data, data.val[0]);

596

sum_data = vaddq_f32(sum_data, vsetq_lane_f32(0.f, data.val[1], 3));

597

}

598

res = vpadd_f32(vget_high_f32(sum_data), vget_low_f32(sum_data));

599

res = vmul_f32(vpadd_f32(res, res), scale_v);

}

else

{

float32x4x2_t max_data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[0] + input.offset()));

604

for(int i = 1; i < pool_size; ++i)

605

{

606

const float32x4x2_t data = vld2q_f32(reinterpret_cast<const float *>(input_ptrs[i] + input.offset()));

607

max_data = vmax2q_f32(max_data, data);

608

}

609

res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::max(), max_data.val[1], 3)), vget_low_f32(max_data.val[1]));

610

res = vpmax_f32(res, vpmax_f32(vget_high_f32(max_data.val[0]), vget_low_f32(max_data.val[0])));

611

res = vpmax_f32(res, res);

612

}

613

*(reinterpret_cast<float *>(output.ptr())) = vget_lane_f32(res, 0);

},

input, output);

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

618

void NEPoolingLayerKernel::run(const Window &window)

619

{

620

ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);

621

ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

622

ARM_COMPUTE_ERROR_ON(_func == nullptr);

623

Pablo Tello

2017-06-26 17:17:42 +0100

[diff] [blame]

624

const unsigned int pool_stride_x = _pool_info.pad_stride_info().stride().first;

625

const unsigned int pool_stride_y = _pool_info.pad_stride_info().stride().second;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

626

627

// Set step for input in x and y direction for the input

628

Window window_input(window);

629

unsigned int window_x_inc = 0;

Pablo Tello

2017-06-26 17:17:42 +0100

[diff] [blame]

630

switch(_input->info()->data_type())

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

631

{

Pablo Tello

2017-06-26 17:17:42 +0100

[diff] [blame]

case DataType::QS8:

case DataType::F16:

{

window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;

break;

}

case DataType::F32:

{

window_x_inc = pool_stride_x;

break;

}

default:

{

ARM_COMPUTE_ERROR("Not supported");

646

}

Anthony Barbier