Blame - src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp - ml/ComputeLibrary

2019-02-25 13:50:11 +0000

[diff] [blame]

264

}

Manuel Bottini

2019-02-25 13:50:11 +0000

[diff] [blame]

265

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

266

// Compute left-over elements

267

for(; x < window_end_x; ++x)

268

{

269

const float afs = static_cast<int32_t>(*(non_broadcast_input_ptr + x) - non_broadcast_qinfo.offset) * non_broadcast_qinfo.scale;

270

*(output_ptr + x) = quantize<T>((afs - bfs), out->info()->quantization_info());

271

}

272

},

273

broadcast_input, non_broadcast_input, output);

}

else

{

// Clear X Dimension on execution window as we handle manually

278

input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));

279

input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));

Manuel Bottini

2019-02-25 13:50:11 +0000

[diff] [blame]

280

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

281

Iterator input1(in1, input1_win);

282

Iterator input2(in2, input2_win);

283

Iterator output(out, win);

284

285

execute_window_loop(win, [&](const Coordinates &)

286

{

287

const auto input1_ptr = reinterpret_cast<const T *>(input1.ptr());

288

const auto input2_ptr = reinterpret_cast<const T *>(input2.ptr());

289

const auto output_ptr = reinterpret_cast<T *>(output.ptr());

290

291

// Compute S elements per iteration

292

int x = window_start_x;

293

for(; x <= (window_end_x - window_step_x); x += window_step_x)

294

{

295

const auto a = wrapper::vloadq(input1_ptr + x);

296

const auto b = wrapper::vloadq(input2_ptr + x);

297

298

const float32x4x4_t af =

299

{

300

{

301

vmulq_f32(vcvtq_f32_s32(vsubq_s32(wrapper::vreinterpret(wrapper::vmovl(wrapper::vgetlow(wrapper::vmovl(wrapper::vgetlow(a))))), voffset1)), vscale1),

302

vmulq_f32(vcvtq_f32_s32(vsubq_s32(wrapper::vreinterpret(wrapper::vmovl(wrapper::vgethigh(wrapper::vmovl(wrapper::vgetlow(a))))), voffset1)), vscale1),

303

vmulq_f32(vcvtq_f32_s32(vsubq_s32(wrapper::vreinterpret(wrapper::vmovl(wrapper::vgetlow(wrapper::vmovl(wrapper::vgethigh(a))))), voffset1)), vscale1),

304

vmulq_f32(vcvtq_f32_s32(vsubq_s32(wrapper::vreinterpret(wrapper::vmovl(wrapper::vgethigh(wrapper::vmovl(wrapper::vgethigh(a))))), voffset1)), vscale1),

}

};

const float32x4x4_t bf =

309

{

310

{

311

vmulq_f32(vcvtq_f32_s32(vsubq_s32(wrapper::vreinterpret(wrapper::vmovl(wrapper::vgetlow(wrapper::vmovl(wrapper::vgetlow(b))))), voffset2)), vscale2),

312

vmulq_f32(vcvtq_f32_s32(vsubq_s32(wrapper::vreinterpret(wrapper::vmovl(wrapper::vgethigh(wrapper::vmovl(wrapper::vgetlow(b))))), voffset2)), vscale2),

313

vmulq_f32(vcvtq_f32_s32(vsubq_s32(wrapper::vreinterpret(wrapper::vmovl(wrapper::vgetlow(wrapper::vmovl(wrapper::vgethigh(b))))), voffset2)), vscale2),

314

vmulq_f32(vcvtq_f32_s32(vsubq_s32(wrapper::vreinterpret(wrapper::vmovl(wrapper::vgethigh(wrapper::vmovl(wrapper::vgethigh(b))))), voffset2)), vscale2),

}

};

const int32x4x4_t rf =

{

{

#ifdef __aarch64__

vcvtnq_s32_f32(vmlaq_f32(voffseto, vsubq_f32(af.val[0], bf.val[0]), invvscaleo)),

323

vcvtnq_s32_f32(vmlaq_f32(voffseto, vsubq_f32(af.val[1], bf.val[1]), invvscaleo)),

324

vcvtnq_s32_f32(vmlaq_f32(voffseto, vsubq_f32(af.val[2], bf.val[2]), invvscaleo)),

325

vcvtnq_s32_f32(vmlaq_f32(voffseto, vsubq_f32(af.val[3], bf.val[3]), invvscaleo)),

326

#else //__aarch64__

327

vcvtq_s32_f32(vmlaq_f32(voffseto, vsubq_f32(af.val[0], bf.val[0]), invvscaleo)),

328

vcvtq_s32_f32(vmlaq_f32(voffseto, vsubq_f32(af.val[1], bf.val[1]), invvscaleo)),

329

vcvtq_s32_f32(vmlaq_f32(voffseto, vsubq_f32(af.val[2], bf.val[2]), invvscaleo)),

330

vcvtq_s32_f32(vmlaq_f32(voffseto, vsubq_f32(af.val[3], bf.val[3]), invvscaleo)),

#endif //__aarch64__

}

};

const auto pa = wrapper::vqmov<T>(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));

336

const auto pb = wrapper::vqmov<T>(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));

337

wrapper::vstore(output_ptr + x, wrapper::vcombine(pa, pb));

338

}

339

340

// Compute left-over elements

341

for(; x < window_end_x; ++x)

342

{

343

const float afs = static_cast<int32_t>((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale;

344

const float bfs = static_cast<int32_t>((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale;

345

346

*(output_ptr + x) = quantize<T>((afs - bfs), out->info()->quantization_info());

347

}

348

},

349

input1, input2, output);

350

}

Manuel Bottini

2019-02-25 13:50:11 +0000

[diff] [blame]

351

}

352

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

353

void sub_QSYMM16_QSYMM16_QSYMM16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, bool is_sat)

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

354

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

355

ARM_COMPUTE_UNUSED(is_sat);

356

357

// Create input windows

358

Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());

359

Window input2_win = window.broadcast_if_dimension_le_one(in2->info()->tensor_shape());

360

361

// Clear X Dimension on execution window as we handle manually

362

Window win = window;

363

win.set(Window::DimX, Window::Dimension(0, 1, 1));

364

365

const int window_step_x = 8;

366

const auto window_start_x = static_cast<int>(window.x().start());

367

const auto window_end_x = static_cast<int>(window.x().end());

368

const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

369

370

const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();

371

const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();

372

const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform();

373

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

374

const float32x4_t vscale1 = vdupq_n_f32(iq1_info.scale);

375

const float32x4_t vscale2 = vdupq_n_f32(iq2_info.scale);

376

const float32x4_t invvscaleo = vdupq_n_f32(1.f / oq_info.scale);

377

378

if(is_broadcast_across_x)

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

379

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

380

const bool is_broadcast_input_2 = input2_win.x().step() == 0;

381

Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win;

382

Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win;

383

const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;

384

const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;

385

const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform();

386

const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform();

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

387

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

388

// Clear X Dimension on execution window as we handle manually

389

non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1));

390

391

Iterator broadcast_input(broadcast_tensor, broadcast_win);

392

Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);

393

Iterator output(out, win);

394

395

execute_window_loop(win, [&](const Coordinates &)

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

396

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

397

const auto non_broadcast_input_ptr = reinterpret_cast<const int16_t *>(non_broadcast_input.ptr());

398

const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

399

400

const int16_t broadcast_value = *reinterpret_cast<const int16_t *>(broadcast_input.ptr());

401

const int16x8_t broadcast_value_vec = vdupq_n_s16(broadcast_value);

402

403

const float32x4x2_t bf =

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

404

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

405

{

406

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(broadcast_value_vec))), vscale2),

407

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(broadcast_value_vec))), vscale2),

408

}

409

};

410

const float bfs = static_cast<int32_t>(broadcast_value) * broadcast_qinfo.scale;

411

412

// Compute S elements per iteration

413

int x = window_start_x;

414

for(; x <= (window_end_x - window_step_x); x += window_step_x)

415

{

416

const int16x8_t a = vld1q_s16(non_broadcast_input_ptr + x);

417

const float32x4x2_t af =

418

{

419

{

420

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(a))), vscale1),

421

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(a))), vscale1),

}

};

const int32x4x4_t rf =

{

{

#ifdef __aarch64__

vcvtnq_s32_f32(vmulq_f32(is_broadcast_input_2 ? vsubq_f32(bf.val[0], af.val[0]) : vsubq_f32(af.val[0], bf.val[0]), invvscaleo)),

430

vcvtnq_s32_f32(vmulq_f32(is_broadcast_input_2 ? vsubq_f32(bf.val[1], af.val[1]) : vsubq_f32(af.val[1], bf.val[1]), invvscaleo)),

431

#else //__aarch64__

432

vcvtq_s32_f32(vmulq_f32(is_broadcast_input_2 ? vsubq_f32(bf.val[0], af.val[0]) : vsubq_f32(af.val[0], bf.val[0]), invvscaleo)),

433

vcvtq_s32_f32(vmulq_f32(is_broadcast_input_2 ? vsubq_f32(bf.val[1], af.val[1]) : vsubq_f32(af.val[1], bf.val[1]), invvscaleo)),

#endif //__aarch64__

}

};

const int16x8_t pa = vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]));

439

vst1q_s16(output_ptr + x, pa);

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

440

}

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

441

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

442

// Compute left-over elements

443

for(; x < window_end_x; ++x)

444

{

445

const float afs = static_cast<int32_t>(*(non_broadcast_input_ptr + x)) * non_broadcast_qinfo.scale;

446

*(output_ptr + x) = quantize_qsymm16(is_broadcast_input_2 ? (bfs - afs) : (afs - bfs), oq_info);

447

}

448

},

449

broadcast_input, non_broadcast_input, output);

}

else

{

// Clear X Dimension on execution window as we handle manually

454

input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));

455

input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

456

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

457

Iterator input1(in1, input1_win);

458

Iterator input2(in2, input2_win);

459

Iterator output(out, win);

460

461

execute_window_loop(win, [&](const Coordinates &)

462

{

463

const auto input1_ptr = reinterpret_cast<const int16_t *>(input1.ptr());

464

const auto input2_ptr = reinterpret_cast<const int16_t *>(input2.ptr());

465

const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

466

467

// Compute S elements per iteration

468

int x = window_start_x;

469

for(; x <= (window_end_x - window_step_x); x += window_step_x)

470

{

471

const int16x8_t a = vld1q_s16(input1_ptr + x);

472

const int16x8_t b = vld1q_s16(input2_ptr + x);

473

474

const float32x4x2_t af =

475

{

476

{

477

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(a))), vscale1),

478

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(a))), vscale1),

}

};

const float32x4x2_t bf =

483

{

484

{

485

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(b))), vscale2),

486

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(b))), vscale2),

}

};

const int32x4x2_t rf =

{

{

#ifdef __aarch64__

vcvtnq_s32_f32(vmulq_f32(vsubq_f32(af.val[0], bf.val[0]), invvscaleo)),

495

vcvtnq_s32_f32(vmulq_f32(vsubq_f32(af.val[1], bf.val[1]), invvscaleo)),

496

#else //__aarch64__

497

vcvtq_s32_f32(vmulq_f32(vsubq_f32(af.val[0], bf.val[0]), invvscaleo)),

498

vcvtq_s32_f32(vmulq_f32(vsubq_f32(af.val[1], bf.val[1]), invvscaleo)),

#endif //__aarch64__

}

};

const int16x8_t pa = vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]));

504

vst1q_s16(output_ptr + x, pa);

505

}

506

507

// Compute left-over elements

508

for(; x < window_end_x; ++x)

509

{

510

const float afs = static_cast<int32_t>((*(input1_ptr + x))) * iq1_info.scale;

511

const float bfs = static_cast<int32_t>((*(input2_ptr + x))) * iq2_info.scale;

512

*(output_ptr + x) = quantize_qsymm16((afs - bfs), out->info()->quantization_info());

513

}

514

},

515

input1, input2, output);

}

}

void sub_S16_U8_S16_impl(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, bool is_sat, bool is_swapped)

520

{

521

// Create input windows

522

Window win = window;

523

Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());

524

Window input2_win = window.broadcast_if_dimension_le_one(in2->info()->tensor_shape());

525

526

// Clear X Dimension on execution window as we handle manually

527

win.set(Window::DimX, Window::Dimension(0, 1, 1));

528

input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));

529

input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));

530

531

Iterator input1(in1, input1_win);

532

Iterator input2(in2, input2_win);

533

Iterator output(out, win);

534

535

const int window_step_x = 8;

536

const auto window_start_x = static_cast<int>(window.x().start());

537

const auto window_end_x = static_cast<int>(window.x().end());

538

539

execute_window_loop(win, [&](const Coordinates &)

540

{

541

const auto input1_ptr = reinterpret_cast<const int16_t *>(input1.ptr());

542

const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());

543

const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

if(!is_sat)

{

// Compute S elements per iteration

548

int x = window_start_x;

549

for(; x <= (window_end_x - window_step_x); x += window_step_x)

550

{

551

const auto vin1 = wrapper::vloadq(input1_ptr + x);

552

const auto vin2 = vreinterpretq_s16_u16(wrapper::vmovl(wrapper::vload(input2_ptr + x)));

553

const auto res = is_swapped ? wrapper::vsub(vin2, vin1) : wrapper::vsub(vin1, vin2);

554

wrapper::vstore(output_ptr + x, res);

555

}

556

557

// Compute left-over elements

558

for(; x < window_end_x; ++x)

559

{

560

const auto res = is_swapped ? static_cast<int16_t>(*(input2_ptr + x)) - *(input1_ptr + x) : *(input1_ptr + x) - static_cast<int16_t>(*(input2_ptr + x));

561

*(output_ptr + x) = res;

}

}

else

{

// Compute S elements per iteration

567

int x = window_start_x;

568

for(; x <= (window_end_x - window_step_x); x += window_step_x)

569

{

570

const auto vin1 = wrapper::vloadq(input1_ptr + x);

571

const auto vin2 = vreinterpretq_s16_u16(wrapper::vmovl(wrapper::vload(input2_ptr + x)));

572

const auto res = is_swapped ? wrapper::vqsub(vin2, vin1) : wrapper::vqsub(vin1, vin2);

573

wrapper::vstore(output_ptr + x, res);

574

}

575

576

// Compute left-over elements

577

for(; x < window_end_x; ++x)

578

{

579

const auto res = is_swapped ? wrapper::sub_sat(static_cast<int16_t>(*(input2_ptr + x)), *(input1_ptr + x)) : wrapper::sub_sat(*(input1_ptr + x), static_cast<int16_t>(*(input2_ptr + x)));

580

*(output_ptr + x) = res;

581

}

582

}

Michalis Spyrou

2019-12-04 12:00:36 +0000

[diff] [blame]

583

},

584

input1, input2, output);

585

}

586

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

587

void sub_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, bool is_sat)

Michele Di Giorgio

2020-03-27 10:23:44 +0000

[diff] [blame]

588

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

589

sub_S16_U8_S16_impl(in1, in2, out, window, is_sat, false);

Michele Di Giorgio

2020-03-27 10:23:44 +0000

[diff] [blame]

590

}

591

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

592

void sub_U8_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, bool is_sat)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

593

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

594

// Swap arguments

595

sub_S16_U8_S16_impl(in2, in1, out, window, is_sat, true);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

596

}

597

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

598

void sub_U8_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, bool is_sat)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

599

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

600

// Create input windows

601

Window win = window;

602

Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());

603

Window input2_win = window.broadcast_if_dimension_le_one(in2->info()->tensor_shape());

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

604

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

605

// Clear X Dimension on execution window as we handle manually

606

win.set(Window::DimX, Window::Dimension(0, 1, 1));

607

input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));

608

input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));

609

610

Iterator input1(in1, input1_win);

611

Iterator input2(in2, input2_win);

612

Iterator output(out, win);

613

614

const int window_step_x = 8;

615

const auto window_start_x = static_cast<int>(window.x().start());

616

const auto window_end_x = static_cast<int>(window.x().end());

617

618

execute_window_loop(win, [&](const Coordinates &)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

619

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

620

const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());

621

const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());

622

const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

623

624

if(!is_sat)

Michele Di Giorgio

2020-03-27 10:23:44 +0000

[diff] [blame]

625

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

626

// Compute S elements per iteration

627

int x = window_start_x;

628

for(; x <= (window_end_x - window_step_x); x += window_step_x)

Michele Di Giorgio

2020-03-27 10:23:44 +0000

[diff] [blame]

629

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

630

const auto vin1 = vreinterpretq_s16_u16(wrapper::vmovl(wrapper::vload(input1_ptr + x)));

631

const auto vin2 = vreinterpretq_s16_u16(wrapper::vmovl(wrapper::vload(input2_ptr + x)));

632

wrapper::vstore(output_ptr + x, wrapper::vsub(vin1, vin2));

Michele Di Giorgio

2020-03-27 10:23:44 +0000

[diff] [blame]

633

}

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

634

635

// Compute left-over elements

636

for(; x < window_end_x; ++x)

637

{

638

*(output_ptr + x) = static_cast<int16_t>(*(input1_ptr + x)) - static_cast<int16_t>(*(input2_ptr + x));

639

}

640

}

641

else

Michele Di Giorgio

2020-03-27 10:23:44 +0000

[diff] [blame]

642

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

643

// Compute S elements per iteration

644

int x = window_start_x;

645

for(; x <= (window_end_x - window_step_x); x += window_step_x)

Michele Di Giorgio

2020-03-27 10:23:44 +0000

[diff] [blame]

646

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

647

const auto vin1 = vreinterpretq_s16_u16(wrapper::vmovl(wrapper::vload(input1_ptr + x)));

648

const auto vin2 = vreinterpretq_s16_u16(wrapper::vmovl(wrapper::vload(input2_ptr + x)));

649

wrapper::vstore(output_ptr + x, wrapper::vqsub(vin1, vin2));

Michele Di Giorgio

2020-03-27 10:23:44 +0000

[diff] [blame]

650

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

651

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

652

// Compute left-over elements

653

for(; x < window_end_x; ++x)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

654

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

655

*(output_ptr + x) = wrapper::sub_sat(static_cast<int16_t>(*(input1_ptr + x)),

656

static_cast<int16_t>(*(input2_ptr + x)));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

657

}

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

658

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

659

},

660

input1, input2, output);

661

}

Ioan-Cristian Szabo

2017-11-30 15:19:11 +0000

[diff] [blame]

662

Georgios Pinitas

2018-09-10 15:07:45 +0100

[diff] [blame]

663

/** Check that the tensor infos and conversion policy form a supported subtraction.
 *
 * Verified in order:
 *  - each tensor has a single channel and one of the accepted data types;
 *  - the input shapes can be broadcast against each other;
 *  - the (input1, input2) data type pair is one of the supported combinations;
 *  - quantized inputs are not combined with ConvertPolicy::WRAP;
 *  - if the output is already configured, the (input1, input2, output) data type triple is
 *    supported and the output shape equals the broadcast shape.
 *
 * @param[in] input1 First input tensor info.
 * @param[in] input2 Second input tensor info.
 * @param[in] output Output tensor info; only checked when its total size is non-zero.
 * @param[in] policy Overflow policy requested by the caller.
 *
 * @return An empty Status on success, otherwise the Status produced by the failing check.
 */
inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output, ConvertPolicy policy)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::S16, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::S16, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::S16, DataType::F16, DataType::F32);

    const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
        !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8)
        && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8)
        && !(input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED)
        && !(input1.data_type() == DataType::QSYMM16 && input2.data_type() == DataType::QSYMM16)
        && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16)
        && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8)
        && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::S16)
        && !(input1.data_type() == DataType::F32 && input2.data_type() == DataType::F32)
        && !(input1.data_type() == DataType::F16 && input2.data_type() == DataType::F16),
        "You called subtract with the wrong image formats");

    // The three quantized cases are mutually exclusive, so they have to be OR-ed together:
    // AND-ing them yields a condition that can never be true and silently disables the check.
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
        (input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED && policy == ConvertPolicy::WRAP)
        || (input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && policy == ConvertPolicy::WRAP)
        || (input1.data_type() == DataType::QSYMM16 && input2.data_type() == DataType::QSYMM16 && policy == ConvertPolicy::WRAP),
        "Convert policy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16");

    // Validate in case of configured output
    if(output.total_size() > 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(
            !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::U8)
            && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && output.data_type() == DataType::QASYMM8)
            && !(input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED && output.data_type() == DataType::QASYMM8_SIGNED)
            && !(input1.data_type() == DataType::QSYMM16 && input2.data_type() == DataType::QSYMM16 && output.data_type() == DataType::QSYMM16)
            && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
            && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
            && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
            && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
            && !(input1.data_type() == DataType::F32 && input2.data_type() == DataType::F32 && output.data_type() == DataType::F32)
            && !(input1.data_type() == DataType::F16 && input2.data_type() == DataType::F16 && output.data_type() == DataType::F16),
            "You called subtract with the wrong image formats");

        ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
                                        "Wrong shape for output");
    }
    return Status{};
}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

714

} // namespace

715

716

/** Default constructor: no kernel function or tensors are bound yet and the policy starts as WRAP. */
NEArithmeticSubtractionKernel::NEArithmeticSubtractionKernel()
    : _func(nullptr),
      _input1(nullptr),
      _input2(nullptr),
      _output(nullptr),
      _policy(ConvertPolicy::WRAP)
{
}

void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy)

722

{

723

ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);

Georgios Pinitas

2018-09-10 15:07:45 +0100

[diff] [blame]

724

ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), policy));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

725

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

726

_input1 = input1;

727

_input2 = input2;

728

_output = output;

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

729

_policy = policy;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

730

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

731

const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());

732

const TensorShape &out_shape = broadcast_pair.first;

733

const ValidRegion &valid_region = broadcast_pair.second;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

734

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

735

// Auto initialize output if not initialized

736

set_shape_if_empty(*output->info(), out_shape);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

737

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

738

switch(input1->info()->data_type())

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

739

{

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

740

case DataType::U8:

741

if(input2->info()->data_type() == DataType::U8 && output->info()->data_type() == DataType::U8)

742

{

743

_func = &sub_same<uint8_t>;

744

}

745

else if(input2->info()->data_type() == DataType::U8 && output->info()->data_type() == DataType::S16)

746

{

747

_func = &sub_U8_U8_S16;

}

else

{

_func = &sub_U8_S16_S16;

752

}

753

break;

754

case DataType::QASYMM8:

755

_func = &sub_quantized<uint8_t>;

756

set_data_type_if_unknown(*output->info(), DataType::QASYMM8);

757

break;

758

case DataType::QASYMM8_SIGNED:

759

_func = &sub_quantized<int8_t>;

760

set_data_type_if_unknown(*output->info(), DataType::QASYMM8_SIGNED);

761

break;

762

case DataType::S16:

763

if(input2->info()->data_type() == DataType::U8)

764

{

765

_func = &sub_S16_U8_S16;

}

else

{

_func = &sub_same<int16_t>;

770

}

771

set_format_if_unknown(*output->info(), Format::S16);

772

break;

773

case DataType::QSYMM16:

774

_func = &sub_QSYMM16_QSYMM16_QSYMM16;

775

set_data_type_if_unknown(*output->info(), DataType::QSYMM16);

776

break;

777

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

778

case DataType::F16:

779

_func = &sub_same<float16_t>;

780

set_format_if_unknown(*output->info(), Format::F16);

781

break;

782

#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

783

case DataType::F32:

784

_func = &sub_same<float>;

785

set_format_if_unknown(*output->info(), Format::F32);

786

break;

787

default:

788

_func = nullptr;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

789

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

790

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

791

// NEArithmeticSubtractionKernel doesn't need padding so update_window_and_padding() can be skipped

792

Coordinates coord;

793

coord.set_num_dimensions(output->info()->num_dimensions());

794

output->info()->set_valid_region(valid_region);

795

Window win = calculate_max_window(valid_region, Steps());

796

797

INEKernel::configure(win);

Ioan-Cristian Szabo

2017-11-30 15:19:11 +0000

[diff] [blame]

798

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

799

Georgios Pinitas

631c41a

2017-12-06 11:53:03 +0000

[diff] [blame]

800

/** Static check of whether the given tensor infos and policy describe a valid configuration.
 *
 * @param[in] input1 First input tensor info.
 * @param[in] input2 Second input tensor info.
 * @param[in] output Output tensor info.
 * @param[in] policy Overflow policy.
 *
 * @return An empty Status when all checks pass, otherwise the error reported by the failing check.
 */
Status NEArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy)
{
    // Reject null pointers before dereferencing them below
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);

    // Delegate the data type / shape / policy checks
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output, policy));

    return Status{};
}

807

Moritz Pflanzer

c186b57

2017-09-07 09:48:04 +0100

[diff] [blame]

808

void NEArithmeticSubtractionKernel::run(const Window &window, const ThreadInfo &info)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

809

{

Moritz Pflanzer

c186b57

2017-09-07 09:48:04 +0100

[diff] [blame]

810

ARM_COMPUTE_UNUSED(info);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

811

ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);

812

ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

813

ARM_COMPUTE_ERROR_ON(_func == nullptr);

814

Michalis Spyrou

2020-05-13 00:12:08 +0100

[diff] [blame^]

815

(*_func)(_input1, _input2, _output, window, (_policy == ConvertPolicy::SATURATE));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

816

}

Michalis Spyrou