Blame - src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp - ml/ComputeLibrary

2018-07-06 17:05:59 +0100

[diff] [blame]

587

}

588

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

589

if (_gemm_info.method == AsmConvMethod::Indirect)

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

590

{

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

591

prepare_indirect_buffer(tensors);

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

592

}

593

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

_is_prepared = true;

}

}

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

598

template <typename TypeInput, typename TypeOutput, class OutputStage>

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

599

bool Fallback<TypeInput, TypeOutput, OutputStage>::is_configured() const

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

600

{

601

return _optimised_kernel != nullptr;

602

}

603

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

604

template <typename TypeInput, typename TypeOutput, class OutputStage>

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

605

experimental::MemoryRequirements Fallback<TypeInput, TypeOutput, OutputStage>::workspace() const

{

return _aux_mem;

}

template <typename TypeInput, typename TypeOutput, class OutputStage>

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

611

void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors)

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

612

{

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

613

auto a = tensors.get_const_tensor(TensorType::ACL_SRC_0);

614

auto b = tensors.get_const_tensor(TensorType::ACL_SRC_1);

615

auto c = tensors.get_const_tensor(TensorType::ACL_SRC_2);

616

auto d = tensors.get_tensor(TensorType::ACL_DST);

617

Jonathan Deakin

2023-01-12 11:41:14 +0000

[diff] [blame]

618

int lda = a->info()->strides_in_bytes().y() / a->info()->element_size();

Georgios Pinitas

2018-07-31 17:22:11 +0100

[diff] [blame]

619

int ldb = 0;

Jonathan Deakin

2023-01-12 11:41:14 +0000

[diff] [blame]

620

const int ldd = d->info()->strides_in_bytes().y() / d->info()->element_size();

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

621

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

622

const size_t a_batch_idx = _gemm_info.reinterpret_input_as_3d != 0 ? 3 : 2;

Georgios Pinitas

37d080f

2019-06-21 18:43:12 +0100

[diff] [blame]

623

const size_t a_multi_idx = a_batch_idx + 1;

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

624

const size_t d_batch_idx = _gemm_info.depth_output_gemm3d != 0 ? 3 : 2;

Georgios Pinitas

37d080f

2019-06-21 18:43:12 +0100

[diff] [blame]

625

const size_t d_multi_idx = d_batch_idx + 1;

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

626

Jonathan Deakin

2023-01-12 11:41:14 +0000

[diff] [blame]

627

int batch_stride_a = a->info()->strides_in_bytes()[a_batch_idx] / a->info()->element_size();

628

const int batch_stride_d = d->info()->strides_in_bytes()[d_batch_idx] / d->info()->element_size();

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

629

Jonathan Deakin

2023-01-12 11:41:14 +0000

[diff] [blame]

630

int multi_stride_a = a->info()->strides_in_bytes()[a_multi_idx] / a->info()->element_size();

Georgios Pinitas

2018-07-31 17:22:11 +0100

[diff] [blame]

631

int multi_stride_b = 0;

Jonathan Deakin

2023-01-12 11:41:14 +0000

[diff] [blame]

632

const int multi_stride_d = d->info()->strides_in_bytes()[d_multi_idx] / d->info()->element_size();

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

633

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

634

auto in0_ptr = reinterpret_cast<const TypeInput *>(a->buffer() + a->info()->offset_first_element_in_bytes());

Georgios Pinitas

2018-07-31 17:22:11 +0100

[diff] [blame]

635

const TypeInput *in1_ptr = nullptr;

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

636

auto out_ptr = reinterpret_cast<TypeOutput *>(d->buffer() + d->info()->offset_first_element_in_bytes());

Georgios Pinitas

2018-07-31 17:22:11 +0100

[diff] [blame]

637

SiCong Li

2023-10-17 17:38:57 +0100

[diff] [blame]

638

const ITensor *b_to_use = b;

639

640

// Pre-pretranspose B if required

641

const bool run_pre_pretranspose_b = _gemm_info.transpose_b && !isVarWeightsKernel();

642

CpuAuxTensorHandler pre_pretransposed_b(

643

offset_int_vec(PrePretransposedB), _pre_pretransposed_b_info, tensors,

644

false /*pack_inject: no need to inject into tensors*/,

645

!run_pre_pretranspose_b /*bypass_alloc: no need to allocate if pre-pretranspose B is not required as this handle will not be used*/);

646

if (b_to_use && !_is_b_constant && run_pre_pretranspose_b)

647

{

648

ARM_COMPUTE_ERROR_ON(_pre_pretranspose_b == nullptr);

649

ITensorPack pre_pretranspose_pack{{ACL_SRC, b_to_use}, {ACL_DST, pre_pretransposed_b.get()}};

650

_pre_pretranspose_b->run(pre_pretranspose_pack);

651

b_to_use = pre_pretransposed_b.get();

652

}

653

Georgios Pinitas

2018-07-31 17:22:11 +0100

[diff] [blame]

654

// Check if B is pre-tranposed and de-reference if not

Anitha Raj

69766d6

2023-11-21 11:19:50 +0000

[diff] [blame]

655

if (!_gemm_kernel_asm->B_is_pretransposed())

Georgios Pinitas

2018-07-31 17:22:11 +0100

[diff] [blame]

656

{

SiCong Li

2023-10-17 17:38:57 +0100

[diff] [blame]

657

ldb = b_to_use->info()->strides_in_bytes().y() / b_to_use->info()->element_size();

658

multi_stride_b = b_to_use->info()->strides_in_bytes().z() / b_to_use->info()->element_size();

659

in1_ptr =

660

reinterpret_cast<const TypeInput *>(b_to_use->buffer() + b_to_use->info()->offset_first_element_in_bytes());

Georgios Pinitas

2018-07-31 17:22:11 +0100

[diff] [blame]

661

}

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

662

Giorgio Arena

2021-09-24 14:04:27 +0100

[diff] [blame]

663

// If necessary, run pretranspose every time if either weights or biases are non-constant

SiCong Li

2023-10-17 17:38:57 +0100

[diff] [blame]

664

if ((b_to_use && !_is_b_constant) || (c && !_is_c_constant && c->info()->data_type() == DataType::S32))

Giorgio Arena

2021-09-24 14:04:27 +0100

[diff] [blame]

665

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

666

if (c && c->info()->data_type() == DataType::S32)

Giorgio Arena

2021-09-24 14:04:27 +0100

[diff] [blame]

667

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

668

_gemm_kernel_asm->set_quantized_bias(

669

reinterpret_cast<const int32_t *>(c->buffer() + c->info()->offset_first_element_in_bytes()), 0);

Giorgio Arena

2021-09-24 14:04:27 +0100

[diff] [blame]

670

}

671

672

// Pretranspose B if required

Anitha Raj

69766d6

2023-11-21 11:19:50 +0000

[diff] [blame]

673

if (_B_pretranspose_required)

Giorgio Arena

2021-09-24 14:04:27 +0100

[diff] [blame]

674

{

SiCong Li

2023-10-17 17:38:57 +0100

[diff] [blame]

675

// Fixed format kernels need no pretranspose.

676

ARM_COMPUTE_ERROR_ON(arm_compute::is_fixed_format(

677

assembly_utils::map_to_arm_compute_weight_format(_gemm_kernel_asm->get_config().weight_format)));

678

const int ldb = b_to_use->info()->strides_in_bytes().y() / b_to_use->info()->element_size();

679

const auto b_ptr = reinterpret_cast<const TypeInput *>(b_to_use->buffer() +

680

b_to_use->info()->offset_first_element_in_bytes());

681

const int multi_stride_b = b_to_use->info()->strides_in_bytes().z() / b_to_use->info()->element_size();

Giorgio Arena

2021-09-24 14:04:27 +0100

[diff] [blame]

682

683

CpuAuxTensorHandler pretranspose(offset_int_vec(Pretranspose), _pretranspose_info, tensors, true);

684

ARM_COMPUTE_ERROR_ON(pretranspose.get()->buffer() == nullptr);

685

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

686

if (_is_b_constant)

Giorgio Arena

2021-09-24 14:04:27 +0100

[diff] [blame]

687

{

688

_gemm_kernel_asm->requantize_bias(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b);

689

}

690

else

691

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

692

run_parallel_pretranspose_B_array<TypeInput, TypeOutput>(_gemm_kernel_asm.get(), pretranspose.get(),

693

b_ptr, ldb, multi_stride_b,

694

NEScheduler::get().num_threads());

Giorgio Arena

2021-09-24 14:04:27 +0100

[diff] [blame]

}

}

}

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

699

const auto scheduling_hint = scheduling_hint_heuristic(_kernel_info.method, d->info()->data_type());

Joseph Dobson

6f8b17d

2020-02-11 19:32:11 +0000

[diff] [blame]

700

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

701

// Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

702

CpuAuxTensorHandler workspace(offset_int_vec(AsmGemmWorkspace), _workspace_info, tensors, false);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

703

if (workspace.get()->buffer() != nullptr)

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

704

{

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

705

_gemm_kernel_asm->set_working_space(reinterpret_cast<void *>(workspace.get()->buffer()));

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

706

const unsigned int split_dim = scheduling_hint.split_dimension();

707

const unsigned int window_size = _gemm_kernel_asm->get_window_size().total_size();

708

unsigned int num_threads = NEScheduler::get().num_threads();

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

709

if (window_size < num_threads)

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

710

{

711

num_threads = window_size;

712

}

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

713

if (split_dim != IScheduler::split_dimensions_all)

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

714

{

715

// Make sure the kernel does not expect more threads than we can actually spawn

716

const unsigned int num_iterations = _optimised_kernel.get()->window().num_iterations(split_dim);

717

num_threads = std::min(num_iterations, num_threads);

718

}

719

_gemm_kernel_asm->set_nthreads(num_threads);

720

}

721

722

// Prepare assembly kernel

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

723

prepare(tensors);

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

724

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

725

// Setup up matrix bias in the assembly kernel, it's just a pointer to matrix C.

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

726

TypeOutput *bias = nullptr;

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

727

if (c && c->info()->data_type() != DataType::S32)

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

728

{

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

729

bias = reinterpret_cast<TypeOutput *>(c->buffer() + c->info()->offset_first_element_in_bytes());

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

730

}

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

731

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

732

if (_gemm_info.method == AsmConvMethod::Indirect)

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

{

in0_ptr = nullptr;

lda = 0;

batch_stride_a = 0;

multi_stride_a = 0;

}

David Mansell

2020-08-25 15:02:02 +0100

[diff] [blame]

740

// Set gemm parameters

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

741

_gemm_kernel_asm->set_arrays(in0_ptr, lda, batch_stride_a, multi_stride_a, in1_ptr, ldb, multi_stride_b, out_ptr,

742

ldd, batch_stride_d, multi_stride_d, bias, 0);

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

743

// Schedule

Georgios Pinitas

77d4252

2019-11-05 13:35:47 +0000

[diff] [blame]

744

NEScheduler::get().schedule(_optimised_kernel.get(), scheduling_hint);

Anthony Barbier

2018-07-06 17:05:59 +0100

[diff] [blame]

745

}

746

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

747

template <typename TypeInput, typename TypeOutput>

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

748

void create_arm_gemm(std::unique_ptr<CpuGemmAssemblyDispatch::IFallback> &arm_gemm,

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

749

const ITensorInfo *a,

750

const ITensorInfo *b,

751

const ITensorInfo *c,

752

ITensorInfo *d,

753

arm_gemm::Activation activation,

754

const AsmGemmInfo &info)

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

755

{

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

756

Params p = extract_parameters(a, b, d, info);

757

const CPUInfo &ci = NEScheduler::get().cpu_info();

758

unsigned int num_threads = NEScheduler::get().num_threads();

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

759

Francesco Petrogalli

2022-06-30 10:22:01 +0000

[diff] [blame]

760

arm_gemm::GemmConfig cfg;

Ramy Elgammal

2022-07-20 14:57:37 +0100

[diff] [blame]

761

cfg.weight_format = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

762

arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.sections, p.batches, p.multis, p.indirect, activation, num_threads,

763

info.fixed_format, info.fast_mode, &cfg);

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

764

Georgios Pinitas

2019-10-14 19:03:09 +0100

[diff] [blame]

765

// Create arm_gemm fallback

Georgios Pinitas

40f51a6

2020-11-21 03:04:18 +0000

[diff] [blame]

766

auto fallback = std::make_unique<Fallback<TypeInput, TypeOutput>>();

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

767

fallback->configure(a, b, c, d, args, info);

Georgios Pinitas

2019-10-14 19:03:09 +0100

[diff] [blame]

768

arm_gemm = std::move(fallback);

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

769

}

770

771

template <typename TypeInput, typename TypeOutput>

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

772

void create_arm_gemm_quant(std::unique_ptr<CpuGemmAssemblyDispatch::IFallback> &arm_gemm,

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

773

const ITensorInfo *a,

774

const ITensorInfo *b,

775

const ITensorInfo *c,

776

ITensorInfo *d,

777

arm_gemm::Activation activation,

778

const AsmGemmInfo &info)

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

779

{

Michele Di Giorgio

6ad60af

2020-06-09 14:52:15 +0100

[diff] [blame]

780

ARM_COMPUTE_UNUSED(activation);

Georgios Pinitas

4ee8b15

2021-07-16 16:16:43 +0100

[diff] [blame]

781

Params p = extract_parameters(a, b, d, info);

782

const CPUInfo &ci = NEScheduler::get().cpu_info();

783

const unsigned int num_threads = NEScheduler::get().num_threads();

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

784

Francesco Petrogalli

2022-06-30 10:22:01 +0000

[diff] [blame]

785

arm_gemm::GemmConfig cfg;

Ramy Elgammal

2022-07-20 14:57:37 +0100

[diff] [blame]

786

cfg.weight_format = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

787

arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.sections, p.batches, p.multis, p.indirect, activation, num_threads,

788

info.fixed_format, info.fast_mode, &cfg);

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

789

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

790

// Create arm_gemm fallback

Georgios Pinitas

40f51a6

2020-11-21 03:04:18 +0000

[diff] [blame]

791

auto fallback = std::make_unique<Fallback<TypeInput, TypeOutput, arm_gemm::Requantize32>>();

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

792

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

793

// Configure requantization info

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

794

const int32_t negation = info.negated_offsets ? 1 : -1;

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

795

const int32_t a_offset = -a->quantization_info().uniform().offset * negation;

796

const int32_t b_offset = -b->quantization_info().uniform().offset * negation;

Georgios Pinitas

2020-11-02 01:37:17 +0000

[diff] [blame]

797

const GEMMLowpOutputStageInfo os_info = info.output_stage;

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

798

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

799

arm_gemm::Requantize32 gemm_requant_info{};

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

800

if (os_info.gemmlowp_shifts.size() > 1)

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

801

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

802

const auto requantize_data =

803

fallback->set_requantize_data(os_info.gemmlowp_shifts, os_info.gemmlowp_multipliers);

804

gemm_requant_info = arm_gemm::Requantize32(

805

nullptr, 0, a_offset, b_offset, os_info.gemmlowp_offset,

806

(std::get<0>(requantize_data)) ? std::get<1>(requantize_data) : nullptr, std::get<2>(requantize_data),

807

std::get<3>(requantize_data), os_info.gemmlowp_min_bound, os_info.gemmlowp_max_bound);

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

808

}

809

else

810

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

811

gemm_requant_info =

812

arm_gemm::Requantize32(nullptr, 0, a_offset, b_offset, os_info.gemmlowp_offset, -os_info.gemmlowp_shift,

813

os_info.gemmlowp_multiplier, os_info.gemmlowp_min_bound, os_info.gemmlowp_max_bound);

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

814

}

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

815

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

816

// Configure fallback

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

817

fallback->configure(a, b, c, d, args, info, gemm_requant_info);

Georgios Pinitas

2019-10-14 19:03:09 +0100

[diff] [blame]

818

arm_gemm = std::move(fallback);

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

819

}

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

820

} //namespace

821

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

822

CpuGemmAssemblyDispatch::CpuGemmAssemblyDispatch() : _arm_gemm(nullptr)

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

823

{

824

}

825

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

826

/** Query arm_gemm for an optimised kernel matching the given tensors and meta-data.
 *
 * The query is dispatched on the data type of @p a (and, for 8-bit inputs, on whether @p d is S32).
 *
 * @param[in,out] expected_weight_format On entry, mapped to the arm_gemm weight format handed to the query.
 *                                       On success, overwritten with the weight format coming back from the query.
 * @param[in]     a                      Info of input tensor A. Must not be null.
 * @param[in]     b                      Info of input tensor B. Must not be null.
 * @param[in]     c                      Info of input tensor C. Unused.
 * @param[in]     d                      Info of output tensor D. Must not be null.
 * @param[in]     info                   Assembly GEMM meta-data.
 *
 * @return An empty Status when a kernel was found, an error Status otherwise
 *         (including when the data type of @p a is not handled in this build).
 */
Status CpuGemmAssemblyDispatch::has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
                                             const ITensorInfo         *a,
                                             const ITensorInfo         *b,
                                             const ITensorInfo         *c,
                                             const ITensorInfo         *d,
                                             const AsmGemmInfo         &info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, d);
    ARM_COMPUTE_UNUSED(c);
    arm_gemm::Activation act         = assembly_utils::map_to_arm_gemm_activation(info.activation_info);
    Params               p           = extract_parameters(a, b, d, info);
    const CPUInfo       &ci          = NEScheduler::get().cpu_info();
    unsigned int         num_threads = NEScheduler::get().num_threads();
    arm_gemm::GemmConfig cfg;
    cfg.weight_format                           = assembly_utils::map_to_arm_gemm_weight_format(info.weight_format);
    arm_gemm::WeightFormat arm_gemm_expected_wf = assembly_utils::map_to_arm_gemm_weight_format(expected_weight_format);
    arm_gemm::GemmArgs     args(&ci, p.M, p.N, p.K, p.sections, p.batches, p.multis, p.indirect, act, num_threads,
                                info.fixed_format, info.fast_mode, &cfg);
    // TODO: Incorporate info.transpose_b COMPMID-6595
    switch (a->data_type())
    {
        case DataType::F32:
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(
                !(arm_gemm::has_opt_gemm<float, float, arm_gemm::Nothing>(arm_gemm_expected_wf, args, {})),
                "We could not find an optimized kernel for F32 input");
            break;
#ifdef __aarch64__
        case DataType::U8:
        case DataType::QASYMM8:
            // S32 destination: no output stage; anything else goes through requantization
            if (d->data_type() == DataType::S32)
            {
                ARM_COMPUTE_RETURN_ERROR_ON_MSG(
                    !(arm_gemm::has_opt_gemm<uint8_t, uint32_t, arm_gemm::Nothing>(arm_gemm_expected_wf, args, {})),
                    "We could not find an optimized kernel for U8/QASYMM8 input and U32 output");
            }
            else
            {
                ARM_COMPUTE_RETURN_ERROR_ON_MSG(
                    !(arm_gemm::has_opt_gemm<uint8_t, uint8_t, arm_gemm::Requantize32>(arm_gemm_expected_wf, args, {})),
                    "We could not find an optimized kernel for U8 input and U8 output");
            }
            break;
        case DataType::S8:
        case DataType::QASYMM8_SIGNED:
            // S32 destination: no output stage; anything else goes through requantization
            if (d->data_type() == DataType::S32)
            {
                ARM_COMPUTE_RETURN_ERROR_ON_MSG(
                    !(arm_gemm::has_opt_gemm<int8_t, int32_t, arm_gemm::Nothing>(arm_gemm_expected_wf, args, {})),
                    "We could not find an optimized kernel for S8/QASYMM8_SIGNED input and S32 output");
            }
            else
            {
                ARM_COMPUTE_RETURN_ERROR_ON_MSG(
                    !(arm_gemm::has_opt_gemm<int8_t, int8_t, arm_gemm::Requantize32>(arm_gemm_expected_wf, args, {})),
                    "We could not find an optimized kernel for S8 input and S8 output");
            }
            break;
#endif /* __aarch64__ */
#if defined(ARM_COMPUTE_ENABLE_BF16)
        case DataType::BFLOAT16:
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(
                !(arm_gemm::has_opt_gemm<bfloat16, float, arm_gemm::Nothing>(arm_gemm_expected_wf, args, {})),
                "We could not find an optimized kernel for BFLOAT16 input and F32 output");
            break;
        }
#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(
                !(arm_gemm::has_opt_gemm<float16_t, float16_t, arm_gemm::Nothing>(arm_gemm_expected_wf, args, {})),
                "We could not find an optimized kernel for F16 input and F16 output");
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            ARM_COMPUTE_RETURN_ERROR_ON_MSG(true, "Unsupported type. Could not find a kernel");
            break;
    }
    // Report the weight format back to the caller in arm_compute terms
    expected_weight_format = assembly_utils::map_to_arm_compute_weight_format(arm_gemm_expected_wf);

    return Status{};
}

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

909

/** Check whether the assembly dispatch can handle the given configuration.
 *
 * Performs, in order: null-pointer and CPU-feature checks on @p a, the `reshape_b_only_on_first_run`
 * requirement, data-type checks on @p a and @p b, input/output data-type pairing checks, and finally
 * a kernel lookup through has_opt_impl().
 *
 * @param[in] a    Info of input tensor A.
 * @param[in] b    Info of input tensor B.
 * @param[in] c    Info of input tensor C, forwarded to has_opt_impl().
 * @param[in] d    Info of output tensor D.
 * @param[in] info Assembly GEMM meta-data.
 *
 * @return The Status of the first failing check, or the Status returned by has_opt_impl().
 */
Status CpuGemmAssemblyDispatch::validate(
    const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const AsmGemmInfo &info)
{
    ARM_COMPUTE_UNUSED(c, info);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(a, b, d);
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(a);
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(a);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(info.reshape_b_only_on_first_run),
                                    "Assembly kernel will not be executed when reshape_b_only_on_first_run is false");

#ifndef __aarch64__
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->element_size() == 1, "8bit integer types only supported for aarch64");
#endif /* __aarch64__ */
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::U8, DataType::QASYMM8,
                                                         DataType::QASYMM8_SIGNED, DataType::S8, DataType::BFLOAT16,
                                                         DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
        b, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S8,
        DataType::BFLOAT16, DataType::F16, DataType::F32);

    // A and B must share a data type, except for per-channel quantized B and fixed-format fast-math weights
    if (is_data_type_quantized_per_channel(b->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8_SIGNED, DataType::S8);
    }
    else if (is_fixed_format_fast_math(info.weight_format))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(a, DataType::F32);
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(b, DataType::BFLOAT16);
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
    }

    // Allowed input/output data-type pairings
    const DataType src_type = a->data_type();
    const DataType dst_type = d->data_type();
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src_type == DataType::F32 && dst_type != DataType::F32,
                                    "Only F32 output supported for F32 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src_type == DataType::F16 && dst_type != DataType::F16,
                                    "Only F16 output supported for F16 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src_type == DataType::BFLOAT16 && dst_type != DataType::F32,
                                    "Only F32 output supported for BFLOAT16 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src_type == DataType::U8 && dst_type != DataType::U32,
                                    "Only U32 output supported for U8 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src_type == DataType::S8 && dst_type != DataType::S32,
                                    "Only S32 output supported for S8 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(src_type == DataType::QASYMM8 &&
                                        (dst_type != DataType::QASYMM8 && dst_type != DataType::S32),
                                    "Only QASYMM8/S32 output supported for QASYMM8 input");

    arm_compute::WeightFormat kernel_weight_format = arm_compute::WeightFormat::UNSPECIFIED;
    const Status lookup_status = CpuGemmAssemblyDispatch::has_opt_impl(kernel_weight_format, a, b, c, d, info);
    if (static_cast<bool>(lookup_status) && kernel_weight_format != arm_compute::WeightFormat::ANY)
    {
        // Correctness check: if the format expected by the kernel is
        // not "any", make sure that the one found matches the format
        // intended by the caller.
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(
            (kernel_weight_format != info.weight_format),
            "The format expected by the kernel does not correspond with the one requested by the user.");
    }
    return lookup_status;
}

967

Sang-Hoon Park

4f7693d

2021-05-12 13:59:10 +0100

[diff] [blame]

968

bool CpuGemmAssemblyDispatch::is_activation_supported(const ActivationLayerInfo &activation)

Georgios Pinitas

2019-10-14 19:03:09 +0100

[diff] [blame]

969

{

Michele Di Giorgio

d02d5ed

2021-01-22 09:47:04 +0000

[diff] [blame]

970

arm_gemm::Activation act = assembly_utils::map_to_arm_gemm_activation(activation);

Georgios Pinitas

2019-10-14 19:03:09 +0100

[diff] [blame]

971

return act.type != arm_gemm::Activation::Type::None;

972

}

973

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

974

void CpuGemmAssemblyDispatch::configure(

975

const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, ITensorInfo *d, const AsmGemmInfo &info)

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

976

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

977

ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, d);

Michele Di Giorgio

d02d5ed

2021-01-22 09:47:04 +0000

[diff] [blame]

978

arm_gemm::Activation act = assembly_utils::map_to_arm_gemm_activation(info.activation_info);

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

979

980

//If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

981

if (!CpuGemmAssemblyDispatch::validate(a, b, c, d, info))

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

{

return;

}

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

986

switch (a->data_type())

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

987

{

988

case DataType::F32:

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

989

create_arm_gemm<float, float>(_arm_gemm, a, b, c, d, act, info);

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

break;

#ifdef __aarch64__

case DataType::U8:

case DataType::QASYMM8:

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

994

if (d->data_type() == DataType::S32)

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

995

{

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

996

create_arm_gemm<uint8_t, uint32_t>(_arm_gemm, a, b, c, d, act, info);

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

997

}

998

else

999

{

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

1000

create_arm_gemm_quant<uint8_t, uint8_t>(_arm_gemm, a, b, c, d, act, info);

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

1001

}

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

1002

break;

1003

case DataType::S8:

Georgios Pinitas

dbdea0d

2019-10-16 19:21:40 +0100

[diff] [blame]

1004

case DataType::QASYMM8_SIGNED:

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

1005

if (d->data_type() == DataType::S32)

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

1006

{

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

1007

create_arm_gemm<int8_t, int32_t>(_arm_gemm, a, b, c, d, act, info);

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

1008

}

1009

else

1010

{

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

1011

create_arm_gemm_quant<int8_t, int8_t>(_arm_gemm, a, b, c, d, act, info);

Michalis Spyrou

2019-11-14 14:31:44 +0000

[diff] [blame]

1012

}

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

1013

break;

1014

#endif /* __aarch64__ */

Pablo Marquez Tello

2022-07-19 12:19:46 +0100

[diff] [blame]

1015

#if defined(ARM_COMPUTE_ENABLE_BF16)

Georgios Pinitas

c7b183a

2020-03-06 18:12:09 +0000

[diff] [blame]

1016

case DataType::BFLOAT16:

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

1017

create_arm_gemm<bfloat16, float>(_arm_gemm, a, b, c, d, act, info);

Georgios Pinitas

c7b183a

2020-03-06 18:12:09 +0000

[diff] [blame]

1018

break;

Pablo Marquez Tello

2022-07-19 12:19:46 +0100

[diff] [blame]

1019

#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

1020

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

1021

case DataType::F16:

Michele Di Giorgio

2021-06-16 11:14:41 +0100

[diff] [blame]

1022

create_arm_gemm<float16_t, float16_t>(_arm_gemm, a, b, c, d, act, info);

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

1023

break;

1024

#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

default:

break;

}

}

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

1030

/** Forward the prepare request to the configured fallback.
 *
 * @param[in,out] tensors Tensor pack handed to the fallback's prepare().
 */
void CpuGemmAssemblyDispatch::prepare(ITensorPack &tensors)
{
    // A fallback must have been created before this is called
    ARM_COMPUTE_ERROR_ON(_arm_gemm == nullptr);

    _arm_gemm->prepare(tensors);
}

1035

Sang-Hoon Park

4f7693d

2021-05-12 13:59:10 +0100

[diff] [blame]

1036

bool CpuGemmAssemblyDispatch::is_configured() const

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

1037

{

Francesco Petrogalli

2022-06-30 10:22:01 +0000

[diff] [blame]

1038

return _arm_gemm && _arm_gemm->is_configured();

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

1039

}

1040

Sang-Hoon Park

2021-05-17 17:04:50 +0100

[diff] [blame]

1041

/** Forward the run request to the configured fallback.
 *
 * @param[in,out] tensors Tensor pack handed to the fallback's run().
 */
void CpuGemmAssemblyDispatch::run(ITensorPack &tensors)
{
    // A fallback must have been created before this is called
    ARM_COMPUTE_ERROR_ON(_arm_gemm == nullptr);

    _arm_gemm->run(tensors);
}

Michele Di Giorgio