blob: 956ded55d2aaa412d3bbb34dd504c7b88d48b4ef [file] [log] [blame]
Anthony Barbier71d9b572018-07-06 17:05:59 +01001/*
Georgios Pinitas7cd26d42019-01-09 18:35:17 +00002 * Copyright (c) 2018-2019 ARM Limited.
Anthony Barbier71d9b572018-07-06 17:05:59 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
25
Anthony Barbiereaefd002018-07-20 17:49:35 +010026#include "arm_compute/core/CPP/Validate.h"
Anthony Barbierc8e84b52018-07-17 16:48:42 +010027#include "arm_compute/core/NEON/kernels/assembly/NEGEMMNativeWrapperKernel.h"
Anthony Barbier71d9b572018-07-06 17:05:59 +010028#include "arm_compute/runtime/NEON/NEScheduler.h"
Anthony Barbierc8e84b52018-07-17 16:48:42 +010029#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h"
Anthony Barbier3d677cc2018-07-23 16:42:59 +010030#include "arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h"
Anthony Barbier71d9b572018-07-06 17:05:59 +010031
Anthony Barbiereaefd002018-07-20 17:49:35 +010032#include <arm_neon.h>
33
Anthony Barbierc8e84b52018-07-17 16:48:42 +010034namespace arm_compute
35{
Anthony Barbiereaefd002018-07-20 17:49:35 +010036namespace
Anthony Barbier71d9b572018-07-06 17:05:59 +010037{
Michalis Spyroubcfd09a2019-05-01 13:03:59 +010038std::unique_ptr<IFunction> create_function_all_types(const arm_gemm::KernelDescription &gemm_kernel_info,
Georgios Pinitas37d080f2019-06-21 18:43:12 +010039 const ITensor *a, const ITensor *b, ITensor *d,
40 float alpha, float beta, const GEMMInfo &gemm_info,
Michalis Spyrou1a569a32019-09-10 17:20:34 +010041 std::shared_ptr<IMemoryManager> memory_manager,
42 IWeightsManager *weights_manager)
Anthony Barbier3d677cc2018-07-23 16:42:59 +010043
Anthony Barbiereaefd002018-07-20 17:49:35 +010044{
Georgios Pinitas37d080f2019-06-21 18:43:12 +010045 // Note: It's safe to not check for FP16 support because this was already checked in NEGEMMAssemblyDispatch::configure()
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000046 switch(gemm_kernel_info.method)
Anthony Barbierc8e84b52018-07-17 16:48:42 +010047 {
Anthony Barbier3d677cc2018-07-23 16:42:59 +010048 case arm_gemm::GemmMethod::GEMM_INTERLEAVED:
49 {
Georgios Pinitas37d080f2019-06-21 18:43:12 +010050 if(!gemm_info.pretranpose_B())
Anthony Barbier3d677cc2018-07-23 16:42:59 +010051 {
52 return nullptr;
53 }
Michalis Spyrou1a569a32019-09-10 17:20:34 +010054 auto function = support::cpp14::make_unique<NEGEMMInterleavedWrapper>(memory_manager, weights_manager);
Georgios Pinitas37d080f2019-06-21 18:43:12 +010055 function->configure(a, b, d, alpha, beta, gemm_info);
Anthony Barbier3d677cc2018-07-23 16:42:59 +010056 return std::move(function);
57 }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000058#if defined(__aarch64__)
Anthony Barbierc8e84b52018-07-17 16:48:42 +010059 case arm_gemm::GemmMethod::GEMM_NATIVE:
60 {
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000061 if(gemm_kernel_info.name.find("sgemm_native_16x4") != std::string::npos)
62 {
63 auto kernel = support::cpp14::make_unique<NEGEMMNativeWrapperKernel<float, float>>();
Georgios Pinitas37d080f2019-06-21 18:43:12 +010064 kernel->configure(a, b, d, alpha, beta, gemm_info);
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000065 auto function = support::cpp14::make_unique<NESimpleAssemblyFunction>();
66 function->configure(std::move(kernel));
67 return std::move(function);
68 }
69 return nullptr;
Anthony Barbierc8e84b52018-07-17 16:48:42 +010070 }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000071#endif // defined(__aarch64__)
Anthony Barbierc8e84b52018-07-17 16:48:42 +010072 default:
Anthony Barbiereaefd002018-07-20 17:49:35 +010073 return nullptr;
Anthony Barbierc8e84b52018-07-17 16:48:42 +010074 }
75}
76
/** Weights-manager transform that performs the assembly kernel's B pre-transposition.
 *
 * Wraps pretranspose_B_array() so the reshaped weights can be owned and cached
 * by an IWeightsManager instead of the Fallback function itself.
 */
template <typename TypeInput, typename TypeOutput>
class FallbackTransform : public ITransformWeights
{
public:
    /** Allocate the destination tensor and run the pre-transposition of B.
     *
     * Requires set_args() to have been called first so the source pointer,
     * strides and kernel are valid.
     */
    void run() override
    {
        _output.allocator()->allocate();
        ARM_COMPUTE_ERROR_ON(_output.buffer() == nullptr);
        _gemm_kernel_asm->pretranspose_B_array(_output.buffer(), _in1_ptr, _ldb, _multi_stride_b);
        // _reshape_run is inherited (presumably from ITransformWeights) and is
        // what is_reshape_run() reports to callers — TODO confirm against base class.
        _reshape_run = true;
    }

    /** Free the pre-transposed weights buffer. */
    void release() override
    {
        _output.allocator()->free();
    }

    /** @return The tensor holding the pre-transposed weights. */
    ITensor *get_weights() override
    {
        return &_output;
    }

    /** Unique id used by the weights manager to key this transform.
     *
     * Derived from the pre-transpose buffer size with the top bit forced on;
     * note the size is truncated to 32 bits here.
     */
    uint32_t uid() override
    {
        uint32_t id = (_B_pretranspose_size | 0x80000000);
        return id;
    }

    /** Initialise the destination tensor metadata (no allocation yet).
     *
     * @param[in] B_pretranspose_size Size in bytes of the pre-transposed B buffer.
     * @param[in] alignment           Required buffer alignment in bytes.
     */
    void configure(size_t B_pretranspose_size, unsigned int alignment)
    {
        _output.allocator()->init(TensorInfo(TensorShape{ (B_pretranspose_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment);
        _B_pretranspose_size = B_pretranspose_size;
    }

    /** Point the assembly kernel at an already pre-transposed buffer.
     *
     * Only takes effect if run() has not executed the reshape.
     */
    void set_pretranspose(ITensor *tensor)
    {
        if(!_reshape_run)
        {
            _gemm_kernel_asm->set_pretransposed_B_data(tensor->buffer());
        }
    }

    /** Provide the inputs run() needs: B pointer, strides and the assembly kernel.
     *
     * @param[in] ldb             Leading dimension (row stride) of B, in elements.
     * @param[in] in1_ptr         Pointer to the first element of B.
     * @param[in] multi_stride_b  Stride between B matrices, in elements.
     * @param[in] gemm_kernel_asm Shared ownership of the assembly GEMM object.
     */
    void set_args(const int ldb, const TypeInput *in1_ptr, const int multi_stride_b, std::shared_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> gemm_kernel_asm)
    {
        _ldb             = ldb;
        _in1_ptr         = in1_ptr;
        _multi_stride_b  = multi_stride_b;
        _gemm_kernel_asm = gemm_kernel_asm;
    }

private:
    // Destination tensor holding the pre-transposed weights.
    Tensor _output{};
    // Leading dimension of the source B matrix, in elements.
    int _ldb{};
    // Non-owning pointer to the source B data.
    const TypeInput *_in1_ptr{};
    // Stride between consecutive B matrices, in elements.
    int _multi_stride_b{};
    // Size of the pre-transposed buffer; also feeds uid().
    size_t _B_pretranspose_size{};
    // Shared ownership of the assembly GEMM object that performs the transform.
    std::shared_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> _gemm_kernel_asm{ nullptr };
};
135
/** Fallback in case ACL doesn't have a function */
template <typename TypeInput, typename TypeOutput, class OutputStage = arm_gemm::Nothing>
class Fallback : public NEGEMMAssemblyDispatch::IFallback
{
public:
    /** Destructor */
    ~Fallback()
    {
        // Release memory if we have allocated the memory ourselves.
        // When the weights manager owns the pre-transpose tensor (acquired in
        // configure()) it is NOT deleted here; only the raw `new Tensor()` path is.
        if(_pretranspose && !(_weights_manager && _weights_manager->are_weights_managed(_b)))
        {
            delete _pretranspose;
        }
    }

    /** Initialise the functions's input and output.
     *
     * @param[in]  a               Input tensor containing the Matrix A.
     * @param[in]  b               Input tensor containing the Matrix B.
     * @param[in]  c               Input tensor containing the Matrix C.
     * @param[out] d               Output tensor to store the result of matrix multiplication.
     * @param[in]  args            Matrix multiplication information.
     * @param[in]  gemm_info       GEMM meta-data
     * @param[in]  memory_group    Memory group to be used by the function.
     * @param[in]  weights_manager Weights manager to be used by the function.
     * @param[in]  os              Output stage meta-data.
     */
    void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d,
                   arm_gemm::GemmArgs<TypeOutput> args, const GEMMInfo &gemm_info,
                   MemoryGroup &memory_group, IWeightsManager *weights_manager, const OutputStage &os = {});

    // Inherited methods overridden:
    void run() override;
    void prepare() override;
    bool is_configured() const override;

private:
    /** Allocate a workspace tensor.
     *
     * @param[in] workspace_size Size to allocate.
     * @param[in] memory_group   Tensor memory group.
     * @param[in] alignment      Workspace memory alignment.
     */
    void allocate_workspace(size_t workspace_size, MemoryGroup &memory_group, size_t alignment);

    /** Assembly Gemm kernel */
    std::shared_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> _gemm_kernel_asm{ nullptr };
    /** Optimised NEON kernel */
    std::unique_ptr<INEKernel> _optimised_kernel{ nullptr };
    /** Input A */
    const ITensor *_a{ nullptr };
    /** Input B */
    const ITensor *_b{ nullptr };
    /** Input C (bias); consumed in prepare() when its data type is S32 */
    const ITensor *_c{ nullptr };
    /** Output */
    ITensor *_d{ nullptr };
    /** GEMM workspace */
    Tensor _workspace{};
    /** Pre-transpose tensor: either weights-manager owned or a raw `new Tensor()` freed in the destructor */
    ITensor *_pretranspose{ nullptr };
    /** Prepared flag */
    bool _is_prepared{ false };
    /** GEMM meta-data */
    GEMMInfo _gemm_info{};
    /** Weights manager */
    IWeightsManager *_weights_manager{ nullptr };
    /** Weights transform object */
    FallbackTransform<TypeInput, TypeOutput> _weights_transform{};
};
Anthony Barbier71d9b572018-07-06 17:05:59 +0100214
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100215template <typename TypeInput, typename TypeOutput, class OutputStage>
216void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d,
217 arm_gemm::GemmArgs<TypeOutput> args, const GEMMInfo &gemm_info,
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100218 MemoryGroup &memory_group, IWeightsManager *weights_manager, const OutputStage &os)
Anthony Barbier71d9b572018-07-06 17:05:59 +0100219{
Georgios Pinitas3dbfd232019-01-30 17:17:16 +0000220 arm_gemm::GemmConfig gemm_cfg;
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100221 const arm_gemm::KernelDescription gemm_kernel_info = arm_gemm::get_gemm_method<TypeInput, TypeOutput, OutputStage>(args, os);
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100222 _weights_manager = weights_manager;
Georgios Pinitas3dbfd232019-01-30 17:17:16 +0000223 if(gemm_kernel_info.method != arm_gemm::GemmMethod::GEMV_BATCHED)
224 {
225 gemm_cfg.filter = gemm_kernel_info.name;
226 args._cfg = &gemm_cfg;
227 }
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100228 _gemm_kernel_asm = arm_gemm::gemm<TypeInput, TypeOutput, OutputStage>(args, os);
Anthony Barbier71d9b572018-07-06 17:05:59 +0100229 if(_gemm_kernel_asm == nullptr)
230 {
231 //configuration not supported: Leave function unconfigured:
232 return;
233 }
234
235 // arm_compute wrapper for the Gemm object (see above)
236 std::unique_ptr<NEGEMMAssemblyWrapperKernel<TypeInput, TypeOutput>> acl_gemm_wrapper = support::cpp14::make_unique<NEGEMMAssemblyWrapperKernel<TypeInput, TypeOutput>>();
237 ARM_COMPUTE_ERROR_ON(acl_gemm_wrapper == nullptr);
Georgios Pinitas3dbfd232019-01-30 17:17:16 +0000238 acl_gemm_wrapper->configure(_gemm_kernel_asm.get(), gemm_cfg.filter);
Anthony Barbier71d9b572018-07-06 17:05:59 +0100239 const size_t workspace_size = _gemm_kernel_asm->get_working_size();
240 if(workspace_size > 0)
241 {
242 // Allocate workspace
243 const unsigned int alignment = 4096;
Anthony Barbier20394d52018-08-02 11:29:09 +0100244 allocate_workspace(workspace_size, memory_group, alignment);
Anthony Barbier71d9b572018-07-06 17:05:59 +0100245 }
246
247 //if we disable this code below in brackets then ConvLayer deadlocks when threads > 1 and
248 //the shapes are In=1x1x1024 Weights=1x1x1024x1001 Biases=1001 Out=1x1x1001
249 {
Anthony Barbierc8e84b52018-07-17 16:48:42 +0100250 const int window_size = _gemm_kernel_asm->get_window_size();
251 if(window_size < args._maxthreads)
Anthony Barbier71d9b572018-07-06 17:05:59 +0100252 {
Anthony Barbierc8e84b52018-07-17 16:48:42 +0100253 _gemm_kernel_asm->set_nthreads(window_size);
Anthony Barbier71d9b572018-07-06 17:05:59 +0100254 }
255 }
256
257 _optimised_kernel = std::move(acl_gemm_wrapper);
258 _a = a;
259 _b = b;
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100260 _c = c;
Anthony Barbier71d9b572018-07-06 17:05:59 +0100261 _d = d;
Georgios Pinitas37d080f2019-06-21 18:43:12 +0100262 _gemm_info = gemm_info;
Anthony Barbier71d9b572018-07-06 17:05:59 +0100263 // Check for pre-transposed support
264 if(_gemm_kernel_asm->B_pretranspose_required())
265 {
266 // Forcing 128-byte alignment (required by 32-bit kernels)
267 const unsigned int alignment = 128;
268 const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size();
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100269 if(weights_manager && _weights_manager->are_weights_managed(b))
270 {
271 _weights_transform.configure(B_pretranspose_size, alignment);
272 _pretranspose = _weights_manager->acquire(b, &_weights_transform);
273 }
274 else
275 {
276 _pretranspose = new Tensor();
277 static_cast<Tensor *>(_pretranspose)->allocator()->init(TensorInfo(TensorShape{ (B_pretranspose_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment);
278 }
Anthony Barbier71d9b572018-07-06 17:05:59 +0100279 }
280}
281
/** One-off preparation: wire up the quantized bias and pre-transpose B.
 *
 * Idempotent — guarded by _is_prepared. Called from run() so preparation is
 * deferred until the input buffers are guaranteed to be backed by memory.
 */
template <typename TypeInput, typename TypeOutput, class OutputStage>
void Fallback<TypeInput, TypeOutput, OutputStage>::prepare()
{
    if(!_is_prepared)
    {
        // Setup up matrix bias in the assembly kernel, it's just a pointer to matrix C.
        if(_c && _c->info()->data_type() == DataType::S32)
        {
            _gemm_kernel_asm->set_quantized_bias(reinterpret_cast<const int32_t *>(_c->buffer() + _c->info()->offset_first_element_in_bytes()));
        }

        // Pretranspose B if required
        if(_gemm_kernel_asm->B_pretranspose_required())
        {
            // Strides are converted from bytes to elements of TypeInput.
            const int  ldb            = _b->info()->strides_in_bytes().y() / sizeof(TypeInput);
            const auto in1_ptr        = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes());
            const int  multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);

            if(_weights_manager && _weights_manager->are_weights_managed(_b))
            {
                // Managed path: the weights manager decides whether the reshape runs.
                _weights_transform.set_args(ldb, in1_ptr, multi_stride_b, _gemm_kernel_asm);
                _weights_manager->run(_b, &_weights_transform);

                // If we didn't run the reshape function, set the pretransposed buffer
                if(!_weights_transform.is_reshape_run())
                {
                    _weights_transform.set_pretranspose(_pretranspose);
                }
            }
            else
            {
                // Unmanaged path: allocate our own tensor (init'd in configure()) and reshape now.
                static_cast<Tensor *>(_pretranspose)->allocator()->allocate();
                ARM_COMPUTE_ERROR_ON(_pretranspose->buffer() == nullptr);
                _gemm_kernel_asm->pretranspose_B_array(_pretranspose->buffer(), in1_ptr, ldb, multi_stride_b);
                // Original B is no longer needed once the reshaped copy exists.
                _b->mark_as_unused();
            }
        }

        _is_prepared = true;
    }
}
323
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100324template <typename TypeInput, typename TypeOutput, class OutputStage>
325void Fallback<TypeInput, TypeOutput, OutputStage>::allocate_workspace(size_t workspace_size, MemoryGroup &memory_group, size_t alignment)
Anthony Barbier71d9b572018-07-06 17:05:59 +0100326{
327 ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "size cannot be 0");
328 _workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment);
Anthony Barbier20394d52018-08-02 11:29:09 +0100329 memory_group.manage(&_workspace);
Anthony Barbier71d9b572018-07-06 17:05:59 +0100330 _workspace.allocator()->allocate();
331}
332
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100333template <typename TypeInput, typename TypeOutput, class OutputStage>
334bool Fallback<TypeInput, TypeOutput, OutputStage>::is_configured() const
Anthony Barbier71d9b572018-07-06 17:05:59 +0100335{
336 return _optimised_kernel != nullptr;
337}
338
/** Execute the assembly GEMM: compute strides, bind buffers and schedule the kernel. */
template <typename TypeInput, typename TypeOutput, class OutputStage>
void Fallback<TypeInput, TypeOutput, OutputStage>::run()
{
    // Leading dimensions, converted from byte strides to element counts.
    const int lda = _a->info()->strides_in_bytes().y() / sizeof(TypeInput);
    int       ldb = 0;
    const int ldd = _d->info()->strides_in_bytes().y() / sizeof(TypeOutput);

    // When a tensor is interpreted as 3D, the batch dimension moves up one axis.
    const size_t a_batch_idx = _gemm_info.reinterpret_input_as_3d() != 0 ? 3 : 2;
    const size_t a_multi_idx = a_batch_idx + 1;
    const size_t d_batch_idx = _gemm_info.depth_output_gemm3d() != 0 ? 3 : 2;
    const size_t d_multi_idx = d_batch_idx + 1;

    const int batch_stride_a = _a->info()->strides_in_bytes()[a_batch_idx] / sizeof(TypeInput);
    const int batch_stride_d = _d->info()->strides_in_bytes()[d_batch_idx] / sizeof(TypeOutput);

    const int multi_stride_a = _a->info()->strides_in_bytes()[a_multi_idx] / sizeof(TypeInput);
    int       multi_stride_b = 0;
    const int multi_stride_d = _d->info()->strides_in_bytes()[d_multi_idx] / sizeof(TypeOutput);

    const auto       in0_ptr = reinterpret_cast<const TypeInput *>(_a->buffer() + _a->info()->offset_first_element_in_bytes());
    const TypeInput *in1_ptr = nullptr;
    auto             out_ptr = reinterpret_cast<TypeOutput *>(_d->buffer() + _d->info()->offset_first_element_in_bytes());

    // Check if B is pre-tranposed and de-reference if not
    // (pre-transposed kernels receive B via prepare() instead of set_arrays()).
    if(!_gemm_kernel_asm->B_is_pretransposed())
    {
        ldb            = _b->info()->strides_in_bytes().y() / sizeof(TypeInput);
        multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);
        in1_ptr        = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes());
    }

    // Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads
    if(_workspace.buffer() != nullptr)
    {
        _gemm_kernel_asm->set_working_space(reinterpret_cast<void *>(_workspace.buffer()));
        const unsigned int window_size = _gemm_kernel_asm->get_window_size();
        unsigned int       num_threads = NEScheduler::get().num_threads();
        if(window_size < num_threads)
        {
            // Fewer work items than threads: cap the thread count.
            num_threads = window_size;
            _gemm_kernel_asm->set_nthreads(num_threads);
        }
    }

    // Prepare assembly kernel (bias setup / B pre-transpose; runs once).
    prepare();

    // Set gemm parameters
    _gemm_kernel_asm->set_arrays(in0_ptr, lda, batch_stride_a, multi_stride_a, in1_ptr, ldb, multi_stride_b, out_ptr, ldd, batch_stride_d, multi_stride_d);

    // Schedule assembly kernel
    NEScheduler::get().schedule(_optimised_kernel.get(), Window::DimX);
}
392
Anthony Barbiereaefd002018-07-20 17:49:35 +0100393template <typename TypeInput, typename TypeOutput>
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100394void create_function_or_arm_gemm(std::unique_ptr<IFunction> &acl_function, std::unique_ptr<NEGEMMAssemblyDispatch::IFallback> &arm_gemm, MemoryGroup &memory_group,
395 const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info,
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100396 std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
Anthony Barbiereaefd002018-07-20 17:49:35 +0100397{
Georgios Pinitas37d080f2019-06-21 18:43:12 +0100398 INEGEMMWrapperKernel::Params p = INEGEMMWrapperKernel::extract_parameters(a, b, d, gemm_info);
Anthony Barbiereaefd002018-07-20 17:49:35 +0100399 const CPUInfo &ci = NEScheduler::get().cpu_info();
400 unsigned int num_threads = NEScheduler::get().num_threads();
401
Georgios Pinitas37d080f2019-06-21 18:43:12 +0100402 arm_gemm::GemmArgs<TypeOutput> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, gemm_info.pretranpose_B());
Anthony Barbiereaefd002018-07-20 17:49:35 +0100403
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100404 // Try to create an ACL function:
405 const arm_gemm::KernelDescription gemm_kernel_info = arm_gemm::get_gemm_method<TypeInput, TypeOutput>(args);
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100406 acl_function = create_function_all_types(gemm_kernel_info, a, b, d, alpha, beta, gemm_info, std::move(memory_manager), weights_manager);
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000407
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100408 // If we still don't have an ACL function:
Anthony Barbiereaefd002018-07-20 17:49:35 +0100409 if(acl_function == nullptr)
410 {
411 //Fallback onto arm_gemm function if ACL doesn't support this method.
412 auto fallback = support::cpp14::make_unique<Fallback<TypeInput, TypeOutput>>();
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100413 fallback->configure(a, b, c, d, args, gemm_info, memory_group, weights_manager);
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100414 arm_gemm = std::move(fallback);
415 }
416}
417
418template <typename TypeInput, typename TypeOutput>
419void create_function_or_arm_gemm_quant(std::unique_ptr<IFunction> &acl_function, std::unique_ptr<NEGEMMAssemblyDispatch::IFallback> &arm_gemm, MemoryGroup &memory_group,
420 const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info,
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100421 std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100422{
423 INEGEMMWrapperKernel::Params p = INEGEMMWrapperKernel::extract_parameters(a, b, d, gemm_info);
424 const CPUInfo &ci = NEScheduler::get().cpu_info();
425 unsigned int num_threads = NEScheduler::get().num_threads();
426
427 arm_gemm::GemmArgs<TypeOutput> args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, alpha, beta, num_threads, gemm_info.pretranpose_B());
428
429 // Configure requantization info
430 const int32_t a_offset = -a->info()->quantization_info().uniform().offset;
431 const int32_t b_offset = -b->info()->quantization_info().uniform().offset;
432 const GEMMLowpOutputStageInfo os_info = gemm_info.gemmlowp_output_stage();
433
434 const arm_gemm::ARequantizeLayer32 gemm_requant_info(nullptr,
435 a_offset, b_offset, os_info.gemmlowp_offset,
436 -os_info.gemmlowp_shift, os_info.gemmlowp_multiplier,
437 os_info.gemmlowp_min_bound, os_info.gemmlowp_max_bound);
438
439 // Try to create an ACL function:
440 const arm_gemm::KernelDescription gemm_kernel_info = arm_gemm::get_gemm_method<TypeInput, TypeOutput>(args, gemm_requant_info);
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100441 acl_function = create_function_all_types(gemm_kernel_info, a, b, d, alpha, beta, gemm_info, std::move(memory_manager), weights_manager);
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100442
443 // If we still don't have an ACL function:
444 if(acl_function == nullptr)
445 {
446 // Fallback onto arm_gemm function if ACL doesn't support this method.
447 auto fallback = support::cpp14::make_unique<Fallback<TypeInput, TypeOutput, arm_gemm::ARequantizeLayer32>>();
Michalis Spyrou1a569a32019-09-10 17:20:34 +0100448 fallback->configure(a, b, c, d, args, gemm_info, memory_group, weights_manager, gemm_requant_info);
Anthony Barbiereaefd002018-07-20 17:49:35 +0100449 arm_gemm = std::move(fallback);
450 }
451}
452
453} //namespace
454
/** Constructor: stores the memory/weights managers; the shared memory manager is
 * kept both for the member memory group and to hand to ACL functions created later
 * in configure().
 */
NEGEMMAssemblyDispatch::NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _function(nullptr), _arm_gemm(nullptr), _memory_group(memory_manager), _memory_manager(memory_manager), _weights_manager(weights_manager)
{
}
459
/** Static validation of the data-type combinations the dispatcher supports.
 *
 * Checks are ordered: null/FP16-capability first, then per-type input/output
 * pairings. Shapes are not validated here.
 */
Status NEGEMMAssemblyDispatch::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha, beta, gemm_info);
    ARM_COMPUTE_UNUSED(c);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(a, b, d);
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(a);
#ifndef __aarch64__
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::U8 || a->data_type() == DataType::S8 || a->data_type() == DataType::QASYMM8, "8bit integer types only supported for aarch64");
#endif /* __aarch64__ */
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::U8, DataType::QASYMM8, DataType::S8, DataType::F16);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::F32 && d->data_type() != DataType::F32, "Only F32 output supported for F32 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::F16 && d->data_type() != DataType::F16, "Only F16 output supported for F16 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::U8 && d->data_type() != DataType::U32, "Only U32 output supported for U8 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::S8 && d->data_type() != DataType::S32, "Only S32 output supported for S8 input");
    // NOTE(review): this rejects QASYMM8 input with S32 output, yet configure()
    // has a branch for exactly that pairing — confirm which is intended.
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::QASYMM8 && d->data_type() != DataType::QASYMM8, "Only QASYMM8 output supported for QASYMM8 input");
    return Status{};
}
478
/** Configure the dispatcher: validate, then dispatch on input data type to
 * create either a native ACL function or an arm_gemm fallback.
 *
 * On unsupported configurations this silently leaves the object unconfigured;
 * callers must check is_configured().
 */
void NEGEMMAssemblyDispatch::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, d);

    //If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()
    if(!NEGEMMAssemblyDispatch::validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, d->info(), alpha, beta, gemm_info))
    {
        return;
    }

    switch(a->info()->data_type())
    {
        case DataType::F32:
            create_function_or_arm_gemm<float, float>(_function, _arm_gemm, _memory_group, a, b, c, d, alpha, beta, gemm_info, _memory_manager, _weights_manager);
            break;
#ifdef __aarch64__
        case DataType::U8:
        case DataType::QASYMM8:
            // S32 output: plain widening GEMM; otherwise use the requantizing path.
            if(d->info()->data_type() == DataType::S32)
            {
                create_function_or_arm_gemm<uint8_t, uint32_t>(_function, _arm_gemm, _memory_group, a, b, c, d, alpha, beta, gemm_info, _memory_manager, _weights_manager);
            }
            else
            {
                create_function_or_arm_gemm_quant<uint8_t, uint8_t>(_function, _arm_gemm, _memory_group, a, b, c, d, alpha, beta, gemm_info, _memory_manager, _weights_manager);
            }
            break;
        case DataType::S8:
            create_function_or_arm_gemm<int8_t, int32_t>(_function, _arm_gemm, _memory_group, a, b, c, d, alpha, beta, gemm_info, _memory_manager, _weights_manager);
            break;
#endif /* __aarch64__ */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            create_function_or_arm_gemm<float16_t, float16_t>(_function, _arm_gemm, _memory_group, a, b, c, d, alpha, beta, gemm_info, _memory_manager, _weights_manager);
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            // Unsupported type: leave unconfigured (validate() should already have rejected it).
            break;
    }
}
519
520void NEGEMMAssemblyDispatch::prepare()
521{
522 if(_function != nullptr)
523 {
524 _function->prepare();
525 }
526 else
527 {
528 ARM_COMPUTE_ERROR_ON(_arm_gemm == nullptr);
529 _arm_gemm->prepare();
530 }
531}
532
533bool NEGEMMAssemblyDispatch::is_configured() const
534{
535 return (_arm_gemm != nullptr && _arm_gemm->is_configured()) || _function != nullptr;
536}
537
538void NEGEMMAssemblyDispatch::run()
539{
Georgios Pinitasda953f22019-04-02 17:27:03 +0100540 MemoryGroupResourceScope scope_mg(_memory_group);
Anthony Barbiereaefd002018-07-20 17:49:35 +0100541 if(_function != nullptr)
542 {
543 _function->run();
544 }
545 else
546 {
547 ARM_COMPUTE_ERROR_ON(_arm_gemm == nullptr);
548 _arm_gemm->run();
549 }
Anthony Barbiereaefd002018-07-20 17:49:35 +0100550}
Anthony Barbier71d9b572018-07-06 17:05:59 +0100551} //namespace arm_compute