blob: 43e531579a8e85500617d25ec70ab900fca16bf9 [file] [log] [blame]
Anthony Barbier71d9b572018-07-06 17:05:59 +01001/*
Georgios Pinitas7cd26d42019-01-09 18:35:17 +00002 * Copyright (c) 2018-2019 ARM Limited.
Anthony Barbier71d9b572018-07-06 17:05:59 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
25
Anthony Barbiereaefd002018-07-20 17:49:35 +010026#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier71d9b572018-07-06 17:05:59 +010027#include "arm_compute/runtime/NEON/NEScheduler.h"
Anthony Barbierc8e84b52018-07-17 16:48:42 +010028#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h"
Anthony Barbier71d9b572018-07-06 17:05:59 +010029
Anthony Barbiereaefd002018-07-20 17:49:35 +010030#include <arm_neon.h>
31
Anthony Barbierc8e84b52018-07-17 16:48:42 +010032namespace arm_compute
33{
Anthony Barbiereaefd002018-07-20 17:49:35 +010034namespace
Anthony Barbier71d9b572018-07-06 17:05:59 +010035{
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010036arm_gemm::Activation map_to_arm_gemm_activation(const ActivationLayerInfo &act)
Anthony Barbiereaefd002018-07-20 17:49:35 +010037{
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010038 arm_gemm::Activation gemm_act;
39
40 // Early exit in case lower bound is other than 0, as it's not yet supported
41 if(act.b() != 0.f)
Anthony Barbierc8e84b52018-07-17 16:48:42 +010042 {
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010043 return gemm_act;
Anthony Barbierc8e84b52018-07-17 16:48:42 +010044 }
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010045
46 switch(act.activation())
47 {
48 case ActivationLayerInfo::ActivationFunction::RELU:
49 gemm_act.type = arm_gemm::Activation::Type::ReLU;
50 break;
51 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
52 gemm_act.type = arm_gemm::Activation::Type::BoundedReLU;
53 gemm_act.param1 = act.a();
54 gemm_act.param2 = 0.f;
55 break;
56 case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
57 gemm_act.type = arm_gemm::Activation::Type::BoundedReLU;
58 gemm_act.param1 = act.a();
59 gemm_act.param2 = act.b();
60 break;
61 default:
62 gemm_act.type = arm_gemm::Activation::Type::None;
63 }
64
65 return gemm_act;
Anthony Barbierc8e84b52018-07-17 16:48:42 +010066}
67
/** Weights transform used by the weights manager to pre-transpose matrix B via the assembly GEMM object.
 *
 * NOTE(review): `_reshape_run` / `is_reshape_run()` are presumably provided by the
 * ITransformWeights base class — confirm against its declaration.
 */
template <typename TypeInput, typename TypeOutput>
class FallbackTransform : public ITransformWeights
{
public:
    /** Allocate the pre-transpose buffer and fill it by pre-transposing B through the assembly kernel. */
    void run() override
    {
        _output.allocator()->allocate();
        ARM_COMPUTE_ERROR_ON(_output.buffer() == nullptr);
        _gemm_kernel_asm->pretranspose_B_array(_output.buffer(), _in1_ptr, _ldb, _multi_stride_b);
        _reshape_run = true;
    }

    /** Free the pre-transpose buffer. */
    void release() override
    {
        _output.allocator()->free();
    }

    /** @return The tensor holding the pre-transposed weights. */
    ITensor *get_weights() override
    {
        return &_output;
    }

    /** Unique id for this transform: the buffer size with the top bit forced on. */
    uint32_t uid() override
    {
        uint32_t id = (_B_pretranspose_size | 0x80000000);
        return id;
    }

    /** Initialise the output buffer meta-data.
     *
     * @param[in] B_pretranspose_size Size in bytes required for the pre-transposed B.
     * @param[in] alignment           Required buffer alignment.
     */
    void configure(size_t B_pretranspose_size, unsigned int alignment)
    {
        _output.allocator()->init(TensorInfo(TensorShape{ (B_pretranspose_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment);
        _B_pretranspose_size = B_pretranspose_size;
    }

    /** Hand an externally pre-transposed buffer to the assembly kernel (only if run() hasn't executed). */
    void set_pretranspose(ITensor *tensor)
    {
        if(!_reshape_run)
        {
            _gemm_kernel_asm->set_pretransposed_B_data(tensor->buffer());
        }
    }

    /** Store the arguments needed later by run().
     *
     * @param[in] ldb             Leading dimension of B (in elements).
     * @param[in] in1_ptr         Pointer to the first element of B.
     * @param[in] multi_stride_b  Stride between GEMM "multis" of B (in elements).
     * @param[in] gemm_kernel_asm Shared assembly GEMM object performing the pre-transpose.
     */
    void set_args(const int ldb, const TypeInput *in1_ptr, const int multi_stride_b, std::shared_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> gemm_kernel_asm)
    {
        _ldb = ldb;
        _in1_ptr = in1_ptr;
        _multi_stride_b = multi_stride_b;
        _gemm_kernel_asm = gemm_kernel_asm;
    }

private:
    Tensor _output{};                   // Owned buffer that receives the pre-transposed B
    int _ldb{};                         // Leading dimension of B (elements)
    const TypeInput *_in1_ptr{};        // Source pointer for B (non-owning)
    int _multi_stride_b{};              // Multi stride of B (elements)
    size_t _B_pretranspose_size{};      // Size in bytes of the pre-transposed buffer
    std::shared_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> _gemm_kernel_asm{ nullptr };
};
126
/** Fallback in case ACL doesn't have a function: wraps an arm_gemm assembly kernel. */
template <typename TypeInput, typename TypeOutput, class OutputStage = arm_gemm::Nothing>
class Fallback : public NEGEMMAssemblyDispatch::IFallback
{
public:
    /** Destructor */
    ~Fallback()
    {
        // Release memory if we have allocated the memory ourselves: the pre-transpose
        // tensor is new'd in configure() only when B is NOT managed by a weights manager;
        // managed buffers are owned by the manager and must not be deleted here.
        if(_pretranspose && !(_weights_manager && _weights_manager->are_weights_managed(_b)))
        {
            delete _pretranspose;
        }
    }

    /** Initialise the functions's input and output.
     *
     * @param[in]  a               Input tensor containing the Matrix A.
     * @param[in]  b               Input tensor containing the Matrix B.
     * @param[in]  c               Input tensor containing the Matrix C.
     * @param[out] d               Output tensor to store the result of matrix multiplication.
     * @param[in]  args            Matrix multiplication information.
     * @param[in]  gemm_info       GEMM meta-data
     * @param[in]  memory_group    Memory group to be used by the function.
     * @param[in]  weights_manager Weights manager to be used by the function.
     * @param[in]  os              Output stage meta-data.
     */
    void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d,
                   arm_gemm::GemmArgs args, const GEMMInfo &gemm_info,
                   MemoryGroup &memory_group, IWeightsManager *weights_manager, const OutputStage &os = {});

    // Inherited methods overridden:
    void run() override;
    void prepare() override;
    bool is_configured() const override;

private:
    /** Allocate a workspace tensor.
     *
     * @param[in] workspace_size Size to allocate.
     * @param[in] memory_group   Tensor memory group.
     * @param[in] alignment      Workspace memory alignment.
     */
    void allocate_workspace(size_t workspace_size, MemoryGroup &memory_group, size_t alignment);

    /** Assembly Gemm kernel */
    std::shared_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> _gemm_kernel_asm{ nullptr };
    /** Optimised NEON kernel */
    std::unique_ptr<INEKernel> _optimised_kernel{ nullptr };
    /** Input A */
    const ITensor *_a
    {
        nullptr
    };
    /** Input B */
    const ITensor *_b
    {
        nullptr
    };
    /** Input C (bias); may be nullptr */
    const ITensor *_c
    {
        nullptr
    };
    /** Output */
    ITensor *_d{ nullptr };
    /** GEMM workspace */
    Tensor _workspace{};
    /** Pre-transpose tensor: owned by this object OR by the weights manager (see destructor) */
    ITensor *_pretranspose{ nullptr };
    /** Prepared flag */
    bool _is_prepared{ false };
    /** GEMM meta-data */
    GEMMInfo _gemm_info{};
    /** Weights manager (non-owning, may be nullptr) */
    IWeightsManager *_weights_manager{ nullptr };
    /** Weights transform object */
    FallbackTransform<TypeInput, TypeOutput> _weights_transform{};
};
Anthony Barbier71d9b572018-07-06 17:05:59 +0100205
template <typename TypeInput, typename TypeOutput, class OutputStage>
void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d,
                                                             arm_gemm::GemmArgs args, const GEMMInfo &gemm_info,
                                                             MemoryGroup &memory_group, IWeightsManager *weights_manager, const OutputStage &os)
{
    arm_gemm::GemmConfig              gemm_cfg;
    // Query which arm_gemm method would be selected for these arguments.
    const arm_gemm::KernelDescription gemm_kernel_info = arm_gemm::get_gemm_method<TypeInput, TypeOutput, OutputStage>(args, os);
    _weights_manager = weights_manager;
    // Pin the selected kernel by name via the config filter (except for batched GEMV).
    if(gemm_kernel_info.method != arm_gemm::GemmMethod::GEMV_BATCHED)
    {
        gemm_cfg.filter = gemm_kernel_info.name;
        args._cfg       = &gemm_cfg;
    }
    _gemm_kernel_asm = arm_gemm::gemm<TypeInput, TypeOutput, OutputStage>(args, os);
    if(_gemm_kernel_asm == nullptr)
    {
        //configuration not supported: Leave function unconfigured:
        return;
    }

    // arm_compute wrapper for the Gemm object (see above)
    std::unique_ptr<NEGEMMAssemblyWrapperKernel<TypeInput, TypeOutput>> acl_gemm_wrapper = support::cpp14::make_unique<NEGEMMAssemblyWrapperKernel<TypeInput, TypeOutput>>();
    ARM_COMPUTE_ERROR_ON(acl_gemm_wrapper == nullptr);
    acl_gemm_wrapper->configure(_gemm_kernel_asm.get(), gemm_cfg.filter);
    const size_t workspace_size = _gemm_kernel_asm->get_working_size();
    if(workspace_size > 0)
    {
        // Allocate workspace
        const unsigned int alignment = 4096;
        allocate_workspace(workspace_size, memory_group, alignment);
    }

    //if we disable this code below in brackets then ConvLayer deadlocks when threads > 1 and
    //the shapes are In=1x1x1024 Weights=1x1x1024x1001 Biases=1001 Out=1x1x1001
    {
        // Cap the thread count to the kernel's parallelism.
        const int window_size = _gemm_kernel_asm->get_window_size();
        if(window_size < args._maxthreads)
        {
            _gemm_kernel_asm->set_nthreads(window_size);
        }
    }

    // Stash tensors/meta-data for run()/prepare().
    _optimised_kernel = std::move(acl_gemm_wrapper);
    _a                = a;
    _b                = b;
    _c                = c;
    _d                = d;
    _gemm_info        = gemm_info;
    // Check for pre-transposed support
    if(_gemm_kernel_asm->B_pretranspose_required())
    {
        // Forcing 128-byte alignment (required by 32-bit kernels)
        const unsigned int alignment           = 128;
        const size_t       B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size();
        if(weights_manager && _weights_manager->are_weights_managed(b))
        {
            // Buffer owned by the weights manager; acquired lazily through the transform.
            _weights_transform.configure(B_pretranspose_size, alignment);
            _pretranspose = _weights_manager->acquire(b, &_weights_transform);
        }
        else
        {
            // Buffer owned by this object (freed in the destructor).
            _pretranspose = new Tensor();
            static_cast<Tensor *>(_pretranspose)->allocator()->init(TensorInfo(TensorShape{ (B_pretranspose_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment);
        }
    }
}
272
template <typename TypeInput, typename TypeOutput, class OutputStage>
void Fallback<TypeInput, TypeOutput, OutputStage>::prepare()
{
    // One-shot preparation: set the quantized bias and pre-transpose B if required.
    if(!_is_prepared)
    {
        // Setup up matrix bias in the assembly kernel, it's just a pointer to matrix C.
        // (Non-S32 bias is handled in run() instead.)
        if(_c && _c->info()->data_type() == DataType::S32)
        {
            _gemm_kernel_asm->set_quantized_bias(reinterpret_cast<const int32_t *>(_c->buffer() + _c->info()->offset_first_element_in_bytes()), 0);
        }

        // Pretranspose B if required
        if(_gemm_kernel_asm->B_pretranspose_required())
        {
            // Strides converted from bytes to elements.
            const int ldb = _b->info()->strides_in_bytes().y() / sizeof(TypeInput);
            const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes());
            const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);

            if(_weights_manager && _weights_manager->are_weights_managed(_b))
            {
                // Let the weights manager drive the transform (it may have cached it already).
                _weights_transform.set_args(ldb, in1_ptr, multi_stride_b, _gemm_kernel_asm);
                _weights_manager->run(_b, &_weights_transform);

                // If we didn't run the reshape function, set the pretransposed buffer
                if(!_weights_transform.is_reshape_run())
                {
                    _weights_transform.set_pretranspose(_pretranspose);
                }
            }
            else
            {
                // We own the buffer: allocate and fill it now, then drop B.
                static_cast<Tensor *>(_pretranspose)->allocator()->allocate();
                ARM_COMPUTE_ERROR_ON(_pretranspose->buffer() == nullptr);
                _gemm_kernel_asm->pretranspose_B_array(_pretranspose->buffer(), in1_ptr, ldb, multi_stride_b);
                _b->mark_as_unused();
            }
        }

        _is_prepared = true;
    }
}
314
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100315template <typename TypeInput, typename TypeOutput, class OutputStage>
316void Fallback<TypeInput, TypeOutput, OutputStage>::allocate_workspace(size_t workspace_size, MemoryGroup &memory_group, size_t alignment)
Anthony Barbier71d9b572018-07-06 17:05:59 +0100317{
318 ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "size cannot be 0");
319 _workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment);
Anthony Barbier20394d52018-08-02 11:29:09 +0100320 memory_group.manage(&_workspace);
Anthony Barbier71d9b572018-07-06 17:05:59 +0100321 _workspace.allocator()->allocate();
322}
323
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100324template <typename TypeInput, typename TypeOutput, class OutputStage>
325bool Fallback<TypeInput, TypeOutput, OutputStage>::is_configured() const
Anthony Barbier71d9b572018-07-06 17:05:59 +0100326{
327 return _optimised_kernel != nullptr;
328}
329
template <typename TypeInput, typename TypeOutput, class OutputStage>
void Fallback<TypeInput, TypeOutput, OutputStage>::run()
{
    // Leading dimensions of A and D, converted from bytes to elements.
    const int lda = _a->info()->strides_in_bytes().y() / sizeof(TypeInput);
    int ldb = 0;
    const int ldd = _d->info()->strides_in_bytes().y() / sizeof(TypeOutput);

    // When the input (or output) is interpreted as 3D GEMM, the batch/multi
    // dimensions shift up by one axis.
    const size_t a_batch_idx = _gemm_info.reinterpret_input_as_3d() != 0 ? 3 : 2;
    const size_t a_multi_idx = a_batch_idx + 1;
    const size_t d_batch_idx = _gemm_info.depth_output_gemm3d() != 0 ? 3 : 2;
    const size_t d_multi_idx = d_batch_idx + 1;

    const int batch_stride_a = _a->info()->strides_in_bytes()[a_batch_idx] / sizeof(TypeInput);
    const int batch_stride_d = _d->info()->strides_in_bytes()[d_batch_idx] / sizeof(TypeOutput);

    const int multi_stride_a = _a->info()->strides_in_bytes()[a_multi_idx] / sizeof(TypeInput);
    int multi_stride_b = 0;
    const int multi_stride_d = _d->info()->strides_in_bytes()[d_multi_idx] / sizeof(TypeOutput);

    const auto in0_ptr = reinterpret_cast<const TypeInput *>(_a->buffer() + _a->info()->offset_first_element_in_bytes());
    const TypeInput *in1_ptr = nullptr;
    auto out_ptr = reinterpret_cast<TypeOutput *>(_d->buffer() + _d->info()->offset_first_element_in_bytes());

    // Check if B is pre-tranposed and de-reference if not
    if(!_gemm_kernel_asm->B_is_pretransposed())
    {
        ldb            = _b->info()->strides_in_bytes().y() / sizeof(TypeInput);
        multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);
        in1_ptr        = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes());
    }

    // Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads
    if(_workspace.buffer() != nullptr)
    {
        _gemm_kernel_asm->set_working_space(reinterpret_cast<void *>(_workspace.buffer()));
        const unsigned int window_size = _gemm_kernel_asm->get_window_size();
        unsigned int num_threads = NEScheduler::get().num_threads();
        if(window_size < num_threads)
        {
            num_threads = window_size;
            _gemm_kernel_asm->set_nthreads(num_threads);
        }
    }

    // Prepare assembly kernel (bias + B pre-transpose, first run only)
    prepare();

    TypeOutput *bias = nullptr;
    // Setup up matrix bias in the assembly kernel, it's just a pointer to matrix C.
    // (S32 bias was already forwarded via set_quantized_bias() in prepare().)
    if(_c && _c->info()->data_type() != DataType::S32)
    {
        bias = reinterpret_cast<TypeOutput *>(_c->buffer() + _c->info()->offset_first_element_in_bytes());
    }
    // Set gemm parameters
    _gemm_kernel_asm->set_arrays(in0_ptr, lda, batch_stride_a, multi_stride_a,
                                 in1_ptr, ldb, multi_stride_b,
                                 out_ptr, ldd, batch_stride_d, multi_stride_d,
                                 bias, 0);

    // Schedule assembly kernel
    NEScheduler::get().schedule(_optimised_kernel.get(), Window::DimX);
}
392
Anthony Barbiereaefd002018-07-20 17:49:35 +0100393template <typename TypeInput, typename TypeOutput>
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100394void create_arm_gemm(std::unique_ptr<NEGEMMAssemblyDispatch::IFallback> &arm_gemm, MemoryGroup &memory_group,
395 const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, arm_gemm::Activation activation, const GEMMInfo &gemm_info,
396 IWeightsManager *weights_manager)
Anthony Barbiereaefd002018-07-20 17:49:35 +0100397{
Georgios Pinitas37d080f2019-06-21 18:43:12 +0100398 INEGEMMWrapperKernel::Params p = INEGEMMWrapperKernel::extract_parameters(a, b, d, gemm_info);
Anthony Barbiereaefd002018-07-20 17:49:35 +0100399 const CPUInfo &ci = NEScheduler::get().cpu_info();
400 unsigned int num_threads = NEScheduler::get().num_threads();
401
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100402 arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, gemm_info.pretranpose_B());
Anthony Barbiereaefd002018-07-20 17:49:35 +0100403
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100404 // Create arm_gemm fallback
405 auto fallback = support::cpp14::make_unique<Fallback<TypeInput, TypeOutput>>();
406 fallback->configure(a, b, c, d, args, gemm_info, memory_group, weights_manager);
407 arm_gemm = std::move(fallback);
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100408}
409
410template <typename TypeInput, typename TypeOutput>
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100411void create_arm_gemm_quant(std::unique_ptr<NEGEMMAssemblyDispatch::IFallback> &arm_gemm, MemoryGroup &memory_group,
412 const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, arm_gemm::Activation activation, const GEMMInfo &gemm_info,
413 IWeightsManager *weights_manager)
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100414{
415 INEGEMMWrapperKernel::Params p = INEGEMMWrapperKernel::extract_parameters(a, b, d, gemm_info);
416 const CPUInfo &ci = NEScheduler::get().cpu_info();
417 unsigned int num_threads = NEScheduler::get().num_threads();
418
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100419 arm_gemm::GemmArgs args(&ci, p.M, p.N, p.K, p.batches, p.multis, false, false, activation, num_threads, gemm_info.pretranpose_B());
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100420
421 // Configure requantization info
422 const int32_t a_offset = -a->info()->quantization_info().uniform().offset;
423 const int32_t b_offset = -b->info()->quantization_info().uniform().offset;
424 const GEMMLowpOutputStageInfo os_info = gemm_info.gemmlowp_output_stage();
425
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100426 const arm_gemm::ARequantizeLayer32 gemm_requant_info(nullptr, 0,
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100427 a_offset, b_offset, os_info.gemmlowp_offset,
428 -os_info.gemmlowp_shift, os_info.gemmlowp_multiplier,
429 os_info.gemmlowp_min_bound, os_info.gemmlowp_max_bound);
430
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100431 // Create arm_gemm fallback
432 auto fallback = support::cpp14::make_unique<Fallback<TypeInput, TypeOutput, arm_gemm::ARequantizeLayer32>>();
433 fallback->configure(a, b, c, d, args, gemm_info, memory_group, weights_manager, gemm_requant_info);
434 arm_gemm = std::move(fallback);
Anthony Barbiereaefd002018-07-20 17:49:35 +0100435}
436
437} //namespace
438
/** Constructor: stores the (optional) memory manager and weights manager; no kernel is created yet. */
NEGEMMAssemblyDispatch::NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _arm_gemm(nullptr), _memory_group(std::move(memory_manager)), _weights_manager(weights_manager)
{
}
443
Status NEGEMMAssemblyDispatch::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(gemm_info);
    ARM_COMPUTE_UNUSED(c);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(a, b, d);
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(a);
#ifndef __aarch64__
    // 8-bit assembly kernels exist only for aarch64 builds.
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::U8 || a->data_type() == DataType::S8 || a->data_type() == DataType::QASYMM8, "8bit integer types only supported for aarch64");
#endif /* __aarch64__ */
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S8,
                                                         DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S8,
                                                         DataType::F16, DataType::F32);
    // Per-channel quantized B only pairs with signed 8-bit A; otherwise A and B data types must match.
    if(is_data_type_quantized_per_channel(b->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8_SIGNED, DataType::S8);
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
    }
    // Allowed input/output type pairings for the assembly kernels.
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::F32 && d->data_type() != DataType::F32, "Only F32 output supported for F32 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::F16 && d->data_type() != DataType::F16, "Only F16 output supported for F16 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::U8 && d->data_type() != DataType::U32, "Only U32 output supported for U8 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::S8 && d->data_type() != DataType::S32, "Only S32 output supported for S8 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::QASYMM8 && d->data_type() != DataType::QASYMM8, "Only QASYMM8 output supported for QASYMM8 input");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->data_type() == DataType::QASYMM8_SIGNED && d->data_type() != DataType::S32, "Only S32 output supported for QASYMM8_SIGNED input");
    return Status{};
}
473
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100474bool NEGEMMAssemblyDispatch::is_activation_supported(const ActivationLayerInfo &activation)
475{
476 arm_gemm::Activation act = map_to_arm_gemm_activation(activation);
477 return act.type != arm_gemm::Activation::Type::None;
478}
479
void NEGEMMAssemblyDispatch::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, d);
    arm_gemm::Activation act = map_to_arm_gemm_activation(gemm_info.activation_info());

    //If we don't support a combination of data types, silently return: it is the caller's responsibility to check if configure() was successful via is_configured()
    if(!NEGEMMAssemblyDispatch::validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, d->info(), gemm_info))
    {
        return;
    }

    // Dispatch on input data type to instantiate the matching typed fallback.
    switch(a->info()->data_type())
    {
        case DataType::F32:
            create_arm_gemm<float, float>(_arm_gemm, _memory_group, a, b, c, d, act, gemm_info, _weights_manager);
            break;
#ifdef __aarch64__
        case DataType::U8:
        case DataType::QASYMM8:
            // S32 output: plain lowp GEMM; QASYMM8 output: fused requantization stage.
            if(d->info()->data_type() == DataType::S32)
            {
                create_arm_gemm<uint8_t, uint32_t>(_arm_gemm, _memory_group, a, b, c, d, act, gemm_info, _weights_manager);
            }
            else
            {
                create_arm_gemm_quant<uint8_t, uint8_t>(_arm_gemm, _memory_group, a, b, c, d, act, gemm_info, _weights_manager);
            }
            break;
        case DataType::S8:
        case DataType::QASYMM8_SIGNED:
            create_arm_gemm<int8_t, int32_t>(_arm_gemm, _memory_group, a, b, c, d, act, gemm_info, _weights_manager);
            break;
#endif /* __aarch64__ */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            create_arm_gemm<float16_t, float16_t>(_arm_gemm, _memory_group, a, b, c, d, act, gemm_info, _weights_manager);
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            // Unsupported type: stay unconfigured (checked by callers via is_configured()).
            break;
    }
}
522
/** Run the one-shot preparation of the underlying fallback; configure() must have succeeded first. */
void NEGEMMAssemblyDispatch::prepare()
{
    ARM_COMPUTE_ERROR_ON(_arm_gemm == nullptr);
    _arm_gemm->prepare();
}
528
529bool NEGEMMAssemblyDispatch::is_configured() const
530{
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100531 return _arm_gemm != nullptr && _arm_gemm->is_configured();
Anthony Barbiereaefd002018-07-20 17:49:35 +0100532}
533
void NEGEMMAssemblyDispatch::run()
{
    // Acquire group-managed memory (e.g. the workspace) for the duration of the run.
    MemoryGroupResourceScope scope_mg(_memory_group);

    ARM_COMPUTE_ERROR_ON(_arm_gemm == nullptr);
    _arm_gemm->run();
}
Anthony Barbier71d9b572018-07-06 17:05:59 +0100541} //namespace arm_compute