blob: 0247a39421bacfc4e64881ab29f2742f432db33c [file] [log] [blame]
Gian Marco Iodiceab182122017-10-09 15:05:40 +01001/*
Georgios Pinitase46a7be2019-02-18 15:16:14 +00002 * Copyright (c) 2017-2019 ARM Limited.
Gian Marco Iodiceab182122017-10-09 15:05:40 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
Gian Marco Iodiceab182122017-10-09 15:05:40 +010030#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
31#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
Gian Marco Iodiceab182122017-10-09 15:05:40 +010032#include "arm_compute/core/TensorInfo.h"
33#include "arm_compute/core/Types.h"
34#include "arm_compute/core/Validate.h"
Isabella Gottardie6630e42018-01-18 15:50:39 +000035#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Gian Marco Iodiceab182122017-10-09 15:05:40 +010036#include "arm_compute/runtime/NEON/NEScheduler.h"
37#include "arm_compute/runtime/TensorAllocator.h"
38#include "support/ToolchainSupport.h"
39
40using namespace arm_compute;
Isabella Gottardie6630e42018-01-18 15:50:39 +000041using namespace arm_compute::misc::shape_calculator;
Gian Marco Iodiceab182122017-10-09 15:05:40 +010042
43NEGEMMLowpMatrixMultiplyCore::NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
Anthony Barbiereaefd002018-07-20 17:49:35 +010044 : _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(),
George Wort2d7e6832019-02-22 16:37:41 +000045 _offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _original_b(nullptr), _a_offset(0), _b_offset(0),
46 _run_vector_matrix_multiplication(false), _dot_product_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false)
Gian Marco Iodiceab182122017-10-09 15:05:40 +010047{
48}
49
// Configure the quantized (GEMMLowp) matrix-multiply pipeline.
//
// @param a         First input matrix (quantized, QASYMM8 per validate()). Not nullptr.
// @param b         Second input matrix (weights). Not nullptr.
// @param c         Optional bias tensor; only supported when an output stage is fused (see validate()).
// @param output    Destination tensor (S32, or QASYMM8 when the output stage is fused). Not nullptr.
// @param gemm_info Metadata: reshape policy and the (possibly NONE) GEMMLowp output stage.
//
// Chooses between three execution strategies:
//   1. Assembly dot-product path (aarch64 only) via _asm_glue,
//   2. Vector-by-matrix multiplication (no reshaping needed),
//   3. Generic path: interleave A / transpose B, then NEGEMMLowpMatrixMultiplyKernel.
// Additionally wires up the row/column reduction kernels needed to apply the
// quantization zero-point offsets, either as a standalone offset-contribution
// kernel (S32 output) or fused with the requested output stage.
void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
    ARM_COMPUTE_UNUSED(c);
    // All argument checking is delegated to the static validate() so configure/validate stay in sync
    ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));

    // By default the multiply kernel consumes the inputs directly; the generic path
    // below redirects these to the reshaped temporaries _tmp_a/_tmp_b.
    const ITensor *matrix_a = a;
    const ITensor *matrix_b = b;

    // Clear state
    _mtx_a_reshape_kernel = nullptr;
    _mtx_b_reshape_kernel = nullptr;

    // Set internal variables
    // NOTE(review): quantization_info().offset is the quantization zero-point of each tensor
    _a_offset                         = a->info()->quantization_info().offset;
    _b_offset                         = b->info()->quantization_info().offset;
    _run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
    _reshape_b_only_on_first_run      = gemm_info.reshape_b_only_on_first_run();
    _is_prepared                      = false;
    _original_b                       = b;

    // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
    if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
    {
        _fuse_output_stage = true;

        // Intermediate S32 accumulator, released after configuration (allocated further down)
        _memory_group.manage(&_mm_result_s32);

        TensorInfo info_mm_result_s32(output->info()->tensor_shape(), 1, DataType::S32);

        _mm_result_s32.allocator()->init(info_mm_result_s32);
    }

#ifdef __aarch64__
    // Try the optimized assembly path first; _dot_product_path stays false if the
    // dispatcher cannot handle this configuration and we fall through to the NEON kernels.
    switch(a->info()->data_type())
    {
        case DataType::QASYMM8:
        case DataType::U8:
        case DataType::S8:
        {
            _asm_glue.configure(a, b, _fuse_output_stage ? &_mm_result_s32 : output, 1.f, 0.f, _reshape_b_only_on_first_run);
            _dot_product_path = _asm_glue.is_configured();
            break;
        }
        default:
        {
            ARM_COMPUTE_ERROR("Datatype not supported");
            break;
        }
    }
#endif /* __aarch64__ */
    // Generic path: reshape both operands unless the assembly path took over or
    // A is a vector (vector * matrix needs no reshaping)
    if(!(_dot_product_path || _run_vector_matrix_multiplication))
    {
        matrix_a = &_tmp_a;
        matrix_b = &_tmp_b;

        // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / 4.0f) ]
        TensorInfo a_info(compute_interleaved_shape(*a->info()), 1, a->info()->data_type(), a->info()->quantization_info());
        // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
        TensorInfo b_info(compute_transpose1xW_shape(*b->info()), 1, b->info()->data_type(), b->info()->quantization_info());
        _tmp_a.allocator()->init(a_info);
        _tmp_b.allocator()->init(b_info);
        _memory_group.manage(&_tmp_a);
        // When B is reshaped only once (constant weights), _tmp_b must persist across
        // runs and is therefore kept out of the reusable memory group
        if(!_reshape_b_only_on_first_run)
        {
            _memory_group.manage(&_tmp_b);
        }

        // Configure interleave kernel
        {
            auto k = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
            k->configure(a, &_tmp_a);
            _mtx_a_reshape_kernel = std::move(k);
        }

        // Configure transpose kernel
        {
            auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
            k->configure(b, &_tmp_b);
            _mtx_b_reshape_kernel = std::move(k);
        }
    }

    // Initialize matrix B reduction kernel only if _a_offset is not equal to 0
    // (the column sums of B are needed to compensate for A's zero-point)
    if(_a_offset != 0)
    {
        TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);

        _vector_sum_col.allocator()->init(info_vector_sum_col);
        // Same persistence rule as _tmp_b: reused across runs when B is constant
        if(!_reshape_b_only_on_first_run)
        {
            _memory_group.manage(&_vector_sum_col);
        }

        // Configure Matrix B reduction kernel
        _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a->info()->dimension(0), false);
    }

    // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
    // (the row sums of A are needed to compensate for B's zero-point)
    if(_b_offset != 0)
    {
        TensorInfo info_vector_sum_row(compute_reductionB_shape(*a->info()), 1, DataType::S32);

        _vector_sum_row.allocator()->init(info_vector_sum_row);
        _memory_group.manage(&_vector_sum_row);

        // Configure matrix A reduction kernel
        _mtx_a_reduction_kernel.configure(a, &_vector_sum_row, a->info()->dimension(0), false);
    }

    if(_fuse_output_stage)
    {
        // Configure matrix multiply kernel (only when the assembly path is not used;
        // otherwise _asm_glue already writes into _mm_result_s32)
        if(!_dot_product_path)
        {
            auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
            k->configure(matrix_a, matrix_b, &_mm_result_s32);
            _mm_kernel = std::move(k);
        }

        // Fused kernel: applies offset contributions, bias c and the requested output stage in one pass
        _offset_contribution_output_stage_kernel.configure(&_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output, a->info()->dimension(0),
                                                           _a_offset, _b_offset, gemm_info.gemmlowp_output_stage());

        _mm_result_s32.allocator()->allocate();
    }
    else
    {
        // Configure matrix multiply kernel
        if(!_dot_product_path)
        {
            auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
            k->configure(matrix_a, matrix_b, output);
            _mm_kernel = std::move(k);
        }
        // Configure offset contribution kernel (in-place on the S32 output)
        _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a->info()->dimension(0), _a_offset, _b_offset);
    }

    // Allocate tensors (deferred until after all manage() calls so the memory
    // group can compute aliasing/reuse across the whole pipeline)
    if(!_dot_product_path && !_run_vector_matrix_multiplication)
    {
        _tmp_a.allocator()->allocate();
        if(!_reshape_b_only_on_first_run)
        {
            _tmp_b.allocator()->allocate();
        }
    }

    if(_a_offset != 0 && !_reshape_b_only_on_first_run)
    {
        _vector_sum_col.allocator()->allocate();
    }

    if(_b_offset != 0)
    {
        _vector_sum_row.allocator()->allocate();
    }
}
208
// Static validation mirroring configure(): checks whether the given tensor infos
// and GEMM metadata describe a configuration this function can execute, without
// allocating or configuring anything.
//
// @param a         Info of input matrix A (must be QASYMM8).
// @param b         Info of input matrix B (same data type as A).
// @param c         Optional bias info; only allowed when an output stage is fused.
// @param output    Info of the destination (S32, or QASYMM8 with a fused output stage).
// @param gemm_info GEMM metadata (reshape flags, 3D reinterpretation, output stage).
//
// @return Status{} on success; an error Status describing the first failed check otherwise.
Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::QASYMM8);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(c != nullptr && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE, "Bias addition not supported in NEGEMMLowpMatrixMultiplyCore for output S32");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(0) != (b)->dimension(1),
                                    "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");

    // Mirrors configure(): the multiply kernel validates against the reshaped
    // temporaries when the generic path is taken, the raw inputs otherwise.
    const ITensorInfo *matrix_a_info = a;
    const ITensorInfo *matrix_b_info = b;

    TensorInfo tmp_a_info{};
    TensorInfo tmp_b_info{};
    TensorInfo mm_result_s32_info{};

    int32_t a_offset                        = a->quantization_info().offset;
    int32_t b_offset                        = b->quantization_info().offset;
    const bool reshape_b_only_on_first_run  = gemm_info.reshape_b_only_on_first_run();

    bool fuse_output_stage = gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
    if(fuse_output_stage)
    {
        // Intermediate S32 accumulator with the output's shape
        auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(output->tensor_shape()).set_data_type(DataType::S32));
    }

    // Check if we need to run the optimized assembly kernel
    const bool run_optimised = bool(NEGEMMAssemblyDispatch::validate(a, b, fuse_output_stage ? &mm_result_s32_info : output, 1.f, 0.f, reshape_b_only_on_first_run));

    if(run_optimised)
    {
        ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0));
        // With depth_output_gemm3d != 0 the output is reinterpreted as 3D; the
        // consistency check on A's dimensions depends on whether the input is 3D too
        if(gemm_info.depth_output_gemm3d() != 0)
        {
            if(gemm_info.reinterpret_input_as_3d())
            {
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(2) != output->dimension(2));
            }
            else
            {
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1) * output->dimension(2));
            }
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
        }
    }
    else
    {
        // 3D reinterpretation is only supported on the assembly path
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "NEGEMM cannot reinterpret the input tensor as 3D");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "NEGEMM cannot reinterpret the output tensor as 3D");

        const bool run_vector_matrix_multiplication = a->dimension(1) < 2;
        if(!run_vector_matrix_multiplication)
        {
            matrix_a_info = &tmp_a_info;
            matrix_b_info = &tmp_b_info;

            // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / 4.0f) ]
            TensorShape shape_tmp_a = a->tensor_shape();
            shape_tmp_a.set(0, a->dimension(0) * 4);
            shape_tmp_a.set(1, std::ceil(a->dimension(1) / 4.f));

            // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
            TensorShape shape_tmp_b = b->tensor_shape();
            shape_tmp_b.set(0, b->dimension(1) * 16);
            shape_tmp_b.set(1, std::ceil(b->dimension(0) / 16.f));

            // Validate interleave kernel
            auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(shape_tmp_a));
            auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(shape_tmp_b));

            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMInterleave4x4Kernel::validate(a, &tmp_a_info));
            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(b, &tmp_b_info));
        }
    }

    TensorInfo info_vector_sum_col, info_vector_sum_row;

    // Validate matrix B reduction kernel only if _a_offset is not equal to 0
    if(a_offset != 0)
    {
        info_vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32);

        // Configure Matrix B reduction kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixBReductionKernel::validate(b, &info_vector_sum_col, a->dimension(0), false));
    }

    // Validate Matrix A reduction kernel only if _b_offset is not equal to 0
    if(b_offset != 0)
    {
        info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);

        // Configure matrix A reduction kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(a, &info_vector_sum_row, a->dimension(0), false));
    }

    if(fuse_output_stage)
    {
        if(!run_optimised)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info));
        }

        // Validate offset contribution kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionOutputStageKernel::validate(&mm_result_s32_info,
                                                                                           a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                           b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                           c, output, a_offset, b_offset,
                                                                                           gemm_info.gemmlowp_output_stage()));
    }
    else
    {
        if(!run_optimised)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, output));
        }
        // Validate offset contribution kernel
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionKernel::validate(output,
                                                                                a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                a_offset, b_offset));
    }
    return Status{};
}
338
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100339void NEGEMMLowpMatrixMultiplyCore::run()
340{
Georgios Pinitas72219332018-06-05 14:56:06 +0100341 prepare();
342
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100343 _memory_group.acquire();
344
Georgios Pinitas72219332018-06-05 14:56:06 +0100345 // Reshape inputs
346 if(_mtx_a_reshape_kernel)
Pablo Tello6ff12a02017-11-02 16:09:35 +0000347 {
Georgios Pinitas72219332018-06-05 14:56:06 +0100348 NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);
349 }
350 if(_mtx_b_reshape_kernel && !_reshape_b_only_on_first_run)
351 {
352 NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
Pablo Tello6ff12a02017-11-02 16:09:35 +0000353 }
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100354
Georgios Pinitas72219332018-06-05 14:56:06 +0100355 // Run GEMM
Anthony Barbiereaefd002018-07-20 17:49:35 +0100356 if(_asm_glue.is_configured())
Pablo Telloeb82fd22018-02-23 13:43:50 +0000357 {
Anthony Barbiereaefd002018-07-20 17:49:35 +0100358 _asm_glue.run();
Pablo Telloeb82fd22018-02-23 13:43:50 +0000359 }
360 else
361 {
362 NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);
363 }
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100364
Gian Marcoe75a02b2017-11-08 12:24:09 +0000365 // Run matrix A reduction kernel only if _b_offset is not equal to 0
366 if(_b_offset != 0)
367 {
368 NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX);
369 }
370
371 // Run matrix B reduction kernel only if _a_offset is not equal to 0
Georgios Pinitas72219332018-06-05 14:56:06 +0100372 if(_a_offset != 0 && !_reshape_b_only_on_first_run)
Gian Marcoe75a02b2017-11-08 12:24:09 +0000373 {
374 NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
375 }
376
George Wort2d7e6832019-02-22 16:37:41 +0000377 if(_fuse_output_stage)
378 {
379 // Run offset contribution kernel
380 NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY);
381 }
382 else
383 {
384 // Run offset contribution kernel
385 NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY);
386 }
Gian Marcoe75a02b2017-11-08 12:24:09 +0000387
Gian Marco Iodiceab182122017-10-09 15:05:40 +0100388 _memory_group.release();
Georgios Pinitas72219332018-06-05 14:56:06 +0100389}
Giorgio Arenabb54e4e2018-04-05 17:20:34 +0100390
Georgios Pinitas72219332018-06-05 14:56:06 +0100391void NEGEMMLowpMatrixMultiplyCore::prepare()
392{
393 if(!_is_prepared)
394 {
395 // Run assembly reshape
Anthony Barbiereaefd002018-07-20 17:49:35 +0100396 if(_asm_glue.is_configured() && _reshape_b_only_on_first_run)
Georgios Pinitas72219332018-06-05 14:56:06 +0100397 {
398 ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
399
Anthony Barbiereaefd002018-07-20 17:49:35 +0100400 _asm_glue.prepare();
Georgios Pinitas72219332018-06-05 14:56:06 +0100401 _original_b->mark_as_unused();
402 }
403 // Run non-assembly reshape
404 else if(_mtx_b_reshape_kernel && _reshape_b_only_on_first_run)
405 {
406 ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
407
408 // Run reshape kernel and mark original weights tensor as unused
409 _tmp_b.allocator()->allocate();
410 NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
411 _original_b->mark_as_unused();
412 }
413
414 // Run matrix B reduction kernel only if _a_offset is not equal to 0
415 if(_a_offset != 0 && _reshape_b_only_on_first_run)
416 {
417 _vector_sum_col.allocator()->allocate();
418 NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
419 }
420
421 _is_prepared = true;
422 }
Pablo Tello6ff12a02017-11-02 16:09:35 +0000423}