Blame - src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp - ml/ComputeLibrary

2019-02-22 16:37:41 +0000

[diff] [blame]

45

_offset_contribution_kernel(), _offset_contribution_output_stage_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _mm_result_s32(), _original_b(nullptr), _a_offset(0), _b_offset(0),

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

46

_run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _fuse_output_stage(false)

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

{

}

Gian Marco Iodice

2018-10-18 10:21:02 +0100

[diff] [blame]

50

void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output, const GEMMInfo &gemm_info)

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

51

{

Georgios Pinitas

2017-11-16 19:24:39 +0000

[diff] [blame]

52

ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);

Gian Marco Iodice

4b90865

2018-10-18 10:21:02 +0100

[diff] [blame]

53

ARM_COMPUTE_UNUSED(c);

54

ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

55

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

56

const ITensor *matrix_a = a;

57

const ITensor *matrix_b = b;

58

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

59

// Clear state

Anthony Barbier

71d9b57

2018-07-06 17:05:59 +0100

[diff] [blame]

60

_mtx_a_reshape_kernel = nullptr;

61

_mtx_b_reshape_kernel = nullptr;

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

62

63

// Set internal variables

Georgios Pinitas

4c5469b

2019-05-21 13:32:43 +0100

[diff] [blame]

64

_a_offset = a->info()->quantization_info().uniform().offset;

65

_b_offset = b->info()->quantization_info().uniform().offset;

Gian Marco

c7f9b89

2017-11-30 14:31:13 +0000

[diff] [blame]

66

_run_vector_matrix_multiplication = a->info()->dimension(1) < 2;

Giorgio Arena

2018-04-05 17:20:34 +0100

[diff] [blame]

67

_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

68

_is_prepared = false;

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

69

_fused_assembly_path = false;

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

70

_original_b = b;

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

71

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

72

// If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage

73

if(gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)

74

{

75

_fuse_output_stage = true;

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

76

_memory_group.manage(&_mm_result_s32);

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

77

TensorInfo info_mm_result_s32(output->info()->tensor_shape(), 1, DataType::S32);

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

78

_mm_result_s32.allocator()->init(info_mm_result_s32);

79

}

80

Pablo Tello

2018-02-23 13:43:50 +0000

[diff] [blame]

81

#ifdef __aarch64__

82

switch(a->info()->data_type())

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

83

{

Pablo Tello

66c656a

2018-03-15 10:34:58 +0000

[diff] [blame]

84

case DataType::QASYMM8:

Pablo Tello

2018-02-23 13:43:50 +0000

[diff] [blame]

85

case DataType::U8:

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

86

case DataType::S8:

Pablo Tello

2018-02-23 13:43:50 +0000

[diff] [blame]

87

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

88

if(a->info()->data_type() == DataType::QASYMM8 && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)

89

{

90

_asm_glue.configure(a, b, c, output, 1.f, 0.f, gemm_info);

91

_fused_assembly_path = _asm_glue.is_configured();

}

else

{

_asm_glue.configure(a, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output, 1.f, 0.f, gemm_info);

96

}

97

_assembly_path = _asm_glue.is_configured();

Pablo Tello

2018-02-23 13:43:50 +0000

[diff] [blame]

break;

}

default:

{

ARM_COMPUTE_ERROR("Datatype not supported");

103

break;

104

}

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

105

}

Pablo Tello

2018-02-23 13:43:50 +0000

[diff] [blame]

106

#endif /* __aarch64__ */

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

107

if(!(_assembly_path || _run_vector_matrix_multiplication))

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

108

{

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

matrix_a = &_tmp_a;

matrix_b = &_tmp_b;

// The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / 4.0f) ]

Georgios Pinitas

02acf01

2019-03-19 10:49:03 +0000

[diff] [blame]

113

TensorInfo a_info(compute_interleaved_shape(*a->info()), 1, a->info()->data_type(), a->info()->quantization_info());

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

114

// The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]

Georgios Pinitas

02acf01

2019-03-19 10:49:03 +0000

[diff] [blame]

115

TensorInfo b_info(compute_transpose1xW_shape(*b->info()), 1, b->info()->data_type(), b->info()->quantization_info());

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

116

_tmp_a.allocator()->init(a_info);

117

_tmp_b.allocator()->init(b_info);

118

_memory_group.manage(&_tmp_a);

119

if(!_reshape_b_only_on_first_run)

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

120

{

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

121

_memory_group.manage(&_tmp_b);

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

122

}

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

123

124

// Configure interleave kernel

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

125

{

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

126

auto k = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();

127

k->configure(a, &_tmp_a);

128

_mtx_a_reshape_kernel = std::move(k);

129

}

Gian Marco

c7f9b89

2017-11-30 14:31:13 +0000

[diff] [blame]

130

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

131

// Configure transpose kernel

132

{

133

auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();

134

k->configure(b, &_tmp_b);

135

_mtx_b_reshape_kernel = std::move(k);

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

136

}

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

137

}

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

138

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

139

if(!_fused_assembly_path)

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

140

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

141

// Initialize matrix B reduction kernel only if _a_offset is not equal to 0

142

if(_a_offset != 0)

Giorgio Arena

2018-04-05 17:20:34 +0100

[diff] [blame]

143

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

144

TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);

145

146

_vector_sum_col.allocator()->init(info_vector_sum_col);

147

if(!_reshape_b_only_on_first_run)

148

{

149

_memory_group.manage(&_vector_sum_col);

150

}

151

152

// Configure Matrix B reduction kernel

153

_mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a->info()->dimension(0), false);

Giorgio Arena

2018-04-05 17:20:34 +0100

[diff] [blame]

154

}

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

155

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

156

// Initialize Matrix A reduction kernel only if _b_offset is not equal to 0

157

if(_b_offset != 0)

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

158

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

159

TensorInfo info_vector_sum_row(compute_reductionB_shape(*a->info()), 1, DataType::S32);

160

161

_vector_sum_row.allocator()->init(info_vector_sum_row);

162

_memory_group.manage(&_vector_sum_row);

163

164

// Configure matrix A reduction kernel

165

_mtx_a_reduction_kernel.configure(a, &_vector_sum_row, a->info()->dimension(0), false);

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

166

}

167

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

168

if(_fuse_output_stage)

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

169

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

170

// Configure matrix multiply kernel

171

if(!_assembly_path)

172

{

173

auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();

174

k->configure(matrix_a, matrix_b, &_mm_result_s32);

175

_mm_kernel = std::move(k);

176

}

177

178

_offset_contribution_output_stage_kernel.configure(&_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output, a->info()->dimension(0),

179

_a_offset, _b_offset, gemm_info.gemmlowp_output_stage());

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

180

}

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

181

else

182

{

183

// Configure matrix multiply kernel

184

if(!_assembly_path)

185

{

186

auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();

187

k->configure(matrix_a, matrix_b, output);

188

_mm_kernel = std::move(k);

189

}

190

// Configure offset contribution kernel

191

_offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a->info()->dimension(0), _a_offset, _b_offset);

192

}

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

193

}

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

194

195

// Allocate tensors

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

196

if(!_assembly_path && !_run_vector_matrix_multiplication)

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

197

{

Pablo Tello

2017-11-02 16:09:35 +0000

[diff] [blame]

198

_tmp_a.allocator()->allocate();

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

199

if(!_reshape_b_only_on_first_run)

200

{

201

_tmp_b.allocator()->allocate();

202

}

Pablo Tello

2017-11-02 16:09:35 +0000

[diff] [blame]

203

}

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

204

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

205

if(!_fused_assembly_path)

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

206

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

207

if(_a_offset != 0 && !_reshape_b_only_on_first_run)

208

{

209

_vector_sum_col.allocator()->allocate();

}

if(_b_offset != 0)

{

_vector_sum_row.allocator()->allocate();

215

}

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

216

}

217

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

218

if(_fuse_output_stage)

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

219

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

220

_mm_result_s32.allocator()->allocate();

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

221

}

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

222

}

223

Gian Marco Iodice

4b90865

2018-10-18 10:21:02 +0100

[diff] [blame]

224

// Static check of whether configure() would accept the given tensor infos.
//
// a         - info of the first input matrix; must be single-channel QASYMM8
// b         - info of the second input matrix; must have the same data type as a
// c         - optional info forwarded to the assembly dispatch / output stage validation; only accepted
//             when gemm_info requests an output stage (type != NONE)
// output    - info of the destination; must be single-channel S32 or QASYMM8
// gemm_info - GEMM meta-data (reshape flags, 3D reinterpretation, gemmlowp output stage)
//
// Returns an empty Status on success, otherwise the first error encountered.
Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::QASYMM8);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(c != nullptr && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE, "Bias addition not supported in NEGEMMLowpMatrixMultiplyCore for output S32");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(0) != (b)->dimension(1),
                                    "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");

    // Operands of the non-assembly matrix multiply kernel; redirected to the reshaped infos below when needed
    const ITensorInfo *matrix_a_info = a;
    const ITensorInfo *matrix_b_info = b;

    TensorInfo tmp_a_info{};
    TensorInfo tmp_b_info{};
    TensorInfo mm_result_s32_info{};

    const int32_t a_offset = a->quantization_info().uniform().offset;
    const int32_t b_offset = b->quantization_info().uniform().offset;

    // Must mirror configure(): the output stage is fused whenever one is requested.
    // (The input is already guaranteed to be QASYMM8 by the check at the top of this function, so the
    // decision must not additionally depend on the data type of a, otherwise the fused path is never validated.)
    const bool fuse_output_stage = gemm_info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
    if(fuse_output_stage)
    {
        // Intermediate S32 matrix multiplication result with the shape of the output
        auto_init_if_empty(mm_result_s32_info, a->clone()->set_tensor_shape(output->tensor_shape()).set_data_type(DataType::S32));
    }

    // Check if we need to run the optimized assembly kernel
    bool run_optimised             = false;
    bool run_optimised_requantized = false;
    if(is_data_type_quantized_asymmetric(a->data_type()))
    {
        run_optimised             = bool(NEGEMMAssemblyDispatch::validate(a, b, c, output, 1.f, 0.f, gemm_info));
        run_optimised_requantized = run_optimised;
    }
    else
    {
        run_optimised = bool(NEGEMMAssemblyDispatch::validate(a, b, nullptr, fuse_output_stage ? &mm_result_s32_info : output, 1.f, 0.f, gemm_info));
    }

    if(run_optimised)
    {
        ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0));
        if(gemm_info.depth_output_gemm3d() != 0)
        {
            if(gemm_info.reinterpret_input_as_3d())
            {
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(2) != output->dimension(2));
            }
            else
            {
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1) * output->dimension(2));
            }
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "NEGEMM cannot reinterpret the input tensor as 3D");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "NEGEMM cannot reinterpret the output tensor as 3D");

        const bool run_vector_matrix_multiplication = a->dimension(1) < 2;
        if(!run_vector_matrix_multiplication)
        {
            matrix_a_info = &tmp_a_info;
            matrix_b_info = &tmp_b_info;

            // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / 4.0f) ]
            TensorShape shape_tmp_a = a->tensor_shape();
            shape_tmp_a.set(0, a->dimension(0) * 4);
            shape_tmp_a.set(1, std::ceil(a->dimension(1) / 4.f));

            // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
            TensorShape shape_tmp_b = b->tensor_shape();
            shape_tmp_b.set(0, b->dimension(1) * 16);
            shape_tmp_b.set(1, std::ceil(b->dimension(0) / 16.f));

            // Validate interleave and transpose kernels
            auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(shape_tmp_a));
            auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(shape_tmp_b));

            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMInterleave4x4Kernel::validate(a, &tmp_a_info));
            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(b, &tmp_b_info));
        }
    }

    // Nothing else to validate when the assembly dispatch also handles the requantization
    if(!run_optimised_requantized)
    {
        TensorInfo info_vector_sum_col{};
        TensorInfo info_vector_sum_row{};

        // Validate matrix B reduction kernel only if a_offset is not equal to 0
        if(a_offset != 0)
        {
            info_vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32);

            // Validate Matrix B reduction kernel
            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixBReductionKernel::validate(b, &info_vector_sum_col, a->dimension(0), false));
        }

        // Validate Matrix A reduction kernel only if b_offset is not equal to 0
        if(b_offset != 0)
        {
            info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);

            // Validate matrix A reduction kernel
            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(a, &info_vector_sum_row, a->dimension(0), false));
        }

        if(fuse_output_stage)
        {
            if(!run_optimised)
            {
                ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &mm_result_s32_info));
            }

            // Validate fused offset contribution + output stage kernel
            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionOutputStageKernel::validate(&mm_result_s32_info,
                                                                                                a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                                b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                                c, output, a_offset, b_offset,
                                                                                                gemm_info.gemmlowp_output_stage()));
        }
        else
        {
            if(!run_optimised)
            {
                ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, output));
            }
            // Validate offset contribution kernel
            ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionKernel::validate(output,
                                                                                     a_offset == 0 ? nullptr : &info_vector_sum_col,
                                                                                     b_offset == 0 ? nullptr : &info_vector_sum_row,
                                                                                     a_offset, b_offset));
        }
    }
    return Status{};
}

366

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

367

void NEGEMMLowpMatrixMultiplyCore::run()

368

{

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

369

prepare();

370

Georgios Pinitas

da953f2

2019-04-02 17:27:03 +0100

[diff] [blame]

371

MemoryGroupResourceScope scope_mg(_memory_group);

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

372

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

373

// Reshape inputs

374

if(_mtx_a_reshape_kernel)

Pablo Tello

2017-11-02 16:09:35 +0000

[diff] [blame]

375

{

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

376

NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);

377

}

378

if(_mtx_b_reshape_kernel && !_reshape_b_only_on_first_run)

379

{

380

NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);

Pablo Tello

2017-11-02 16:09:35 +0000

[diff] [blame]

381

}

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

382

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

383

// Run GEMM

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

384

if(_asm_glue.is_configured())

Pablo Tello

2018-02-23 13:43:50 +0000

[diff] [blame]

385

{

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

386

_asm_glue.run();

Pablo Tello

2018-02-23 13:43:50 +0000

[diff] [blame]

}

else

{

NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);

391

}

Gian Marco Iodice

2017-10-09 15:05:40 +0100

[diff] [blame]

392

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

393

if(!_fused_assembly_path)

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

394

{

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

395

// Run matrix A reduction kernel only if _b_offset is not equal to 0

396

if(_b_offset != 0)

397

{

398

NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX);

399

}

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

400

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

401

// Run matrix B reduction kernel only if _a_offset is not equal to 0

402

if(_a_offset != 0 && !_reshape_b_only_on_first_run)

403

{

404

NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);

405

}

Gian Marco

2017-11-08 12:24:09 +0000

[diff] [blame]

406

Georgios Pinitas

2019-06-27 17:00:52 +0100

[diff] [blame]

407

if(_fuse_output_stage)

408

{

409

// Run offset contribution kernel

410

NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY);

}

else

{

// Run offset contribution kernel

415

NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY);

416

}

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

417

}

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

418

}

Giorgio Arena

2018-04-05 17:20:34 +0100

[diff] [blame]

419

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

420

void NEGEMMLowpMatrixMultiplyCore::prepare()

{

if(!_is_prepared)

{

// Run assembly reshape

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

425

if(_asm_glue.is_configured() && _reshape_b_only_on_first_run)

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

426

{

427

ARM_COMPUTE_ERROR_ON(!_original_b->is_used());

428

Anthony Barbier

2018-07-20 17:49:35 +0100

[diff] [blame]

429

_asm_glue.prepare();

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

430

_original_b->mark_as_unused();

431

}

432

// Run non-assembly reshape

433

else if(_mtx_b_reshape_kernel && _reshape_b_only_on_first_run)

434

{

435

ARM_COMPUTE_ERROR_ON(!_original_b->is_used());

436

437

// Run reshape kernel and mark original weights tensor as unused

438

_tmp_b.allocator()->allocate();

439

NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);

440

_original_b->mark_as_unused();

441

}

442

443

// Run matrix B reduction kernel only if _a_offset is not equal to 0

444

if(_a_offset != 0 && _reshape_b_only_on_first_run)

445

{

446

_vector_sum_col.allocator()->allocate();

447

NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);

}

_is_prepared = true;

}

Pablo Tello