Blame - src/runtime/CL/functions/CLGEMM.cpp - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

65

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

66

GEMMType gemm_type = GEMMType::RESHAPED_V1;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

67

Gian Marco Iodice

2019-09-24 12:05:06 +0100

[diff] [blame]

68

if(gpu_target_is_in(gpu_target, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT,

69

GPUTarget::G52, GPUTarget::G52LIT, GPUTarget::G71, GPUTarget::G72,

70

GPUTarget::G76, GPUTarget::G77))

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

71

{

Gian Marco Iodice

2019-09-24 12:05:06 +0100

[diff] [blame]

72

if(data_type == DataType::F32)

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

73

{

Gian Marco Iodice

2019-09-24 12:05:06 +0100

[diff] [blame]

74

if((m > 1) && (n < 16))

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

75

{

Gian Marco Iodice

2019-09-24 12:05:06 +0100

[diff] [blame]

76

gemm_type = GEMMType::RESHAPED_V1;

}

else if(m == 1)

{

gemm_type = GEMMType::RESHAPED_ONLY_RHS;

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

81

}

82

else

83

{

Gian Marco Iodice

2019-09-24 12:05:06 +0100

[diff] [blame]

84

// COMPMID-852

85

if((k > 256) && (m > 4) && reshape_b_only_on_first_run)

86

{

87

constexpr float alpha = 3.2f;

88

constexpr float fact0 = 1.51f;

89

constexpr float fact1 = 1.66f;

90

constexpr float ops = 12.0f;

91

const float scale = k > 1024 ? 1.07f : 1.0f;

92

gemm_type = (alpha + ((n * fact0) / ops) < ((fact1 * n * scale) / ops)) ? GEMMType::RESHAPED_V1 : GEMMType::NATIVE;

}

else

{

gemm_type = GEMMType::NATIVE;

}

}

const auto workload = static_cast<float>((m * n) / 20.0f);

101

102

gemm_type = ((workload > 1600.0f) && (gemm_type == GEMMType::RESHAPED_V1) && (data_type == DataType::F32)) ? GEMMType::RESHAPED_V2 : gemm_type;

}

else

{

if((m == 1) || (!reshape_b_only_on_first_run))

107

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

108

gemm_type = GEMMType::NATIVE;

109

}

Gian Marco Iodice

2019-09-24 12:05:06 +0100

[diff] [blame]

110

else

111

{

112

gemm_type = GEMMType::RESHAPED_V2;

113

}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

114

}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

}

else

{

// We reshape the matrices only if we do not have the vector-by-matrix case and we reshape the matrix B only once

119

gemm_type = ((m != 1) && reshape_b_only_on_first_run) ? GEMMType::RESHAPED_V1 : GEMMType::NATIVE;

120

}

Gian Marco Iodice

edfa9f4

2017-08-15 11:45:22 +0100

[diff] [blame]

121

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

return gemm_type;

}

void CLGEMM::configure_native(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

126

{

127

const unsigned int m = gemm_info.reinterpret_input_as_3d() ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

128

const unsigned int n = b->info()->dimension(0);

129

const unsigned int k = a->info()->dimension(0);

130

const GPUTarget gpu_target = CLScheduler::get().target();

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

131

132

// Set the target for the kernels

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

133

_mm_kernel.set_target(gpu_target);

134

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

135

GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias());

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

136

137

// Configure and tune matrix multiply kernel

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

138

_mm_kernel.configure(a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

139

140

// Tune kernel statically

141

CLScheduler::get().tune_kernel_static(_mm_kernel);

142

}

143

144

void CLGEMM::configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

145

{

Gian Marco Iodice

2018-12-12 10:18:04 +0000

[diff] [blame]

146

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

147

const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

148

const unsigned int n = b->info()->dimension(0);

149

const unsigned int k = a->info()->dimension(0);

150

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

151

const GPUTarget gpu_target = CLScheduler::get().target();

Gian Marco Iodice

2018-12-12 10:18:04 +0000

[diff] [blame]

152

int mult_transpose1xW_width = 1;

153

int mult_interleave4x4_height = 1;

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

154

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

155

// Set the target for the kernels

156

_reshape_lhs_kernel.set_target(gpu_target);

157

_mm_kernel.set_target(gpu_target);

158

Gian Marco Iodice

2018-05-08 12:01:57 +0100

[diff] [blame]

159

if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST)

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

160

{

161

mult_transpose1xW_width = 4;

162

mult_interleave4x4_height = 2;

163

}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

164

giuros01

8b6b4a9

2018-12-18 19:01:33 +0000

[diff] [blame]

165

GEMMRHSMatrixInfo rhs_info;

166

rhs_info.n0 = 16 / b->info()->element_size();

167

rhs_info.k0 = 1;

168

rhs_info.h0 = mult_transpose1xW_width;

169

rhs_info.interleave = false;

170

rhs_info.transpose = false;

Gian Marco

2018-01-12 10:21:40 +0000

[diff] [blame]

171

giuros01

1c9efeb

2019-01-11 14:04:43 +0000

[diff] [blame]

172

GEMMLHSMatrixInfo lhs_info;

173

lhs_info.m0 = 4;

174

lhs_info.k0 = 4;

175

lhs_info.v0 = mult_interleave4x4_height;

176

lhs_info.interleave = true;

177

lhs_info.transpose = true;

178

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

179

GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias());

Gian Marco

b5311a6

2017-12-13 12:48:03 +0000

[diff] [blame]

180

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

181

_memory_group.manage(&_tmp_a);

182

if(!_reshape_b_only_on_first_run)

Gian Marco Iodice

68a3f56

2018-07-26 11:44:03 +0100

[diff] [blame]

183

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

184

_memory_group.manage(&_tmp_b);

Gian Marco Iodice

edfa9f4

2017-08-15 11:45:22 +0100

[diff] [blame]

185

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

186

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

187

// Configure interleave kernel

188

_reshape_lhs_kernel.configure(a, &_tmp_a, lhs_info, reinterpret_input_as_3d);

Gian Marco Iodice

edfa9f4

2017-08-15 11:45:22 +0100

[diff] [blame]

189

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

190

// Configure transpose kernel

191

_reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

192

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

193

// Configure and tune matrix multiply kernel

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

194

_mm_kernel.configure(&_tmp_a, &_tmp_b, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

195

196

CLScheduler::get().tune_kernel_static(_mm_kernel);

197

198

// Allocate intermediate tensors

199

_tmp_a.allocator()->allocate();

200

if(!_reshape_b_only_on_first_run)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

201

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

202

_tmp_b.allocator()->allocate();

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

}

}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

206

void CLGEMM::configure_reshaped_v2(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

207

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

208

DataType data_type = a->info()->data_type();

209

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

210

const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

211

const unsigned int n = b->info()->dimension(0);

212

const unsigned int k = a->info()->dimension(0);

213

const unsigned int batch_size = reinterpret_input_as_3d ? a->info()->dimension(3) : a->info()->dimension(2);

214

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

215

const GPUTarget gpu_target = CLScheduler::get().target();

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame]

216

bool broadcast_bias = gemm_info.broadcast_bias();

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

217

218

GEMMKernelInfo kernel_info;

Gian Marco Iodice

2019-06-26 17:18:11 +0100

[diff] [blame]

kernel_info.m = m;

kernel_info.n = n;

kernel_info.k = k;

kernel_info.depth_output_gemm3d = depth_output_gemm3d;

223

kernel_info.reinterpret_input_as_3d = false;

224

kernel_info.broadcast_bias = broadcast_bias;

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

225

kernel_info.activation_info = gemm_info.activation_info();

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

226

227

// Set the target for the kernels

228

_reshape_lhs_kernel.set_target(gpu_target);

229

_mm_kernel.set_target(gpu_target);

230

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

231

// Manage intermediate buffers

232

_memory_group.manage(&_tmp_a);

233

if(!_reshape_b_only_on_first_run)

234

{

235

_memory_group.manage(&_tmp_b);

236

}

237

// _tmp_a and _tmp_b will be auto configured in _interleave_kernel and in _transpose_kernel

238

239

GEMMLHSMatrixInfo lhs_info{};

240

GEMMRHSMatrixInfo rhs_info{};

241

242

// Pick up the GEMM configuration

243

std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedKernelConfigurationFactory::create(gpu_target);

244

ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_config.get());

245

246

// Configure lhs_info and rhs_info

247

std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);

248

249

_reshape_lhs_kernel.configure(a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());

250

_reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);

251

252

// Configure and tune matrix multiply kernel

Gian Marco Iodice

2019-06-26 17:18:11 +0100

[diff] [blame]

253

_mm_reshaped_kernel.configure(&_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

254

255

// Allocate intermediate tensors

256

_tmp_a.allocator()->allocate();

257

if(!_reshape_b_only_on_first_run)

258

{

259

_tmp_b.allocator()->allocate();

}

}

void CLGEMM::configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

264

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

265

DataType data_type = a->info()->data_type();

266

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

267

const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

268

const unsigned int n = b->info()->dimension(0);

269

const unsigned int k = a->info()->dimension(0);

270

const unsigned int batch_size = reinterpret_input_as_3d ? a->info()->dimension(3) : a->info()->dimension(2);

271

const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();

272

const GPUTarget gpu_target = CLScheduler::get().target();

Georgios Pinitas

b0f342e

2019-05-21 13:32:43 +0100

[diff] [blame]

273

bool broadcast_bias = gemm_info.broadcast_bias();

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

274

275

GEMMKernelInfo kernel_info;

Gian Marco Iodice

2019-06-26 17:18:11 +0100

[diff] [blame]

kernel_info.m = m;

kernel_info.n = n;

kernel_info.k = k;

kernel_info.depth_output_gemm3d = depth_output_gemm3d;

280

kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;

281

kernel_info.broadcast_bias = broadcast_bias;

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

282

kernel_info.activation_info = gemm_info.activation_info();

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

283

284

// Set the target for the kernels

285

_mm_kernel.set_target(gpu_target);

286

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

287

// Manage intermediate buffers

288

if(!_reshape_b_only_on_first_run)

289

{

290

_memory_group.manage(&_tmp_b);

291

}

292

293

GEMMLHSMatrixInfo lhs_info{};

294

GEMMRHSMatrixInfo rhs_info{};

295

296

// Pick up the GEMM configuration

297

std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target);

298

ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_config.get());

299

300

// Configure lhs_info and rhs_info

301

std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);

302

303

_reshape_rhs_kernel.configure(b, &_tmp_b, rhs_info);

304

305

// Configure and tune matrix multiply kernel

Gian Marco Iodice

2019-06-26 17:18:11 +0100

[diff] [blame]

306

_mm_reshaped_only_rhs_kernel.configure(a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

307

308

if(!_reshape_b_only_on_first_run)

309

{

310

_tmp_b.allocator()->allocate();

}

}

// Validation counterpart of configure_native(): builds the same GEMMReshapeInfo and
// forwards to CLGEMMMatrixMultiplyKernel::validate on the unreshaped a and b.
// Returns the first error reported, or an empty Status when validation passes.
Status CLGEMM::validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    const bool input_as_3d = gemm_info.reinterpret_input_as_3d();

    // GEMM sizes read from the tensor shapes (dimensions 1 and 2 of a are folded into m for 3D input)
    const unsigned int m = input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);

    const int       depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    const GPUTarget gpu_target          = CLScheduler::get().target();

    // Both reshape multipliers are 1 on this path
    const GEMMReshapeInfo reshape_info(m, n, k, 1, 1, depth_output_gemm3d, input_as_3d, gemm_info.broadcast_bias());

    // Validate matrix multiply
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyKernel::validate(a, b, c, output, alpha, beta,
                                                                     false, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info()));

    return Status{};
}

// Validation counterpart of configure_reshaped_v1(): validates the LHS reshape, the RHS
// reshape and the matrix multiply in turn, using temporary tensor infos in place of the
// intermediate tensors. Returns the first error reported, or an empty Status.
Status CLGEMM::validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    // Stand-ins for the reshaped a and b
    TensorInfo tmp_a_info{};
    TensorInfo tmp_b_info{};

    const bool input_as_3d = gemm_info.reinterpret_input_as_3d();

    // GEMM sizes read from the tensor shapes (dimensions 1 and 2 of a are folded into m for 3D input)
    const unsigned int m = input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n = b->dimension(0);
    const unsigned int k = a->dimension(0);

    const int       depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    const GPUTarget gpu_target          = CLScheduler::get().target();

    // Reshape multipliers: 4 / 2 on Bifrost, 1 / 1 on every other architecture
    const bool is_bifrost                = (get_arch_from_target(gpu_target) == GPUTarget::BIFROST);
    const int  mult_transpose1xW_width   = is_bifrost ? 4 : 1;
    const int  mult_interleave4x4_height = is_bifrost ? 2 : 1;

    // Reshape parameters for the RHS matrix (b)
    GEMMRHSMatrixInfo rhs_info;
    rhs_info.n0         = 16 / b->element_size();
    rhs_info.k0         = 1;
    rhs_info.h0         = mult_transpose1xW_width;
    rhs_info.interleave = false;
    rhs_info.transpose  = false;

    // Reshape parameters for the LHS matrix (a)
    GEMMLHSMatrixInfo lhs_info;
    lhs_info.m0         = 4;
    lhs_info.k0         = 4;
    lhs_info.v0         = mult_interleave4x4_height;
    lhs_info.interleave = true;
    lhs_info.transpose  = true;

    // The 3D-input flag handed to the multiply kernel is always false here;
    // the LHS reshape validation is the one that receives input_as_3d.
    const GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias());

    // Validate interleave kernel
    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, input_as_3d)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeLHSMatrixKernel::validate(a, &tmp_a_info, lhs_info, input_as_3d));

    // Validate transpose kernel
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply on the reshaped tensor infos
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta,
                                                                     true, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info()));

    return Status{};
}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

390

// Validation counterpart of configure_reshaped_v2(): validates the LHS reshape, the RHS
// reshape and the reshaped matrix multiply in turn, using temporary tensor infos in place
// of the intermediate tensors. Returns the first error reported, or an empty Status.
Status CLGEMM::validate_reshaped_v2(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    // Stand-ins for the reshaped a and b
    TensorInfo tmp_a_info{};
    TensorInfo tmp_b_info{};

    const GPUTarget gpu_target  = CLScheduler::get().target();
    const DataType  data_type   = a->data_type();
    const bool      input_as_3d = gemm_info.reinterpret_input_as_3d();

    // GEMM sizes read from the tensor shapes. For 3D input, dimensions 1 and 2 of a are
    // folded into m and the batch size moves from dimension 2 to dimension 3.
    const unsigned int m          = input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n          = b->dimension(0);
    const unsigned int k          = a->dimension(0);
    const unsigned int batch_size = input_as_3d ? a->dimension(3) : a->dimension(2);

    GEMMKernelInfo kernel_info;
    kernel_info.m                   = m;
    kernel_info.n                   = n;
    kernel_info.k                   = k;
    kernel_info.depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    // Always false for the multiply kernel on this path: the 3D-input flag goes to the
    // LHS reshape validation instead.
    kernel_info.reinterpret_input_as_3d = false;
    kernel_info.broadcast_bias          = gemm_info.broadcast_bias();
    kernel_info.activation_info         = gemm_info.activation_info();

    GEMMLHSMatrixInfo lhs_info;
    GEMMRHSMatrixInfo rhs_info;

    // Pick up the GEMM configuration for this GPU target
    std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedKernelConfigurationFactory::create(gpu_target);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(gemm_config.get());

    // Configure lhs_info and rhs_info
    std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);

    // Validate LHS reshape
    auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, input_as_3d)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeLHSMatrixKernel::validate(a, &tmp_a_info, lhs_info, input_as_3d));

    // Validate RHS reshape
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply on the reshaped tensor infos
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));

    return Status{};
}

// Validation counterpart of configure_reshaped_only_rhs(): validates the RHS reshape and
// then the matrix multiply on a and a temporary tensor info standing in for the reshaped b.
// Returns the first error reported, or an empty Status.
Status CLGEMM::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    ARM_COMPUTE_UNUSED(output);

    // Stand-in for the reshaped b
    TensorInfo tmp_b_info{};

    const GPUTarget gpu_target  = CLScheduler::get().target();
    const DataType  data_type   = a->data_type();
    const bool      input_as_3d = gemm_info.reinterpret_input_as_3d();

    // GEMM sizes read from the tensor shapes. For 3D input, dimensions 1 and 2 of a are
    // folded into m and the batch size moves from dimension 2 to dimension 3.
    const unsigned int m          = input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n          = b->dimension(0);
    const unsigned int k          = a->dimension(0);
    const unsigned int batch_size = input_as_3d ? a->dimension(3) : a->dimension(2);

    GEMMKernelInfo kernel_info;
    kernel_info.m                   = m;
    kernel_info.n                   = n;
    kernel_info.k                   = k;
    kernel_info.depth_output_gemm3d = gemm_info.depth_output_gemm3d();
    // a is not reshaped on this path, so the multiply kernel itself receives the 3D-input flag
    kernel_info.reinterpret_input_as_3d = input_as_3d;
    kernel_info.broadcast_bias          = gemm_info.broadcast_bias();
    kernel_info.activation_info         = gemm_info.activation_info();

    GEMMLHSMatrixInfo lhs_info;
    GEMMRHSMatrixInfo rhs_info;

    // Pick up the GEMM configuration for this GPU target
    std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(gemm_config.get());

    // Configure lhs_info and rhs_info
    std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);

    // Validate RHS reshape
    auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMReshapeRHSMatrixKernel::validate(b, &tmp_b_info, rhs_info));

    // Validate matrix multiply on a and the reshaped b
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));

    return Status{};
}

void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info)

487

{

488

ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);

489

490

// Perform validation step

491

ARM_COMPUTE_ERROR_THROW_ON(validate(a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), alpha, beta, gemm_info));

492

493

// Check if we need to reshape the matrix B only on the first run

494

_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();

495

_is_prepared = gemm_info.retain_internal_weights();

496

_original_b = b;

497

498

// Get the GPU target

499

const GPUTarget gpu_target = CLScheduler::get().target();

500

bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();

501

const unsigned int m = reinterpret_input_as_3d ? (a->info()->dimension(1) * a->info()->dimension(2)) : a->info()->dimension(1);

502

const unsigned int n = b->info()->dimension(0);

503

const unsigned int k = a->info()->dimension(0);

504

505

// Select GEMMType

506

_gemm_type = select_gemm_type(m, n, k, a->info()->data_type(), _reshape_b_only_on_first_run, gpu_target);

507

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

508

const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame]

509

510

const ICLTensor *c_to_use = fuse_add_c ? c : nullptr;

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

switch(_gemm_type)

{

case GEMMType::NATIVE:

515

{

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame]

516

configure_native(a, b, c_to_use, output, alpha, beta, gemm_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

517

break;

518

}

519

case GEMMType::RESHAPED_V1:

520

{

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame]

521

configure_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

522

break;

523

}

524

case GEMMType::RESHAPED_V2:

525

{

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame]

526

configure_reshaped_v2(a, b, c_to_use, output, alpha, beta, gemm_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

527

break;

528

}

529

case GEMMType::RESHAPED_ONLY_RHS:

530

{

Gian Marco Iodice

2019-06-14 16:11:10 +0100

[diff] [blame]

531

configure_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

break;

}

default:

{

ARM_COMPUTE_ERROR("GEMMType not supported");

537

}

538

}

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

539

}

540

541

// Entry point for validation: selects the GEMMType exactly as configure() does and
// forwards to the matching validate_* helper.
// Returns an error Status if a or b is null, if the selected helper reports an error, or
// if the selected GEMMType is not handled; returns an empty Status otherwise.
Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
{
    // a and b are dereferenced right below to read the GEMM sizes, so report null
    // pointers as an error Status instead of dereferencing them. configure() checks its
    // own pointers before calling this function, but other callers may not have.
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(a, b);

    // Get the GPU target
    const GPUTarget    gpu_target              = CLScheduler::get().target();
    const bool         reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
    const unsigned int m                       = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
    const unsigned int n                       = b->dimension(0);
    const unsigned int k                       = a->dimension(0);

    // Select GEMMType
    const GEMMType gemm_type = select_gemm_type(m, n, k, a->data_type(), gemm_info.reshape_b_only_on_first_run(), gpu_target);

    // c is only passed on to the kernel validation when it is present and beta is non-zero
    const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);

    const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;

    switch(gemm_type)
    {
        case GEMMType::NATIVE:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_native(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case GEMMType::RESHAPED_V1:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case GEMMType::RESHAPED_V2:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v2(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        case GEMMType::RESHAPED_ONLY_RHS:
        {
            ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info));
            break;
        }
        default:
        {
            ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported");
        }
    }

    return Status{};
}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

588

void CLGEMM::run()

589

{

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

590

prepare();

591

Georgios Pinitas

da953f2

2019-04-02 17:27:03 +0100

[diff] [blame]

592

MemoryGroupResourceScope scope_mg(_memory_group);

Georgios Pinitas

8a94e7c

2017-09-15 19:06:47 +0100

[diff] [blame]

593

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

594

// Run matrix multiply kernel

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

595

switch(_gemm_type)

Gian Marco Iodice

2018-12-12 10:18:04 +0000

[diff] [blame]

596

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

597

case GEMMType::NATIVE:

598

{

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

599

CLScheduler::get().enqueue(_mm_kernel, true);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

600

break;

601

}

602

case GEMMType::RESHAPED_V1:

603

{

604

// Run interleave kernel

605

CLScheduler::get().enqueue(_reshape_lhs_kernel, false);

606

607

if(!_reshape_b_only_on_first_run)

608

{

609

// Run transpose kernel

610

CLScheduler::get().enqueue(_reshape_rhs_kernel, false);

611

}

612

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

613

CLScheduler::get().enqueue(_mm_kernel, true);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

614

break;

615

}

616

case GEMMType::RESHAPED_V2:

617

{

618

// Run interleave kernel

619

CLScheduler::get().enqueue(_reshape_lhs_kernel, false);

620

621

if(!_reshape_b_only_on_first_run)

622

{

623

// Run transpose kernel

624

CLScheduler::get().enqueue(_reshape_rhs_kernel, false);

625

}

626

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

627

CLScheduler::get().enqueue(_mm_reshaped_kernel, true);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

628

break;

629

}

630

case GEMMType::RESHAPED_ONLY_RHS:

631

{

632

if(!_reshape_b_only_on_first_run)

633

{

634

// Run transpose kernel

635

CLScheduler::get().enqueue(_reshape_rhs_kernel, false);

636

}

637

Gian Marco Iodice

2019-07-29 14:27:16 +0100

[diff] [blame]

638

CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, true);

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

break;

}

default:

{

ARM_COMPUTE_ERROR("GEMMType not supported");

644

}

Gian Marco Iodice

2018-12-12 10:18:04 +0000

[diff] [blame]

645

}

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

646

}

Georgios Pinitas

82b5148

2018-04-24 15:14:12 +0100

[diff] [blame]

647

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

648

void CLGEMM::prepare()

649

{

650

if(!_is_prepared)

651

{

Gian Marco Iodice

2019-03-19 11:44:13 +0000

[diff] [blame]

652

if(_gemm_type != GEMMType::NATIVE && _reshape_b_only_on_first_run)

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

653

{

Georgios Pinitas

7221933

2018-06-05 14:56:06 +0100

[diff] [blame]

654

// Run transpose kernel and mark original weights tensor as unused

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

655

_tmp_b.allocator()->allocate();

giuros01

8b6b4a9

2018-12-18 19:01:33 +0000

[diff] [blame]

656

CLScheduler::get().enqueue(_reshape_rhs_kernel, false);

Georgios Pinitas

2018-05-02 14:07:55 +0100

[diff] [blame]

657

_original_b->mark_as_unused();

658

}

659

CLScheduler::get().queue().finish();

660

_is_prepared = true;

661

}

Anthony Barbier