Blame - src/cpu/operators/CpuFullyConnected.cpp - ml/ComputeLibrary

std::pair<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type)

53

{

54

PixelValue type_min{};

55

PixelValue type_max{};

56

std::tie(type_min, type_max) = get_min_max(data_type);

57

const UniformQuantizationInfo q_unif = q_info.uniform();

58

59

if(act_info.enabled())

60

{

61

switch(act_info.activation())

62

{

63

case ActivationLayerInfo::ActivationFunction::RELU:

64

type_min = PixelValue(q_unif.offset);

65

break;

66

case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:

67

type_min = PixelValue(q_unif.offset);

68

type_max = PixelValue(act_info.a(), data_type, q_info);

69

break;

70

case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:

71

type_min = PixelValue(act_info.b(), data_type, q_info);

72

type_max = PixelValue(act_info.a(), data_type, q_info);

73

break;

74

default:

75

ARM_COMPUTE_ERROR("Activation function not supported.");

break;

}

}

return std::make_pair(type_min, type_max);

81

}

82

83

/** Fill in the GEMMLowp output stage descriptor for a quantized fully connected layer.
 *
 * The requantization multiplier is (src_scale * weights_scale) / dst_scale; it is converted to a
 * fixed-point multiplier/shift pair by quantization::calculate_quantized_multiplier(). The output
 * bounds come from get_quantized_asymmetric_output_min_max().
 *
 * @param[in]  src                        Source tensor info (provides data type and input scale).
 * @param[in]  weights                    Weights tensor info (provides weights scale).
 * @param[in]  dst                        Destination tensor info (provides output scale and offset).
 * @param[in]  act                        Activation to account for when computing the output bounds.
 * @param[out] gemmlowp_output_stage_info Descriptor to populate.
 *
 * @return An error status if the multiplier computation fails, an empty Status otherwise.
 */
Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const ActivationLayerInfo &act,
                                      GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
{
    const auto                    in_data_type = src->data_type();
    const QuantizationInfo        dst_qinfo    = dst->quantization_info();
    const UniformQuantizationInfo src_uq       = src->quantization_info().uniform();
    const UniformQuantizationInfo wei_uq       = weights->quantization_info().uniform();
    const UniformQuantizationInfo dst_uq       = dst_qinfo.uniform();

    // Real-valued requantization factor
    const float real_multiplier = (src_uq.scale * wei_uq.scale) / dst_uq.scale;

    // Fixed-point representation of the factor above
    int32_t output_multiplier;
    int32_t output_shift;
    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(real_multiplier, &output_multiplier, &output_shift));

    // Output bounds, narrowed by the activation if one is enabled
    PixelValue bound_min{};
    PixelValue bound_max{};
    std::tie(bound_min, bound_max) = get_quantized_asymmetric_output_min_max(dst_qinfo, act, in_data_type);

    gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
    gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
    gemmlowp_output_stage_info.gemmlowp_offset     = dst_uq.offset;
    gemmlowp_output_stage_info.gemmlowp_min_bound  = bound_min.get<int32_t>();
    gemmlowp_output_stage_info.gemmlowp_max_bound  = bound_max.get<int32_t>();

    return Status{};
}

/** Validate the matrix multiplication stage of the fully connected layer.
 *
 * Dispatches to CpuGemmLowpMatrixMultiplyCore::validate() for quantized asymmetric inputs and to
 * CpuGemm::validate() otherwise.
 *
 * @param[in] src     Source tensor info.
 * @param[in] weights Weights tensor info.
 * @param[in] biases  Biases tensor info, forwarded as-is to the GEMM validation.
 * @param[in] dst     Destination tensor info.
 * @param[in] act     Activation used when computing the GEMMLowp output stage.
 *
 * @return The first error reported by the underlying validation, or an empty Status.
 */
Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ActivationLayerInfo &act)
{
    if(!is_data_type_quantized_asymmetric(src->data_type()))
    {
        // Floating point path
        ARM_COMPUTE_RETURN_ON_ERROR(CpuGemm::validate(src, weights, biases, dst, 1.f, 1.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */)));
        return Status{};
    }

    // Quantized path: the offsets of src and weights are negated before being handed to GEMMLowp
    const UniformQuantizationInfo src_uq = src->quantization_info().uniform();
    const UniformQuantizationInfo wei_uq = weights->quantization_info().uniform();
    const QuantizationInfo        negated_src_qinfo(src_uq.scale, -src_uq.offset);
    const QuantizationInfo        negated_wei_qinfo(wei_uq.scale, -wei_uq.offset);

    GEMMLowpOutputStageInfo output_stage;
    ARM_COMPUTE_RETURN_ON_ERROR(get_gemmlowp_output_stage_info(src, weights, dst, act, output_stage));

    GEMMInfo gemm_info;
    gemm_info.set_gemmlowp_output_stage(output_stage);

    // Validate gemmlowp function on copies carrying the negated offsets
    TensorInfo negated_src_info = src->clone()->set_quantization_info(negated_src_qinfo);
    TensorInfo negated_wei_info = weights->clone()->set_quantization_info(negated_wei_qinfo);
    ARM_COMPUTE_RETURN_ON_ERROR(CpuGemmLowpMatrixMultiplyCore::validate(&negated_src_info, &negated_wei_info, biases, dst, gemm_info));

    return Status{};
}

} // namespace

CpuFullyConnected::CpuFullyConnected()

146

: _flatten(nullptr),

147

_convert_weights(nullptr),

148

_transpose_weights(nullptr),

149

_mm_gemm(nullptr),

150

_mm_gemmlowp(nullptr),

151

_flattened_src(),

152

_converted_weights(),

153

_reshaped_weights(),

Georgios Pinitas

fa1db17

2021-08-12 06:28:09 +0100

[diff] [blame]

154

_trans_weights(),

155

_trans_weights_idx(AuxTensorIdx::Count),

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

156

_aux_mem(Count),

Georgios Pinitas

fa1db17

2021-08-12 06:28:09 +0100

[diff] [blame]

157

_needs_weights_conversion(false),

158

_needs_weights_reshape(false),

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

159

_is_fc_after_conv(false),

160

_is_quantized_asymmetric(false),

_is_prepared(false)

{

}

CpuFullyConnected::~CpuFullyConnected() = default;

167

168

void CpuFullyConnected::configure_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ActivationLayerInfo &act)

169

{

170

if(_is_quantized_asymmetric)

171

{

172

// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()

173

// Extract and negate src and weights offset

174

const QuantizationInfo src_quantization_info(src->quantization_info().uniform().scale, -src->quantization_info().uniform().offset);

175

const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale, -weights->quantization_info().uniform().offset);

176

177

TensorInfo src_info = src->clone()->set_quantization_info(src_quantization_info);

178

TensorInfo weights_info = weights->clone()->set_quantization_info(weights_quantization_info);

179

180

// Configure gemmlowp function and output stage for asymmetric quantized types

181

GEMMLowpOutputStageInfo gemmlowp_output_stage_info;

182

const Status status = get_gemmlowp_output_stage_info(&src_info, &weights_info, dst, act, gemmlowp_output_stage_info);

183

ARM_COMPUTE_ERROR_ON(status.error_code() != ErrorCode::OK);

184

185

GEMMInfo gemm_info;

186

gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);

187

gemm_info.set_activation_info(act);

188

_mm_gemmlowp = std::make_unique<CpuGemmLowpMatrixMultiplyCore>();

189

_mm_gemmlowp->configure(&src_info, &weights_info, biases, dst, gemm_info);

}

else

{

// Configure matrix multiply kernel

194

GEMMInfo gemm_info(false, false, true /* Reshape weights only for the first run */);

195

gemm_info.set_activation_info(act);

196

_mm_gemm = std::make_unique<CpuGemm>();

197

_mm_gemm->configure(src, weights, biases, dst, 1.f, 1.0f, gemm_info);

}

}

void CpuFullyConnected::configure_conv_fc(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ActivationLayerInfo &act)

202

{

203

ARM_COMPUTE_ERROR_ON((weights->dimension(1) != (src->dimension(0) * src->dimension(1) * src->dimension(2))));

204

205

// If the fully connected layer is called after a convolution layer, the src tensor must be linearized

206

207

// Initialize output tensor for flatten

208

auto_init_if_empty(_flattened_src, src->clone()->set_tensor_shape(compute_flatten_shape(src)));

209

210

_flatten = std::make_unique<CpuFlatten>();

211

_flatten->configure(src, &_flattened_src);

212

213

// Configure matrix multiply kernel

214

configure_mm(&_flattened_src, weights, biases, dst, act);

215

}

216

217

void CpuFullyConnected::configure_fc_fc(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ActivationLayerInfo &act)

218

{

219

ARM_COMPUTE_ERROR_ON(src->dimension(0) != weights->dimension(1));

220

221

// Configure matrix multiply kernel

222

configure_mm(src, weights, biases, dst, act);

223

}

224

225

void CpuFullyConnected::configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst,

226

FullyConnectedLayerInfo fc_info)

227

{

228

// Perform validate step

229

ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);

230

ARM_COMPUTE_ERROR_THROW_ON(CpuFullyConnected::validate(src,

231

weights,

232

biases != nullptr ? biases : nullptr,

233

dst,

234

fc_info));

ramelg01

3ae3d88

2021-09-12 23:07:47 +0100

[diff] [blame]

235

ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, fc_info);

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

236

Georgios Pinitas

fa1db17

2021-08-12 06:28:09 +0100

[diff] [blame]

237

_needs_weights_conversion = false;

238

_needs_weights_reshape = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;

239

_needs_weights_reshape = _needs_weights_reshape && !fc_info.retain_internal_weights;

240

_is_fc_after_conv = true;

241

_is_quantized_asymmetric = is_data_type_quantized_asymmetric(src->data_type());

242

_is_prepared = false;

243

_trans_weights_idx = AuxTensorIdx::Count;

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

244

245

// With the Fully Connected layer we can have 4 different cases:

246

// 1) Convolution layer -> Fully Connected layer without batches

247

// 2) Fully Connected layer -> Fully Connected layer without batches

248

// 3) Convolution layer -> Fully Connected layer with batches

249

// 4) Fully Connected layer -> Fully Connected layer with batches

250

251

const ITensorInfo *weights_to_use = weights;

252

253

// Check if we have a fully connected layer with batches

254

const bool is_batched_fc_layer = dst->dimension(1) > 1;

255

if(is_batched_fc_layer)

256

{

257

_is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(src->tensor_shape().cbegin() + 3,

258

src->tensor_shape().cend(),

259

dst->tensor_shape().cbegin() + 1));

}

else

{

_is_fc_after_conv = src->num_dimensions() > 1;

264

}

265

266

// Reshape weights if needed

Georgios Pinitas

fa1db17

2021-08-12 06:28:09 +0100

[diff] [blame]

267

if(_needs_weights_reshape)

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

268

{

269

// Reshape the weights

270

_transpose_weights = std::make_unique<kernels::CpuTransposeKernel>();

271

_transpose_weights->configure(weights, &_reshaped_weights);

Georgios Pinitas

fa1db17

2021-08-12 06:28:09 +0100

[diff] [blame]

272

weights_to_use = &_reshaped_weights;

273

_trans_weights_idx = AuxTensorIdx::TransposedWeights;

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

274

}

275

276

// Convert weights if needed

277

if(_is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))

278

{

279

// Convert weights

280

_convert_weights = std::make_unique<CpuConvertFullyConnectedWeights>();

281

_convert_weights->configure(weights_to_use,

282

&_converted_weights,

283

src->tensor_shape(),

284

fc_info.weights_trained_layout);

285

Georgios Pinitas

fa1db17

2021-08-12 06:28:09 +0100

[diff] [blame]

286

weights_to_use = &_converted_weights;

287

_needs_weights_conversion = true;

288

_trans_weights_idx = AuxTensorIdx::ConvertedWeights;

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

289

}

290

291

if(_is_fc_after_conv)

292

{

293

// Fully Connected layer after a Convolution Layer without batches

294

configure_conv_fc(src, weights_to_use, biases, dst, fc_info.activation_info);

}

else

{

// Fully Connected layer after a Fully Connected Layer without batches

299

configure_fc_fc(src, weights_to_use, biases, dst, fc_info.activation_info);

300

}

301

Georgios Pinitas

fa1db17

2021-08-12 06:28:09 +0100

[diff] [blame]

302

// Retain the tensorinfo with the weights to use

303

if(_needs_weights_reshape || _needs_weights_conversion)

304

{

305

_trans_weights = *weights_to_use;

306

}

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

307

308

// Set auxiliary memory requirements

309

auto gemm_mem_req = (_is_quantized_asymmetric) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();

310

for(unsigned int i = 0; i < gemm_mem_req.size(); ++i)

311

{

312

_aux_mem[i] = gemm_mem_req[i];

313

}

314

315

if(_aux_mem[Pretranspose].size > 0)

316

{

Giorgio Arena

63e0beb

2021-09-24 14:04:27 +0100

[diff] [blame]

317

// Release permuted weights at the end of prepare as they are further transposed by the assembly dispatch

318

// Do not release them if biases are dynamic and data type is quantized, since the weights tensor will be used for biases offset calculation

319

_aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), (_is_quantized_asymmetric

320

&& biases && !(biases->are_values_constant())) ?

321

MemoryLifetime::Persistent :

322

MemoryLifetime::Prepare,

323

_reshaped_weights.total_size());

324

_aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Prepare, _converted_weights.total_size());

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

325

}

326

else

327

{

Georgios Pinitas

fa1db17

2021-08-12 06:28:09 +0100

[diff] [blame]

328

_aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights), _needs_weights_conversion ? MemoryLifetime::Prepare : MemoryLifetime::Persistent, _reshaped_weights.total_size());

Michele Di Giorgio

d9cdf14

2021-07-02 15:17:08 +0100

[diff] [blame]

329

_aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Persistent, _converted_weights.total_size());

330

}

331

_aux_mem[FlattenedSrc] = MemoryInfo(offset_int_vec(FlattenedSrc), MemoryLifetime::Temporary, _flattened_src.total_size());

332

}

333

334

/** Check whether the given configuration is supported by CpuFullyConnected.
 *
 * Mirrors the decisions taken by configure(): optional weights transpose, optional weights layout
 * conversion, optional source flatten and finally the matrix multiplication.
 *
 * @param[in] src     Source tensor info. Data types allowed: QASYMM8/QASYMM8_SIGNED/F16/F32.
 * @param[in] weights Weights tensor info. Same data type as @p src, at most 2 dimensions.
 * @param[in] biases  Biases tensor info. May be nullptr; S32 for quantized sources, same type as @p src otherwise.
 * @param[in] dst     Destination tensor info. Same data type as @p src.
 * @param[in] fc_info Fully connected layer descriptor.
 *
 * @return The first error found, or an empty Status when the configuration is valid.
 */
Status CpuFullyConnected::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
                                   FullyConnectedLayerInfo fc_info)
{
    ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
    // Quantized sources only accept RELU, BOUNDED_RELU and LU_BOUNDED_RELU as fused activation
    ARM_COMPUTE_RETURN_ERROR_ON(fc_info.activation_info.enabled() && is_data_type_quantized(src->data_type()) && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
                                && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
    // Non-constant weights must already be reshaped and must not request a transpose
    ARM_COMPUTE_RETURN_ERROR_ON(!weights->are_values_constant() && (!fc_info.are_weights_reshaped || fc_info.transpose_weights));

    // No transpose is needed when it is not requested or the weights are already reshaped
    const bool weights_reshaped = !fc_info.transpose_weights || fc_info.are_weights_reshaped;

    // Scratch tensor infos describing the intermediate results of each optional stage
    const ITensorInfo &flattened_src_info      = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(src)));
    const ITensorInfo &transposed_weights_info = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
    const ITensorInfo &converted_weights_info  = weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) : TensorInfo(*transposed_weights_info.clone());

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensorInfo *mm_src         = src;
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = dst->dimension(1) > 1;

    if(biases != nullptr)
    {
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        if(is_data_type_quantized(src->data_type()))
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
        }
    }

    const bool is_fc_after_conv = is_batched_fc_layer ?
                                  ((TensorShape::num_max_dimensions >= 4) && (std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(), dst->tensor_shape().cbegin() + 1))) :
                                  (src->num_dimensions() > 1);

    if(!weights_reshaped)
    {
        // Validate reshape weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuTransposeKernel::validate(weights, &transposed_weights_info));
        weights_to_use = &transposed_weights_info;
    }

    if(is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))
    {
        // Validate convert weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CpuConvertFullyConnectedWeights::validate(weights_to_use, &converted_weights_info, src->tensor_shape(), fc_info.weights_trained_layout));
        weights_to_use = &converted_weights_info;
    }

    if(is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (src->dimension(0) * src->dimension(1) * src->dimension(2))));

        // Validate flatten kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CpuFlatten::validate(src, &flattened_src_info));
        mm_src = &flattened_src_info;
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != weights_to_use->dimension(1));
    }

    // Validate matrix multiply kernel
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(mm_src, weights_to_use, biases, dst, fc_info.activation_info));

    return Status{};
}

/** Execute the fully connected layer.
 *
 * Calls prepare() first, flattens the source when _is_fc_after_conv is set and then runs the
 * configured GEMM operator.
 *
 * @param[in,out] tensors Tensor pack; ACL_SRC_0 is read as the source tensor and the pack is also used
 *                        to construct the auxiliary tensor handlers.
 */
void CpuFullyConnected::run(ITensorPack &tensors)
{
    prepare(tensors);

    auto src = tensors.get_const_tensor(ACL_SRC_0);

    // Handlers for the auxiliary tensors used at run time
    CpuAuxTensorHandler flattened_src(offset_int_vec(FlattenedSrc), _flattened_src, tensors, false);
    CpuAuxTensorHandler transformed_wei(offset_int_vec(_trans_weights_idx), _trans_weights, tensors, false);

    // Source consumed by the GEMM: the original one unless a flatten stage is required
    const ITensor *gemm_src = src;

    // Linearize src if it comes from a convolutional layer
    if(_is_fc_after_conv)
    {
        ITensorPack flatten_pack{ { ACL_SRC, src }, { ACL_DST, flattened_src.get() } };
        _flatten->run(flatten_pack);

        gemm_src = flattened_src.get();
    }

    // Start from the caller's pack and override the tensors that were transformed
    ITensorPack gemm_pack = tensors;
    gemm_pack.add_const_tensor(ACL_SRC_0, gemm_src);
    if(_needs_weights_reshape || _needs_weights_conversion)
    {
        gemm_pack.add_const_tensor(ACL_SRC_1, transformed_wei.get());
    }

    // Run matrix multiply
    if(_is_quantized_asymmetric)
    {
        _mm_gemmlowp->run(gemm_pack);
    }
    else
    {
        _mm_gemm->run(gemm_pack);
    }
}

/** One-off preparation step: transform the weights and prepare the GEMM operator.
 *
 * Does nothing once _is_prepared has been set. Otherwise transposes and/or converts the weights as
 * decided by configure(), marks each superseded weights tensor as unused and calls prepare() on
 * the configured GEMM operator with the final weights.
 *
 * @param[in,out] tensors Tensor pack; ACL_SRC_1 is read as the weights tensor and the pack is also used
 *                        to construct the auxiliary tensor handlers.
 */
void CpuFullyConnected::prepare(ITensorPack &tensors)
{
    if(_is_prepared)
    {
        return;
    }

    auto weights = tensors.get_const_tensor(ACL_SRC_1);

    CpuAuxTensorHandler reshaped_weights(offset_int_vec(TransposedWeights), _reshaped_weights, tensors, false);
    CpuAuxTensorHandler converted_weights(offset_int_vec(ConvertedWeights), _converted_weights, tensors, false);

    // Weights tensor produced by the latest stage
    const ITensor *latest_weights = weights;

    // Reshape of the weights (happens only once)
    if(_needs_weights_reshape)
    {
        // Run reshape weights kernel and mark weights as unused
        ITensorPack transpose_pack{ { ACL_SRC, weights }, { ACL_DST, reshaped_weights.get() } };
        NEScheduler::get().schedule_op(_transpose_weights.get(), Window::DimY, _transpose_weights->window(), transpose_pack);

        latest_weights->mark_as_unused();
        latest_weights = reshaped_weights.get();
    }

    // Convert weights if needed (happens only once)
    if(_needs_weights_conversion)
    {
        ITensorPack convert_pack{ { ACL_SRC, latest_weights }, { ACL_DST, converted_weights.get() } };
        _convert_weights->run(convert_pack);

        latest_weights->mark_as_unused();
        latest_weights = converted_weights.get();
    }

    // Hand the final weights over to the GEMM operator
    ITensorPack gemm_pack = tensors;
    gemm_pack.add_const_tensor(ACL_SRC_1, latest_weights);

    // Prepare GEMM prepare and release unused weights
    if(_is_quantized_asymmetric)
    {
        _mm_gemmlowp->prepare(gemm_pack);
    }
    else
    {
        _mm_gemm->prepare(gemm_pack);
    }

    _is_prepared = true;
}

/** Report the auxiliary memory requirements of the operator.
 *
 * @return A copy of the auxiliary memory list filled in by configure().
 */
experimental::MemoryRequirements CpuFullyConnected::workspace() const
{
    return _aux_mem;
}

} // namespace cpu

} // namespace arm_compute