/*
 * Copyright (c) 2021-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/cpu/operators/CpuFullyConnected.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/core/utils/quantization/AsymmHelpers.h"
#include "src/cpu/kernels/CpuTransposeKernel.h"
#include "src/cpu/operators/CpuConvertFullyConnectedWeights.h"
#include "src/cpu/operators/CpuFlatten.h"
#include "src/cpu/operators/CpuGemm.h"
#include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h"
#include "src/cpu/utils/CpuAuxTensorHandler.h"

namespace arm_compute
{
namespace cpu
{
using namespace arm_compute::experimental;
using namespace arm_compute::misc::shape_calculator;

namespace
{
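// Derive the fixed-point requantization parameters (multiplier, shift, offset
// and clamping bounds) that the quantized GEMM applies when scaling its int32
// accumulators back to the destination data type.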
Status get_gemmlowp_output_stage_info(const ITensorInfo         *src,
                                      const ITensorInfo         *weights,
                                      const ITensorInfo         *dst,
                                      const ActivationLayerInfo &act,
                                      GEMMLowpOutputStageInfo   &gemmlowp_output_stage_info)
{
    const auto                    data_type = src->data_type();
    const QuantizationInfo        oq_info   = dst->quantization_info();
    const UniformQuantizationInfo iq_unif   = src->quantization_info().uniform();
    const UniformQuantizationInfo wq_unif   = weights->quantization_info().uniform();
    const UniformQuantizationInfo oq_unif   = oq_info.uniform();

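    // The real requantization factor (iq.scale * wq.scale) / oq.scale is encoded
    // as a 31-bit fixed-point multiplier plus a right shift. As an illustrative
    // example (assumed values, not taken from a real network): a factor of 0.125
    // becomes multiplier 1073741824 (0.5 in Q0.31) with shift 2, since
    // 0.125 = 0.5 * 2^-2.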
    float   multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
    int32_t output_multiplier;
    int32_t output_shift;

    ARM_COMPUTE_RETURN_ON_ERROR(
        quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));

    int32_t type_min = 0;
    int32_t type_max = 0;
    std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type);

    gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
    gemmlowp_output_stage_info.gemmlowp_shift      = output_shift;
    gemmlowp_output_stage_info.gemmlowp_offset     = oq_unif.offset;
    gemmlowp_output_stage_info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min;
    gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max;

    return Status{};
}

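// Validate the matrix-multiply stage: asymmetric quantized types are routed to
// CpuGemmLowpMatrixMultiplyCore, all other supported types to CpuGemm.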
Status validate_mm(const ITensorInfo         *src,
                   const ITensorInfo         *weights,
                   const ITensorInfo         *biases,
                   const ITensorInfo         *dst,
                   const ActivationLayerInfo &act,
                   bool                       enable_fast_math,
                   WeightFormat               weight_format)
{
    if (is_data_type_quantized_asymmetric(src->data_type()))
    {
        // The GEMM lowp core needs the negated offsets for its computation, so
        // extract and negate the src and weights offsets into new QuantizationInfo objects.
        const QuantizationInfo src_quantization_info(src->quantization_info().uniform().scale,
                                                     -src->quantization_info().uniform().offset);
        const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale,
                                                         -weights->quantization_info().uniform().offset);

        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        ARM_COMPUTE_RETURN_ON_ERROR(get_gemmlowp_output_stage_info(src, weights, dst, act, gemmlowp_output_stage_info));

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
        gemm_info.set_fast_math(enable_fast_math);

        // Validate gemmlowp function
        TensorInfo src_info     = src->clone()->set_quantization_info(src_quantization_info);
        TensorInfo weights_info = weights->clone()->set_quantization_info(weights_quantization_info);
        ARM_COMPUTE_RETURN_ON_ERROR(
            CpuGemmLowpMatrixMultiplyCore::validate(&src_info, &weights_info, biases, dst, gemm_info));
    }
    else
    {
        GEMMInfo gemm_info;
        gemm_info.set_weight_format(weight_format);
        gemm_info.set_fixed_format(weight_format != WeightFormat::UNSPECIFIED);
        gemm_info.set_fast_math(enable_fast_math);
        ARM_COMPUTE_RETURN_ON_ERROR(CpuGemm::validate(src, weights, biases, dst, 1.0f, 1.0f, gemm_info));
    }

    return Status{};
}
} // namespace

CpuFullyConnected::CpuFullyConnected()
    : _flatten(nullptr),
      _convert_weights(nullptr),
      _transpose_weights(nullptr),
      _mm_gemm(nullptr),
      _mm_gemmlowp(nullptr),
      _flattened_src(),
      _converted_weights(),
      _reshaped_weights(),
      _trans_weights(),
      _trans_weights_idx(AuxTensorIdx::Count),
      _aux_mem(Count),
      _needs_weights_conversion(false),
      _needs_weights_reshape(false),
      _is_fc_after_conv(false),
      _is_quantized_asymmetric(false),
      _is_prepared(false),
      _enable_fast_math(false),
      _fixed_format(false),
      _weight_format(arm_compute::WeightFormat::UNSPECIFIED),
      _dynamic_weights(false)
{
}

CpuFullyConnected::~CpuFullyConnected() = default;

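// Configure the matrix-multiply stage. For asymmetric quantized types this is
// CpuGemmLowpMatrixMultiplyCore with a fused fixed-point output stage, otherwise
// CpuGemm; the activation is fused into the GEMM in both cases.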
void CpuFullyConnected::configure_mm(const ITensorInfo         *src,
                                     const ITensorInfo         *weights,
                                     const ITensorInfo         *biases,
                                     ITensorInfo               *dst,
                                     const ActivationLayerInfo &act)
{
    if (_is_quantized_asymmetric)
    {
        // The GEMM lowp core needs the negated offsets for its computation, so
        // extract and negate the src and weights offsets into new QuantizationInfo objects.
        const QuantizationInfo src_quantization_info(src->quantization_info().uniform().scale,
                                                     -src->quantization_info().uniform().offset);
        const QuantizationInfo weights_quantization_info(weights->quantization_info().uniform().scale,
                                                         -weights->quantization_info().uniform().offset);

        TensorInfo src_info     = src->clone()->set_quantization_info(src_quantization_info);
        TensorInfo weights_info = weights->clone()->set_quantization_info(weights_quantization_info);

        // Configure gemmlowp function and output stage for asymmetric quantized types
        GEMMLowpOutputStageInfo gemmlowp_output_stage_info;
        const Status            status =
            get_gemmlowp_output_stage_info(&src_info, &weights_info, dst, act, gemmlowp_output_stage_info);
        ARM_COMPUTE_ERROR_ON(status.error_code() != ErrorCode::OK);

        GEMMInfo gemm_info;
        gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
        gemm_info.set_activation_info(act);
        gemm_info.set_fast_math(_enable_fast_math);
        _mm_gemmlowp = std::make_unique<CpuGemmLowpMatrixMultiplyCore>();
        _mm_gemmlowp->configure(&src_info, &weights_info, biases, dst, gemm_info);
    }
    else
    {
        // Configure matrix multiply kernel
        GEMMInfo gemm_info;
        gemm_info.set_activation_info(act);
        gemm_info.set_fast_math(_enable_fast_math);
        gemm_info.set_fixed_format(_fixed_format);
        gemm_info.set_weight_format(_weight_format);
        _mm_gemm = std::make_unique<CpuGemm>();
        _mm_gemm->configure(src, weights, biases, dst, 1.0f, 1.0f, gemm_info);
    }
}

void CpuFullyConnected::configure_conv_fc(const ITensorInfo         *src,
                                          const ITensorInfo         *weights,
                                          const ITensorInfo         *biases,
                                          ITensorInfo               *dst,
                                          const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON((weights->dimension(1) != (src->dimension(0) * src->dimension(1) * src->dimension(2))));

    // If the fully connected layer is called after a convolution layer, the src tensor must be linearized

    // Initialize output tensor for flatten
    auto_init_if_empty(_flattened_src, src->clone()->set_tensor_shape(compute_flatten_shape(src)));

    _flatten = std::make_unique<CpuFlatten>();
    _flatten->configure(src, &_flattened_src);

    // Configure matrix multiply kernel
    configure_mm(&_flattened_src, weights, biases, dst, act);
}

void CpuFullyConnected::configure_fc_fc(const ITensorInfo         *src,
                                        const ITensorInfo         *weights,
                                        const ITensorInfo         *biases,
                                        ITensorInfo               *dst,
                                        const ActivationLayerInfo &act)
{
    ARM_COMPUTE_ERROR_ON(src->dimension(0) != weights->dimension(1));

    // Configure matrix multiply kernel
    configure_mm(src, weights, biases, dst, act);
}

void CpuFullyConnected::configure(const ITensorInfo      *src,
                                  const ITensorInfo      *weights,
                                  const ITensorInfo      *biases,
                                  ITensorInfo            *dst,
                                  FullyConnectedLayerInfo fc_info,
                                  const WeightsInfo      &weights_info)
{
    // Perform validate step
    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_ERROR_THROW_ON(CpuFullyConnected::validate(src, weights, biases, dst, fc_info, weights_info));
    ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, fc_info);

    _needs_weights_conversion = false;
    _needs_weights_reshape    = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;
    _needs_weights_reshape    = _needs_weights_reshape && !fc_info.retain_internal_weights;
    _is_fc_after_conv         = true;
    _is_quantized_asymmetric  = is_data_type_quantized_asymmetric(src->data_type());
    _is_prepared              = false;
    _trans_weights_idx        = AuxTensorIdx::Count;
    _enable_fast_math         = fc_info.enable_fast_math;
    _fixed_format             = weights_info.weight_format() != WeightFormat::UNSPECIFIED;
    _weight_format            = weights_info.weight_format();
    _dynamic_weights          = !weights->are_values_constant() && _needs_weights_reshape;

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

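    // Batches are detected from dst's second dimension; whether the producer was
    // a convolution is detected below by checking that src still carries higher
    // (spatial) dimensions which collapse into dst's batch dimension.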
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = dst->dimension(1) > 1;
    if (is_batched_fc_layer)
    {
        _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
                            (std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),
                                        dst->tensor_shape().cbegin() + 1));
    }
    else
    {
        _is_fc_after_conv = src->num_dimensions() > 1;
    }

    // Reshape weights if needed
    if (_needs_weights_reshape)
    {
        // Reshape the weights
        _transpose_weights = std::make_unique<kernels::CpuTransposeKernel>();
        _transpose_weights->configure(weights, &_reshaped_weights);
        _reshaped_weights.set_are_values_constant(weights->are_values_constant());

        weights_to_use     = &_reshaped_weights;
        _trans_weights_idx = AuxTensorIdx::TransposedWeights;
    }

    // Convert weights if needed
    if (_is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))
    {
        // Convert weights
        _convert_weights = std::make_unique<CpuConvertFullyConnectedWeights>();
        _convert_weights->configure(weights_to_use, &_converted_weights, src->tensor_shape(),
                                    fc_info.weights_trained_layout);
        _converted_weights.set_are_values_constant(weights_to_use->are_values_constant());

        weights_to_use            = &_converted_weights;
        _needs_weights_conversion = true;
        _trans_weights_idx        = AuxTensorIdx::ConvertedWeights;
    }

    if (_is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        configure_conv_fc(src, weights_to_use, biases, dst, fc_info.activation_info);
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        configure_fc_fc(src, weights_to_use, biases, dst, fc_info.activation_info);
    }

    // Retain the TensorInfo with the weights to use
    if (_needs_weights_reshape || _needs_weights_conversion)
    {
        _trans_weights = *weights_to_use;
    }

    // Set auxiliary memory requirements
    auto gemm_mem_req = (_is_quantized_asymmetric) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();
    for (unsigned int i = 0; i < gemm_mem_req.size(); ++i)
    {
        _aux_mem[i] = gemm_mem_req[i];
    }

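    // Lifetime semantics used below: Temporary buffers only live for the span of
    // a single run, Prepare buffers can be released once prepare() has executed,
    // and Persistent buffers must survive across runs.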
    if (_aux_mem[Pretranspose].size > 0)
    {
        // Release permuted weights at the end of prepare as they are further transposed by the assembly dispatch
        // Do not release them if biases are dynamic and data type is quantized, since the weights tensor will be used for biases offset calculation
        // Keep all the auxiliary tensors in case of dynamic weights as they are recalculated every time.
        _aux_mem[TransposedWeights] = MemoryInfo(
            offset_int_vec(TransposedWeights),
            _dynamic_weights ? MemoryLifetime::Temporary
            : (_is_quantized_asymmetric && biases && !(biases->are_values_constant())) ? MemoryLifetime::Persistent
                                                                                       : MemoryLifetime::Prepare,
            _reshaped_weights.total_size());

        _aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights),
                                                _dynamic_weights ? MemoryLifetime::Temporary : MemoryLifetime::Prepare,
                                                _converted_weights.total_size());
    }
    else
    {
        _aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights),
                                                 _dynamic_weights            ? MemoryLifetime::Temporary
                                                 : _needs_weights_conversion ? MemoryLifetime::Prepare
                                                                             : MemoryLifetime::Persistent,
                                                 _reshaped_weights.total_size());

        _aux_mem[ConvertedWeights] = MemoryInfo(
            offset_int_vec(ConvertedWeights),
            _dynamic_weights ? MemoryLifetime::Temporary : MemoryLifetime::Persistent,
            _converted_weights.total_size());
    }
    _aux_mem[FlattenedSrc] =
        MemoryInfo(offset_int_vec(FlattenedSrc), MemoryLifetime::Temporary, _flattened_src.total_size());
}

Status CpuFullyConnected::has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
                                       const ITensorInfo         *src,
                                       const ITensorInfo         *weights,
                                       const ITensorInfo         *biases,
                                       const ITensorInfo         *dst,
                                       FullyConnectedLayerInfo    fc_info,
                                       WeightsInfo                weights_info)
{
    GEMMInfo gemm_info;
    gemm_info.set_activation_info(fc_info.activation_info);
    gemm_info.set_fast_math(fc_info.enable_fast_math);
    gemm_info.set_fixed_format(weights_info.weight_format() != WeightFormat::UNSPECIFIED);
    gemm_info.set_weight_format(weights_info.weight_format());

    return CpuGemm::has_opt_impl(expected_weight_format, src, weights, biases, dst, gemm_info);
}

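// Query whether the underlying CpuGemm has an optimized fixed-format kernel for
// this problem; if so, expected_weight_format reports the weight layout that
// kernel requires.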
Status CpuFullyConnected::validate(const ITensorInfo      *src,
                                   const ITensorInfo      *weights,
                                   const ITensorInfo      *biases,
                                   const ITensorInfo      *dst,
                                   FullyConnectedLayerInfo fc_info,
                                   const WeightsInfo      &weights_info)
{
    ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
                                                         DataType::F16, DataType::F32);

    if (is_fixed_format_fast_math(weights_info.weight_format()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(src, DataType::F32);
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(weights, DataType::BFLOAT16);
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(dst, DataType::F32);
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights, dst);
    }

    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
    ARM_COMPUTE_RETURN_ERROR_ON(
        fc_info.activation_info.enabled() && is_data_type_quantized(src->data_type()) &&
        fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU &&
        fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU &&
        fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);

    bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
    bool is_fc_after_conv = true;

    const ITensorInfo &flatten_src =
        TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_flatten_shape(src)));
    const ITensorInfo &reshaped_weights = TensorInfo(
        weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));
    const ITensorInfo &converted_weights = weights_reshaped
                                               ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
                                               : TensorInfo(*reshaped_weights.clone());
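    // flatten_src, reshaped_weights and converted_weights are metadata-only
    // stand-ins: validate() allocates nothing and simply threads these
    // TensorInfos through the same kernels that configure() would use.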

    // With the Fully Connected layer we can have 4 different cases:
    //  1) Convolution layer -> Fully Connected layer without batches
    //  2) Fully Connected layer -> Fully Connected layer without batches
    //  3) Convolution layer -> Fully Connected layer with batches
    //  4) Fully Connected layer -> Fully Connected layer with batches

    const ITensorInfo *src_to_use     = src;
    const ITensorInfo *weights_to_use = weights;

    // Check if we have a fully connected layer with batches
    const bool is_batched_fc_layer = dst->dimension(1) > 1;

    if (biases != nullptr)
    {
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        if (is_data_type_quantized(src->data_type()))
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
        }
    }

    if (is_batched_fc_layer)
    {
        is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
                           (std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),
                                       dst->tensor_shape().cbegin() + 1));
    }
    else
    {
        is_fc_after_conv = src->num_dimensions() > 1;
    }

    if (!weights_reshaped)
    {
        // Validate reshape weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuTransposeKernel::validate(weights, &reshaped_weights));
        weights_to_use = &reshaped_weights;
    }

    if (is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))
    {
        // Validate convert weights kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CpuConvertFullyConnectedWeights::validate(
            weights_to_use, &converted_weights, src->tensor_shape(), fc_info.weights_trained_layout));
        weights_to_use = &converted_weights;
    }

    if (is_fc_after_conv)
    {
        // Fully Connected layer after a Convolution Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(
            (weights_to_use->dimension(1) != (src->dimension(0) * src->dimension(1) * src->dimension(2))));

        // Validate flatten kernel
        ARM_COMPUTE_RETURN_ON_ERROR(CpuFlatten::validate(src, &flatten_src));
        src_to_use = &flatten_src;
    }
    else
    {
        // Fully Connected layer after a Fully Connected Layer without batches
        ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != weights_to_use->dimension(1));
    }
    // Validate matrix multiply kernel
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(src_to_use, weights_to_use, biases, dst, fc_info.activation_info,
                                            fc_info.enable_fast_math, weights_info.weight_format()));

    return Status{};
}

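// Note that run() always re-enters prepare(): for static weights this is a
// cheap no-op after the first call, while for dynamic weights the weight
// transformations are redone on every call.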
void CpuFullyConnected::run(ITensorPack &tensors)
{
    prepare(tensors);

#ifdef ARM_COMPUTE_ASSERTS_ENABLED
    ++_asrt_run_count;
    ARM_COMPUTE_ERROR_ON(_dynamic_weights && _asrt_prepare_count != _asrt_run_count);
#endif // ARM_COMPUTE_ASSERTS_ENABLED

    auto src = tensors.get_const_tensor(ACL_SRC_0);

    CpuAuxTensorHandler flattened_src(offset_int_vec(FlattenedSrc), _flattened_src, tensors, false);
    CpuAuxTensorHandler transformed_wei(offset_int_vec(_trans_weights_idx), _trans_weights, tensors, false);

    // Linearize src if it comes from a convolutional layer
    if (_is_fc_after_conv)
    {
        ITensorPack flatten_pack{{ACL_SRC, src}, {ACL_DST, flattened_src.get()}};
        _flatten->run(flatten_pack);
    }

    ITensorPack gemm_pack = tensors;
    gemm_pack.add_const_tensor(ACL_SRC_0, (_is_fc_after_conv) ? flattened_src.get() : src);
    if (_needs_weights_reshape || _needs_weights_conversion)
    {
        gemm_pack.add_const_tensor(ACL_SRC_1, transformed_wei.get());
    }

    // Run matrix multiply
    if (_is_quantized_asymmetric)
    {
        _mm_gemmlowp->run(gemm_pack);
    }
    else
    {
        _mm_gemm->run(gemm_pack);
    }
}

void CpuFullyConnected::prepare(ITensorPack &tensors)
{
    if (!_is_prepared || _dynamic_weights)
    {
#ifdef ARM_COMPUTE_ASSERTS_ENABLED
        ++_asrt_prepare_count;
        ARM_COMPUTE_ERROR_ON(!_dynamic_weights && _asrt_prepare_count > 1);
#endif // ARM_COMPUTE_ASSERTS_ENABLED

        auto weights = tensors.get_const_tensor(ACL_SRC_1);

        CpuAuxTensorHandler reshaped_weights(offset_int_vec(TransposedWeights), _reshaped_weights, tensors, false);
        CpuAuxTensorHandler converted_weights(offset_int_vec(ConvertedWeights), _converted_weights, tensors, false);

        // Pointer to current weights
        const ITensor *cur_weights = weights;

        // Reshape of the weights (happens only once for static weights; re-run on every call for dynamic ones)
        if (_needs_weights_reshape)
        {
            // Run reshape weights kernel and mark weights as unused
            ITensorPack transpose_pack{{ACL_SRC, weights}, {ACL_DST, reshaped_weights.get()}};
            NEScheduler::get().schedule_op(_transpose_weights.get(), Window::DimY, _transpose_weights->window(),
                                           transpose_pack);

            cur_weights->mark_as_unused();
            cur_weights = reshaped_weights.get();
        }

        // Convert weights if needed (again, only once for static weights)
        if (_needs_weights_conversion)
        {
            ITensorPack convert_pack{{ACL_SRC, cur_weights}, {ACL_DST, converted_weights.get()}};
            _convert_weights->run(convert_pack);

            cur_weights->mark_as_unused();
            cur_weights = converted_weights.get();
        }

        ITensorPack gemm_pack = tensors;
        gemm_pack.add_const_tensor(ACL_SRC_1, cur_weights);

        // Run the underlying GEMM's prepare step and release unused weights
        if (!_is_quantized_asymmetric)
        {
            _mm_gemm->prepare(gemm_pack);
        }
        else
        {
            _mm_gemmlowp->prepare(gemm_pack);
        }

        _is_prepared = true;
    }
}

experimental::MemoryRequirements CpuFullyConnected::workspace() const
{
    return _aux_mem;
}
} // namespace cpu
} // namespace arm_compute
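
// Minimal usage sketch (illustrative only, not part of the library source; the
// tensor and info variable names are assumptions). The operator is configured
// once on tensor metadata, auxiliary buffers advertised by workspace() are
// bound in the pack under their offset_int_vec() ids, and run() then executes
// on concrete tensors:
//
//   arm_compute::cpu::CpuFullyConnected fc;
//   fc.configure(&src_info, &weights_info, &bias_info, &dst_info,
//                arm_compute::FullyConnectedLayerInfo());
//   arm_compute::ITensorPack pack{{arm_compute::ACL_SRC_0, &src},
//                                 {arm_compute::ACL_SRC_1, &weights},
//                                 {arm_compute::ACL_SRC_2, &bias},
//                                 {arm_compute::ACL_DST, &dst}};
//   fc.prepare(pack); // optional: run() calls it internally
//   fc.run(pack);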