Blame - src/gpu/cl/operators/ClFullyConnected.cpp - ml/ComputeLibrary

2021-07-27 15:55:30 +0100

[diff] [blame]

1

/*

Jakub Sujak

2023-03-29 11:16:18 +0100

[diff] [blame]

2

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

22

* SOFTWARE.

23

*/

Georgios Pinitas

7891a73

2021-08-20 21:39:25 +0100

[diff] [blame]

24

#include "src/gpu/cl/operators/ClFullyConnected.h"

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

25

26

#include "arm_compute/core/Size2D.h"

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

27

#include "arm_compute/core/utils/misc/ShapeCalculator.h"

28

#include "arm_compute/core/utils/quantization/AsymmHelpers.h"

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

29

#include "arm_compute/core/Validate.h"

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

30

#include "arm_compute/runtime/CL/CLScheduler.h"

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

31

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

32

#include "src/common/utils/Log.h"

33

#include "src/core/CL/kernels/CLFillBorderKernel.h"

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

34

#include "src/core/helpers/MemoryHelpers.h"

Georgios Pinitas

7891a73

2021-08-20 21:39:25 +0100

[diff] [blame]

35

#include "src/gpu/cl/operators/ClConvertFullyConnectedWeights.h"

36

#include "src/gpu/cl/operators/ClFlatten.h"

37

#include "src/gpu/cl/operators/ClGemm.h"

38

#include "src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h"

Mohammed Suhail Munshi

2e0714d

2023-07-19 14:44:38 +0100

[diff] [blame]

39

#include "src/gpu/cl/operators/ClMatMul.h"

Georgios Pinitas

7891a73

2021-08-20 21:39:25 +0100

[diff] [blame]

40

#include "src/gpu/cl/operators/ClTranspose.h"

41

#include "src/gpu/cl/utils/ClAuxTensorHandler.h"

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

42

#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"

43

#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

44

#include "support/Cast.h"

#include <algorithm>

namespace arm_compute

{

namespace opencl

{

using namespace arm_compute::experimental;

53

using namespace arm_compute::misc::shape_calculator;

54

55

namespace

56

{

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

57

// Function to calculate batched tensor shape in format [M, 1, B0, B1 ..] which is the format matmul expects

58

inline TensorShape get_reshaped_matmul_tensor(const TensorShape &src)

59

{

60

return TensorShape(src.x(), 1, src.y(), src.collapsed_from(2).z()); // Return value optimisation

61

}

62

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

63

/** Populate the GEMMLowp output stage descriptor for a fully connected layer.
 *
 * The stage is always reset to QUANTIZE_DOWN_FIXEDPOINT with zero offset, multiplier and shift.
 * When @p src is asymmetric-quantized, the offset, the fixed-point multiplier/shift and the
 * min/max clamping bounds are then derived from the tensors' quantization information.
 *
 * @param[in]  src                   Source tensor info. Its data type decides whether the quantized setup runs.
 * @param[in]  weights               Weights tensor info.
 * @param[in]  dst                   Destination tensor info. If its total size is 0, the source quantization is used as output quantization.
 * @param[out] gemmlowp_output_stage Output stage descriptor to fill in.
 * @param[in]  activation_info       Activation; when enabled, it narrows the clamping bounds.
 *
 * @return An error status if the quantized multiplier cannot be computed, an empty Status otherwise.
 */
Status construct_gemmlowp_output_stage(const ITensorInfo       &src,
                                       const ITensorInfo       &weights,
                                       const ITensorInfo       &dst,
                                       GEMMLowpOutputStageInfo &gemmlowp_output_stage,
                                       ActivationLayerInfo      activation_info)
{
    // Baseline values; they are left untouched for non-quantized data types
    gemmlowp_output_stage.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_output_stage.gemmlowp_offset     = 0;
    gemmlowp_output_stage.gemmlowp_multiplier = 0;
    gemmlowp_output_stage.gemmlowp_shift      = 0;

    const DataType src_data_type = src.data_type();

    // Nothing else to configure unless the data is asymmetric-quantized
    if (!is_data_type_quantized_asymmetric(src_data_type))
    {
        return Status{};
    }

    const UniformQuantizationInfo src_q = src.quantization_info().uniform();
    const UniformQuantizationInfo wei_q = weights.quantization_info().uniform();
    const UniformQuantizationInfo dst_q = dst.quantization_info().uniform();

    // Fall back to the source quantization when the destination has zero total size
    const UniformQuantizationInfo out_q = (dst.total_size() != 0) ? dst_q : src_q;

    // Express the real-valued requantization scale as a fixed-point multiplier and shift
    const float requant_scale    = (src_q.scale * wei_q.scale) / out_q.scale;
    int         fixed_multiplier = 0;
    int         fixed_shift      = 0;
    ARM_COMPUTE_RETURN_ON_ERROR(
        quantization::calculate_quantized_multiplier(requant_scale, &fixed_multiplier, &fixed_shift));

    // Start from the full range of the data type, then narrow it if an activation is fused
    PixelValue lower_bound{};
    PixelValue upper_bound{};
    std::tie(lower_bound, upper_bound) = get_min_max(src_data_type);
    if (activation_info.enabled())
    {
        std::tie(lower_bound, upper_bound) =
            get_quantized_activation_min_max(activation_info, src_data_type, out_q);
    }

    // Commit the quantized output stage settings
    gemmlowp_output_stage.gemmlowp_offset     = out_q.offset;
    gemmlowp_output_stage.gemmlowp_multiplier = fixed_multiplier;
    gemmlowp_output_stage.gemmlowp_shift      = fixed_shift;
    gemmlowp_output_stage.gemmlowp_multipliers.push_back(fixed_multiplier);
    gemmlowp_output_stage.gemmlowp_shifts.push_back(fixed_shift);
    lower_bound.get(gemmlowp_output_stage.gemmlowp_min_bound);
    upper_bound.get(gemmlowp_output_stage.gemmlowp_max_bound);

    return Status{};
}

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

115

/** Validate the matrix-multiplication stage of the fully connected layer.
 *
 * Depending on @p use_matmul this checks either the native matmul kernels (quantized or not)
 * or the GEMM / GEMMLowp operators.
 *
 * @param[in] src        Source tensor info.
 * @param[in] weights    Weights tensor info.
 * @param[in] bias       Bias tensor info; forwarded as-is to the underlying validate calls.
 * @param[in] dst        Destination tensor info.
 * @param[in] fc_info    Fully connected layer meta-data.
 * @param[in] use_matmul True to validate the matmul kernels, false to validate the GEMM operators.
 *
 * @return The first error reported by the selected kernel/operator, an empty Status otherwise.
 */
Status validate_mm(const ITensorInfo             &src,
                   const ITensorInfo             &weights,
                   const ITensorInfo             *bias,
                   const ITensorInfo             &dst,
                   const FullyConnectedLayerInfo &fc_info,
                   bool                           use_matmul)
{
    // Note : If input is dynamic and data is not batched, use matmul, else use gemm
    const bool needs_transpose = fc_info.transpose_weights && !fc_info.are_weights_reshaped;
    const bool quantized       = is_data_type_quantized_asymmetric(src.data_type());

    if (use_matmul)
    {
        const MatMulInfo matmul_info = MatMulInfo().adj_rhs(needs_transpose);

        // Note: LHS is reshaped here to match ClMatMul expectations of batch index - From [M, B0, B1] to [M, 1, B0, B1]
        const TensorShape matmul_lhs_shape = get_reshaped_matmul_tensor(src.tensor_shape());
        TensorInfo        lhs_to_use       = src.clone()->set_tensor_shape(matmul_lhs_shape);

        // Ask the heuristics for the kernel configuration of the current GPU target
        const GPUTarget                                         target = CLScheduler::get().target();
        std::unique_ptr<cl_matmul::IClMatMulNativeKernelConfig> heuristics =
            cl_matmul::ClMatMulNativeKernelConfigurationFactory::create(target);
        const MatMulKernelInfo kernel_info = heuristics->configure(&lhs_to_use, &weights, matmul_info);

        if (quantized)
        {
            return kernels::ClMatMulLowpNativeKernel::validate(&lhs_to_use, &weights, bias, &dst, kernel_info,
                                                               fc_info.activation_info);
        }
        return kernels::ClMatMulNativeKernel::validate(&lhs_to_use, &weights, bias, &dst, kernel_info,
                                                       fc_info.activation_info);
    }

    // GEMM path. Dynamic gemm is the fallback for matmul when the weights are not constant and need transposing
    const bool dynamic_gemm = !weights.are_values_constant() && needs_transpose;

    GEMMLowpOutputStageInfo output_stage;
    ARM_COMPUTE_RETURN_ON_ERROR(
        construct_gemmlowp_output_stage(src, weights, dst, output_stage, fc_info.activation_info));

    const GEMMInfo gemm_info(false,                           // is_a_reshaped
                             false,                           // is_b_reshaped
                             !dynamic_gemm,                   // reshape_b_only_on_first_run
                             0,                               // depth_output_gemm3d
                             false,                           // reinterpret_input_as_3d
                             fc_info.retain_internal_weights, // retain_internal_weights
                             output_stage,                    // gemmlowp_output_stage
                             fc_info.fp_mixed_precision,      // fp_mixed_precision
                             false,                           // fast_math
                             true,                            // broadcast_bias
                             ActivationLayerInfo());          // activation_info

    if (quantized)
    {
        const UniformQuantizationInfo src_q = src.quantization_info().uniform();
        const UniformQuantizationInfo wei_q = weights.quantization_info().uniform();

        // Since we need negative offsets for computing convolution, validate against copies of the
        // source and weights infos whose quantization offsets are negated
        const std::unique_ptr<ITensorInfo> src_negated = src.clone();
        src_negated->set_quantization_info(QuantizationInfo(src_q.scale, -src_q.offset));

        const std::unique_ptr<ITensorInfo> weights_negated = weights.clone();
        weights_negated->set_quantization_info(QuantizationInfo(wei_q.scale, -wei_q.offset));

        // Validate gemmlowp function
        ARM_COMPUTE_RETURN_ON_ERROR(ClGemmLowpMatrixMultiplyCore::validate(src_negated.get(), weights_negated.get(),
                                                                           bias, &dst, gemm_info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(ClGemm::validate(&src, &weights, bias, &dst, 1.f, 1.f, gemm_info));
    }

    return Status{};
}

} // namespace

ClFullyConnected::ClFullyConnected()

189

: _convert_weights(nullptr),

190

_flatten(nullptr),

191

_reshape_weights(nullptr),

192

_mm_gemm(nullptr),

193

_mm_gemmlowp(nullptr),

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

194

_matmul_native_kernel(nullptr),

195

_matmul_lowp_native_kernel(nullptr),

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

_aux_mem(Count)

{

}

ClFullyConnected::~ClFullyConnected() = default;

201

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

202

void ClFullyConnected::configure_mm(const CLCompileContext &compile_context,

203

ITensorInfo *src,

204

ITensorInfo *weights,

205

ITensorInfo *bias,

206

ITensorInfo *dst,

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

207

const FullyConnectedLayerInfo &fc_info)

208

{

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

209

// If weights are dynamic and matmul is supported use matmul, else use gemm

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

210

if (_use_matmul)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

211

{

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

212

// Specify whether transpose weights is necessary in matmul info

213

const MatMulInfo mat_info = MatMulInfo().adj_rhs(_transpose_weights);

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

214

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

215

// Note: MatMul does not need offset negation unlike gemm

216

// 1. Change shape when calling matmul to fit batch expectations.

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

217

_lhs_to_use = src->clone()->set_tensor_shape(get_reshaped_matmul_tensor(_lhs_to_use.tensor_shape()));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

218

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

219

// 2. Use heuristics to get kernel info object

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

220

const GPUTarget gpu_target = CLScheduler::get().target();

221

std::unique_ptr<cl_matmul::IClMatMulNativeKernelConfig> kernel_config =

222

cl_matmul::ClMatMulNativeKernelConfigurationFactory::create(gpu_target);

223

MatMulKernelInfo kernel_info = kernel_config->configure(src, weights, mat_info);

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

224

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

225

// 3. Configure relevant matmul kernel

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

226

if (_is_quantized)

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

227

{

228

_matmul_lowp_native_kernel = std::make_unique<kernels::ClMatMulLowpNativeKernel>();

229

_matmul_lowp_native_kernel->set_target(gpu_target);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

230

_matmul_lowp_native_kernel->configure(compile_context, src, weights, bias, dst, kernel_info,

231

fc_info.activation_info);

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

}

else

{

_matmul_native_kernel = std::make_unique<kernels::ClMatMulNativeKernel>();

236

_matmul_native_kernel->set_target(gpu_target);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

237

_matmul_native_kernel->configure(compile_context, src, weights, bias, dst, kernel_info,

238

fc_info.activation_info);

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

239

}

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

240

}

241

else

242

{

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

243

// Configure GEMM

244

GEMMLowpOutputStageInfo gemmlowp_output_stage;

245

construct_gemmlowp_output_stage(*src, *weights, *dst, gemmlowp_output_stage, fc_info.activation_info);

246

247

const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped

248

false, // is_b_reshaped

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

249

!_dynamic_gemm, // reshape_b_only_on_first_run

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

250

0, // depth_output_gemm3d

251

false, // reinterpret_input_as_3d

252

fc_info.retain_internal_weights, // retain_internal_weights

253

gemmlowp_output_stage, // gemmlowp_output_stage

254

fc_info.fp_mixed_precision, // fp_mixed_precision

255

false, // fast_math

256

true, // broadcast_bias

257

fc_info.activation_info); // activation_info

258

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

259

if (_is_quantized)

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

260

{

261

// Since we need negative offsets for computing convolution, we need to change QuantizationInfo()

262

// Extract and negate input and weights offset

263

const QuantizationInfo src_quantization_info = src->quantization_info();

264

const QuantizationInfo weights_quantization_info = weights->quantization_info();

265

266

TensorInfo src_info = src->clone()->set_quantization_info(src_quantization_info);

267

TensorInfo weights_info = weights->clone()->set_quantization_info(weights_quantization_info);

268

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

269

src_info.set_quantization_info(

270

QuantizationInfo(src_quantization_info.uniform().scale, -src_quantization_info.uniform().offset));

271

weights_info.set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale,

272

-weights_quantization_info.uniform().offset));

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

273

274

// Configure gemmlowp function

275

_mm_gemmlowp = std::make_unique<ClGemmLowpMatrixMultiplyCore>();

276

_mm_gemmlowp->configure(compile_context, &src_info, &weights_info, bias, dst, gemm_info);

}

else

{

// Configure matrix multiply kernel

281

_mm_gemm = std::make_unique<ClGemm>();

282

_mm_gemm->configure(compile_context, src, weights, bias, dst, 1.f, 1.f, gemm_info);

283

}

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

284

}

285

}

286

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

287

void ClFullyConnected::configure_conv_fc(const CLCompileContext &compile_context,

288

ITensorInfo *src,

289

ITensorInfo *weights,

290

ITensorInfo *bias,

291

ITensorInfo *dst,

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

292

const FullyConnectedLayerInfo &fc_info)

293

{

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

294

// MatMul fuses transpose operation, so we use the first dimension for comparison where appropriate.

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

295

ARM_COMPUTE_ERROR_ON((weights->dimension((_use_matmul && _transpose_weights) ? 0 : 1) !=

296

(src->dimension(0) * src->dimension(1) * src->dimension(2))));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

297

298

// If the fully connected layer is called after a convolution layer, the input tensor must be linearized

299

300

// Initialize output tensor for flatten

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

301

_flattened_src = src->clone()

302

->set_is_resizable(true)

303

.reset_padding()

304

.set_tensor_shape(compute_flatten_shape(src))

305

.set_data_layout(DataLayout::NCHW);

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

306

307

// Configure flatten kernel

308

_flatten = std::make_unique<ClFlatten>();

309

_flatten->configure(compile_context, src, &_flattened_src);

310

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

311

// Note: if flatten has > 1 dimensions after, these dimensions are batch

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

312

// Configure matrix multiply kernel

313

configure_mm(compile_context, &_flattened_src, weights, bias, dst, fc_info);

314

}

315

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

316

void ClFullyConnected::configure_fc_fc(const CLCompileContext &compile_context,

317

ITensorInfo *src,

318

ITensorInfo *weights,

319

ITensorInfo *bias,

320

ITensorInfo *dst,

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

321

const FullyConnectedLayerInfo &fc_info)

322

{

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

323

// MatMul fuses transpose operation, so we use the first dimension for comparison where appropriate.

324

ARM_COMPUTE_ERROR_ON(src->dimension(0) != weights->dimension((_use_matmul && _transpose_weights) ? 0 : 1));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

325

326

// Configure matrix multiply kernel

327

configure_mm(compile_context, src, weights, bias, dst, fc_info);

328

}

329

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

330

void ClFullyConnected::configure(const CLCompileContext &compile_context,

331

ITensorInfo *src,

332

ITensorInfo *weights,

333

ITensorInfo *biases,

334

ITensorInfo *dst,

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

335

FullyConnectedLayerInfo fc_info)

336

{

337

ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);

ramy.elgammal@arm.com

f77b969

2023-08-07 17:07:02 +0100

[diff] [blame]

338

const GPUTarget gpu_target = get_arch_from_target(CLScheduler::get().target());

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

339

340

// Perform validate step

341

ARM_COMPUTE_ERROR_THROW_ON(ClFullyConnected::validate(src, weights, biases, dst, fc_info));

ramelg01

2e53f17

2021-09-22 10:48:25 +0100

[diff] [blame]

342

ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, fc_info);

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

343

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

344

_transpose_weights = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;

345

_is_fc_after_conv = true;

346

_is_quantized = is_data_type_quantized_asymmetric(src->data_type());

347

_is_prepared = fc_info.retain_internal_weights;

348

_weights_to_use = TensorInfo(*weights);

349

_weights_to_use_idx = ACL_SRC_1;

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

350

351

// When using dynamic weights - use matmul kernels.

Mohammed Suhail Munshi

2e0714d

2023-07-19 14:44:38 +0100

[diff] [blame]

352

// Note: MatMul is not used in the following cases (Gemm is used as fallback) :

353

// 1. When the weights tensor is not dynamic

354

// 2. MatMul does not support broadcasting batch dimension, and therefore is disabled if fc is batched.

355

// 3. When FC is after convolution and src tensor data layout does not match weights trained data layout (weights conversion kernel is required)

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

356

const bool is_batched_fc_layer = dst->dimension(1) > 1;

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

357

_use_matmul = gpu_target != GPUTarget::MIDGARD && !weights->are_values_constant() && !is_batched_fc_layer &&

358

!(src->num_dimensions() > 1 && (src->data_layout() != fc_info.weights_trained_layout));

359

_dynamic_gemm = !weights->are_values_constant() && _transpose_weights && !_use_matmul;

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

360

361

// With the Fully Connected layer we can have 4 different cases:

362

// 1) Convolution layer -> Fully Connected layer without batches

363

// 2) Fully Connected layer -> Fully Connected layer without batches

364

// 3) Convolution layer -> Fully Connected layer with batches

365

// 4) Fully Connected layer -> Fully Connected layer with batches

366

367

// Check if we have a fully connected layer with batches

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

368

if (is_batched_fc_layer)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

369

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

370

_is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&

371

(std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),

372

dst->tensor_shape().cbegin() + 1));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

}

else

{

_is_fc_after_conv = src->num_dimensions() > 1;

377

}

378

379

ITensorInfo *weights_used = weights;

380

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

381

// Reshape weights if needed - Not needed when matmul is in use as matmul fuses transpose op.

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

382

if (_transpose_weights && !_use_matmul)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

383

{

384

// Reshape the weights

385

_reshape_weights = std::make_unique<ClTranspose>();

386

_reshape_weights->configure(compile_context, weights, &_reshaped_weights);

387

weights_used = &_reshaped_weights;

388

_weights_to_use_idx = offset_int_vec(TransposedWeights);

389

}

390

391

// Convert weights if needed

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

392

if (_is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

393

{

394

// Convert weights

395

_convert_weights = std::make_unique<ClConvertFullyConnectedWeights>();

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

396

_convert_weights->configure(compile_context, weights_used, &_converted_weights, src->tensor_shape(),

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

397

fc_info.weights_trained_layout);

398

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

399

weights_used = &_converted_weights;

400

_weights_to_use_idx = offset_int_vec(ConvertedWeights);

401

_run_convert_weights = true;

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

402

}

403

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

404

if (_is_fc_after_conv)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

405

{

406

// Fully Connected layer after a Convolution Layer without batches

407

configure_conv_fc(compile_context, src, weights_used, biases, dst, fc_info);

}

else

{

// Fully Connected layer after a Fully Connected Layer without batches

412

configure_fc_fc(compile_context, src, weights_used, biases, dst, fc_info);

413

}

414

// Update TensorInfo of final weights used (Need to be done in the end due to padding expansion)

415

_weights_to_use = *weights_used;

416

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

417

if (_use_matmul)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

418

{

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

419

// Note : MatMul does not use transpose and does not need auxillary memory, so only converted weights are added to aux_mem

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

420

_aux_mem[ConvertedWeights] =

421

MemoryInfo(offset_int_vec(ConvertedWeights), MemoryLifetime::Temporary, _converted_weights.total_size());

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

422

}

423

else

424

{

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

425

// Set auxiliary memory requirements for gemm operators

426

auto gemm_mem_req = (_is_quantized) ? _mm_gemmlowp->workspace() : _mm_gemm->workspace();

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

427

for (unsigned int i = 0; i < gemm_mem_req.size(); ++i)

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

428

{

429

_aux_mem[i] = gemm_mem_req[i];

430

}

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

431

if (_aux_mem[1].size > 0 || _aux_mem[2].size > 0) // Persistent weights memory on GEMMs

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

432

{

433

// Release permuted weights at the of prepare as they are further transposed by the assembly dispatch

434

// Keep all the auxiliary tensors in case of dynamic weights as they are recalculated every time

435

_aux_mem[TransposedWeights] = MemoryInfo(

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

436

offset_int_vec(TransposedWeights), _dynamic_gemm ? MemoryLifetime::Temporary : MemoryLifetime::Prepare,

437

_reshaped_weights.total_size());

438

_aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights),

439

_dynamic_gemm ? MemoryLifetime::Temporary : MemoryLifetime::Prepare,

440

_converted_weights.total_size());

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

}

else

{

// Release permuted weights at the of prepare as they are further transposed by the assembly dispatch

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

445

const auto transposed_wei_lft = (_weights_to_use_idx == offset_int_vec(TransposedWeights))

446

? MemoryLifetime::Persistent

447

: MemoryLifetime::Prepare;

448

const auto converted_wei_lft = (_weights_to_use_idx == offset_int_vec(ConvertedWeights))

449

? MemoryLifetime::Persistent

450

: MemoryLifetime::Prepare;

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

451

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

452

_aux_mem[TransposedWeights] = MemoryInfo(offset_int_vec(TransposedWeights),

453

_dynamic_gemm ? MemoryLifetime::Temporary : transposed_wei_lft,

454

_reshaped_weights.total_size());

455

_aux_mem[ConvertedWeights] = MemoryInfo(offset_int_vec(ConvertedWeights),

456

_dynamic_gemm ? MemoryLifetime::Temporary : converted_wei_lft,

457

_converted_weights.total_size());

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

458

}

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

459

}

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

460

_aux_mem[FlattenedSrc] =

461

MemoryInfo(offset_int_vec(FlattenedSrc), MemoryLifetime::Temporary, _flattened_src.total_size());

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

462

}

463

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

464

Status ClFullyConnected::validate(const ITensorInfo *src,

465

const ITensorInfo *weights,

466

const ITensorInfo *biases,

467

const ITensorInfo *dst,

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

468

FullyConnectedLayerInfo fc_info)

469

{

470

ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

471

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,

472

DataType::F16, DataType::F32);

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

473

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights, dst);

474

ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

475

ARM_COMPUTE_RETURN_ERROR_ON(

476

fc_info.activation_info.enabled() && is_data_type_quantized(src->data_type()) &&

477

fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU &&

478

fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU &&

479

fc_info.activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);

ramy.elgammal@arm.com

f77b969

2023-08-07 17:07:02 +0100

[diff] [blame]

480

const GPUTarget gpu_target = get_arch_from_target(CLScheduler::get().target());

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

481

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

482

const bool transpose_weights = fc_info.transpose_weights ? !fc_info.are_weights_reshaped : false;

483

bool is_fc_after_conv = true;

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

484

485

// When using dynamic weights - use matmul kernels.

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

486

// Note: MatMul does not support broadcasting so fallback with batched cases.

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

487

const bool is_batched_fc_layer = dst->dimension(1) > 1;

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

488

const bool use_matmul = gpu_target != GPUTarget::MIDGARD && !weights->are_values_constant() &&

489

!is_batched_fc_layer &&

490

!(src->num_dimensions() > 1 && (src->data_layout() != fc_info.weights_trained_layout));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

491

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

492

const ITensorInfo &flatten_src = TensorInfo(src->clone()

493

->set_is_resizable(true)

494

.reset_padding()

495

.set_tensor_shape(compute_flatten_shape(src))

496

.set_data_layout(DataLayout::NCHW));

497

const ITensorInfo &reshaped_weights = TensorInfo(

498

weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights)));

499

const ITensorInfo &converted_weights = (transpose_weights && !use_matmul)

500

? TensorInfo(*reshaped_weights.clone())

501

: TensorInfo(weights->clone()->set_is_resizable(true).reset_padding());

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

502

503

// With the Fully Connected layer we can have 4 different cases:

504

// 1) Convolution layer -> Fully Connected layer without batches

505

// 2) Fully Connected layer -> Fully Connected layer without batches

506

// 3) Convolution layer -> Fully Connected layer with batches

507

// 4) Fully Connected layer -> Fully Connected layer with batches

508

509

const ITensorInfo *src_to_use = src;

510

const ITensorInfo *weights_to_use = weights;

511

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

512

if (biases != nullptr)

Giorgio Arena

63e0beb

2021-09-24 14:04:27 +0100

[diff] [blame]

513

{

514

ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

515

if (is_data_type_quantized(src->data_type()))

Giorgio Arena

63e0beb

2021-09-24 14:04:27 +0100

[diff] [blame]

516

{

517

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);

}

else

{

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);

522

}

523

}

524

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

525

// Check if FC is after conv (flatten kernel is run in case where FC is after conv.)

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

526

if (is_batched_fc_layer)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

527

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

528

is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&

529

(std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),

530

dst->tensor_shape().cbegin() + 1));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

}

else

{

is_fc_after_conv = src->num_dimensions() > 1;

535

}

536

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

537

// Transpose kernel does not run when matmul is supported as matmul fuses transpose op.

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

538

if (transpose_weights && !use_matmul)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

539

{

540

// Validate reshape weights kernel

541

ARM_COMPUTE_RETURN_ON_ERROR(ClTranspose::validate(weights, &reshaped_weights));

542

weights_to_use = &reshaped_weights;

543

}

544

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

545

if (is_fc_after_conv && (src->data_layout() != fc_info.weights_trained_layout))

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

546

{

547

// Validate convert weights kernel

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

548

ARM_COMPUTE_RETURN_ON_ERROR(ClConvertFullyConnectedWeights::validate(

549

weights_to_use, &converted_weights, src->tensor_shape(), fc_info.weights_trained_layout));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

550

weights_to_use = &converted_weights;

551

}

552

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

553

if (is_fc_after_conv)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

554

{

555

// Fully Connected layer after a Convolution Layer without batches

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

556

// K Index of matrix multiplication. MatMul performs transpose in kernel, so index is 0 when matmul and transpose enabled

557

const int weight_idx = (use_matmul && transpose_weights) ? 0 : 1;

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

558

ARM_COMPUTE_RETURN_ERROR_ON(

559

(weights_to_use->dimension(weight_idx) != (src->dimension(0) * src->dimension(1) * src->dimension(2))));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

560

561

// Validate flatten kernel

562

ARM_COMPUTE_RETURN_ON_ERROR(ClFlatten::validate(src, &flatten_src));

563

src_to_use = &flatten_src;

}

else

{

// Fully Connected layer after a Fully Connected Layer without batches

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

568

// K Index of matrix multiplication. MatMul performs transpose in kernel, so index is 0 when matmul and transpose enabled

569

const int weight_idx = (use_matmul && transpose_weights) ? 0 : 1;

570

ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != weights_to_use->dimension(weight_idx));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

571

}

572

573

// Validate matrix multiply kernel

Mohammed Suhail Munshi

2e0714d

2023-07-19 14:44:38 +0100

[diff] [blame]

574

ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(*src_to_use, *weights_to_use, biases, *dst, fc_info, use_matmul));

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

return Status{};

}

/** Execute the fully connected operator on the tensors held in @p tensors.
 *
 * Steps performed, in order:
 *  -# prepare() is called with the same pack.
 *  -# The source tensor is read from slot ACL_SRC_0; when the layer follows a convolution
 *     (_is_fc_after_conv) it is first run through the flatten operator into the FlattenedSrc
 *     auxiliary tensor.
 *  -# A copy of @p tensors is made for the matrix multiplication, with ACL_SRC_0 set to the
 *     (possibly flattened) source and, when _weights_to_use_idx differs from ACL_SRC_1,
 *     ACL_SRC_1 set to the tensor provided by the weights handler.
 *  -# The multiplication is dispatched to the native matmul kernels (_use_matmul) or to the
 *     GEMM operators, choosing the low-precision variant when _is_quantized is set.
 *
 * @param[in,out] tensors Tensor pack supplying the operator's tensors.
 */
void ClFullyConnected::run(ITensorPack &tensors)
{
    prepare(tensors);

#ifdef ARM_COMPUTE_ASSERTS_ENABLED
    // Debug bookkeeping: with dynamic GEMM the prepare counter has to keep pace with the run counter.
    ++_asrt_run_count;
    ARM_COMPUTE_ERROR_ON(_dynamic_gemm && _asrt_prepare_count != _asrt_run_count);
#endif // ARM_COMPUTE_ASSERTS_ENABLED

    const ITensor *input = tensors.get_const_tensor(ACL_SRC_0);

    // Handlers for the flattened-source and weights tensors (constructed in the same order as before)
    CLAuxTensorHandler flat_src_handler(offset_int_vec(FlattenedSrc), _flattened_src, tensors, false);
    CLAuxTensorHandler weights_handler(_weights_to_use_idx, _weights_to_use, tensors, false);

    // Source actually fed to the matrix multiplication
    const ITensor *mm_input = input;

    // An input coming from a convolutional layer has to be linearized first
    if (_is_fc_after_conv)
    {
        ITensorPack flatten_pack{{ACL_SRC, input}, {ACL_DST, flat_src_handler.get()}};
        _flatten->run(flatten_pack);
        mm_input = flat_src_handler.get();
    }

    // Start from the caller's pack and override only the slots that differ
    ITensorPack gemm_pack = tensors;
    gemm_pack.add_const_tensor(ACL_SRC_0, mm_input);
    if (_weights_to_use_idx != ACL_SRC_1)
    {
        gemm_pack.add_const_tensor(ACL_SRC_1, weights_handler.get());
    }

    // Pick the backend: native matmul kernels or GEMM operators, quantized or not
    if (_use_matmul && _is_quantized)
    {
        CLScheduler::get().enqueue_op(*_matmul_lowp_native_kernel, gemm_pack, true);
    }
    else if (_use_matmul)
    {
        CLScheduler::get().enqueue_op(*_matmul_native_kernel, gemm_pack, true);
    }
    else if (_is_quantized)
    {
        _mm_gemmlowp->run(gemm_pack);
    }
    else
    {
        _mm_gemm->run(gemm_pack);
    }
}

void ClFullyConnected::prepare(ITensorPack &tensors)

635

{

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

636

// Note : Running prepare() each run when _use_matmul is true is unnecessary unless weights conversion is needed.

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

637

if (!_is_prepared || _dynamic_gemm)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

638

{

Jakub Sujak

2023-03-29 11:16:18 +0100

[diff] [blame]

639

#ifdef ARM_COMPUTE_ASSERTS_ENABLED

640

++_asrt_prepare_count;

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

641

ARM_COMPUTE_ERROR_ON(!_dynamic_gemm && !_use_matmul && _asrt_prepare_count > 1);

Jakub Sujak

2023-03-29 11:16:18 +0100

[diff] [blame]

642

#endif // ARM_COMPUTE_ASSERTS_ENABLED

643

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

644

auto weights = tensors.get_const_tensor(ACL_SRC_1);

645

646

CLAuxTensorHandler reshaped_weights(offset_int_vec(TransposedWeights), _reshaped_weights, tensors, false);

647

CLAuxTensorHandler converted_weights(offset_int_vec(ConvertedWeights), _converted_weights, tensors, false);

648

649

// Pointer to current weights

650

const ITensor *cur_weights = weights;

651

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

652

// Reshape weights if needed. Disabled when matmul kernels are enabled as matmul fuses transpose.

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

653

if (_transpose_weights && !_use_matmul)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

654

{

655

// Run reshape weights kernel and mark weights as unused

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

656

ITensorPack transpose_pack{{ACL_SRC, weights}, {ACL_DST, reshaped_weights.get()}};

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

657

_reshape_weights->run(transpose_pack);

658

659

cur_weights->mark_as_unused();

660

cur_weights = reshaped_weights.get();

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

661

}

662

Jakub Sujak

2023-03-29 11:16:18 +0100

[diff] [blame]

663

// Convert weights if needed

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

664

if (_run_convert_weights)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

665

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

666

ITensorPack convert_pack{{ACL_SRC, cur_weights}, {ACL_DST, converted_weights.get()}};

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

667

_convert_weights->run(convert_pack);

668

669

cur_weights->mark_as_unused();

670

cur_weights = converted_weights.get();

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

671

}

672

Jakub Sujak

2023-03-29 11:16:18 +0100

[diff] [blame]

673

ITensorPack gemm_pack = tensors;

674

gemm_pack.add_const_tensor(ACL_SRC_1, cur_weights);

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

675

Mohammed Suhail Munshi

2023-06-27 14:25:58 +0100

[diff] [blame]

676

// Prepare GEMM prepare and release unused weights

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

677

if (_dynamic_gemm || !_use_matmul)

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

678

{

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

679

if (!_is_quantized)

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

680

{

681

_mm_gemm->prepare(gemm_pack);

}

else

{

_mm_gemmlowp->prepare(gemm_pack);

686

}

Georgios Pinitas

2021-07-27 15:55:30 +0100

[diff] [blame]

687

}

Mohammed Suhail Munshi

2023-06-19 14:57:57 +0100

[diff] [blame]

688

Georgios Pinitas