Blame - src/runtime/NEON/functions/NEConvolutionLayer.cpp - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

80

81

_weights_reshaped.allocator()->init(info_wr);

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

82

_memory_group.manage(&_weights_reshaped);

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

83

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

84

_weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);

85

_weights_transposed_kernel.configure(&_weights_reshaped, output);

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

86

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

87

_weights_reshaped.allocator()->allocate();

}

else

{

_weights_reshape_kernel.configure(weights, biases, output);

}

}

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

95

// Static validation of a weights-reshape configuration.
//
// weights      : tensor info of the weights; must be single channel QS8/QS16/F16/F32 with at most 4 dimensions.
// biases       : optional tensor info of the biases (may be nullptr). When present it must match the weights'
//                data type and fixed point position, be 1D, and have dimension(0) == weights->dimension(3).
// output       : tensor info of the destination; must match the weights' data type and fixed point position.
// transpose1xW : when true, the reshape kernel is validated against an intermediate reshaped tensor info and
//                the 1xW transpose kernel is validated against the output; otherwise only the reshape kernel
//                is validated, directly against the output.
//
// Returns an empty Status on success, or the first failing check.
Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose1xW)
{
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    // Check if bias are present, if yes they will be embedded to the weights matrix
    const bool has_bias = (biases != nullptr);

    // Checks performed when biases are present.
    // These were previously spelled out twice in a row; a single pass is sufficient because
    // the second group could never fail once the first one had succeeded.
    if(has_bias)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    if(transpose1xW)
    {
        // Intermediate info describing the reshaped (not yet transposed) weights
        TensorInfo weights_reshaped = weights->clone()->set_tensor_shape(get_reshaped_weights_shape(weights, has_bias));
        ARM_COMPUTE_RETURN_ON_ERROR(NEWeightsReshapeKernel::validate(weights, biases, &weights_reshaped));
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(&weights_reshaped, output));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEWeightsReshapeKernel::validate(weights, biases, output));
    }

    return Status{};
}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

135

void NEConvolutionLayerReshapeWeights::run()

136

{

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

137

_memory_group.acquire();

138

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

139

NEScheduler::get().schedule(&_weights_reshape_kernel, 3);

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

140

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

141

if(_transpose1xW)

142

{

143

NEScheduler::get().schedule(&_weights_transposed_kernel, Window::DimY);

144

}

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

145

146

_memory_group.release();

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

147

}

148

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

149

namespace

150

{

151

// Computes the 2D shape of the reshaped weights matrix used by the convolution.
//
// weights                        : tensor info of the original weights.
// has_bias                       : when true one extra row is accounted for in the matrix.
// is_fully_connected_convolution : when true the plain (cols, rows) shape is returned; otherwise the
//                                  shape is derived using a transpose width of 16 bytes divided by the
//                                  element size.
TensorShape get_reshaped_weights_shape_conv(const ITensorInfo *weights, bool has_bias, bool is_fully_connected_convolution)
{
    const unsigned int num_cols = weights->dimension(3);
    const unsigned int num_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + (has_bias ? 1 : 0);

    if(!is_fully_connected_convolution)
    {
        // Shape of the tensor holding the transposed weights
        const float        transpose_width   = 16.0f / weights->element_size();
        const unsigned int transposed_width  = num_rows * static_cast<unsigned int>(transpose_width);
        const unsigned int transposed_height = static_cast<unsigned int>(std::ceil(num_cols / transpose_width));
        return TensorShape(transposed_width, transposed_height);
    }

    // Shape of the tensor holding the reshaped weights
    return TensorShape(num_cols, num_rows);
}

// Validates the convolution arguments and fills in the derived values through the reference parameters.
//
// On success returns an empty Status and sets:
//   dt                             - data type of the input
//   has_bias                       - whether a biases info was supplied
//   are_weights_reshaped           - as reported by weights_info
//   kernel_width / kernel_height   - taken from weights_info when the weights are already reshaped,
//                                    otherwise from weights dimensions 0 and 1
//   mat_weights_cols / _rows       - computed from the weights dimensions (plus one row when has_bias)
//   conv_w / conv_h                - result of scaled_dimensions() for the input and kernel size
//   is_fully_connected_convolution - true when conv_w and conv_h are both 1
// On failure the first failing check is returned and the reference parameters are left untouched.
Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, DataType &dt,
                                      bool &has_bias,
                                      bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height, bool &is_fully_connected_convolution, unsigned int &mat_weights_cols, unsigned int &mat_weights_rows,
                                      unsigned int &conv_w, unsigned int &conv_h)
{
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON(!weights_info.are_reshaped() && weights->dimension(2) != input->dimension(2));
    ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);

    if(biases != nullptr)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
        ARM_COMPUTE_RETURN_ERROR_ON(!weights_info.are_reshaped() && biases->dimension(0) != weights->dimension(3));
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
    }

    // All checks passed: publish the derived values
    dt                   = input->data_type();
    has_bias             = (biases != nullptr);
    are_weights_reshaped = weights_info.are_reshaped();

    // Kernel size comes from the weights info when the weights were reshaped beforehand
    if(are_weights_reshaped)
    {
        const auto kernel_dims = weights_info.kernel_size();
        kernel_width           = kernel_dims.first;
        kernel_height          = kernel_dims.second;
    }
    else
    {
        kernel_width  = weights->dimension(0);
        kernel_height = weights->dimension(1);
    }

    mat_weights_cols = weights->dimension(3);
    mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + (has_bias ? 1 : 0);

    // Dimensions of the convolved output
    const auto out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info);
    conv_w              = out_dims.first;
    conv_h              = out_dims.second;

    is_fully_connected_convolution = (conv_w == 1) && (conv_h == 1);

    return Status{};
}

} // namespace

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

205

// Constructs the function: the memory manager is moved into the memory group, every kernel and
// intermediate tensor is default-constructed, no optimised GEMM kernel is selected and all flags are false.
NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _input_im2col_kernel(),
      _input_interleave_kernel(),
      _reshape_weights(),
      _mm_kernel(),
      _mm_optimised_kernel(nullptr),
      _output_col2im_kernel(),
      _input_im2col_reshaped(),
      _input_interleaved_reshaped(),
      _weights_reshaped(),
      _gemm_output(),
      _workspace(),
      _has_bias(false),
      _is_fully_connected_convolution(false),
      _are_weights_reshaped(false)
{
}

void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)

212

{

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

213

// Perform validate step

214

ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

215

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

216

DataType dt{};

217

unsigned int kernel_width = 0;

218

unsigned int kernel_height = 0;

219

unsigned int mat_weights_cols = 0;

220

unsigned int mat_weights_rows = 0;

221

unsigned int conv_w = 0;

222

unsigned int conv_h = 0;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

223

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

224

Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, dt, _has_bias, _are_weights_reshaped,

225

kernel_width, kernel_height,

226

_is_fully_connected_convolution,

227

mat_weights_cols, mat_weights_rows, conv_w, conv_h);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

228

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

229

ARM_COMPUTE_ERROR_THROW_ON(status);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

230

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

231

const unsigned int fixed_point_position = input->info()->fixed_point_position();

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

232

Moritz Pflanzer

2017-09-15 10:42:58 +0100

[diff] [blame]

233

#if defined(__arm__)

234

if(NEScheduler::get().cpu_info().CPU == CPUTarget::ARMV7 && dt == DataType::F32)

235

{

236

_mm_optimised_kernel = support::cpp14::make_unique<NEGEMMAArch32Kernel>();

237

}

238

#elif defined(__aarch64__)

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

239

if(NEScheduler::get().cpu_info().CPU >= CPUTarget::ARMV8 && dt == DataType::F32)

240

{

241

_mm_optimised_kernel = support::cpp14::make_unique<NEGEMMAArch64Kernel>();

242

}

Moritz Pflanzer

2017-09-15 10:42:58 +0100

[diff] [blame]

243

#endif /* defined(__arm__) || defined(__aarch64__) */

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

244

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

245

// Reshape weights if needed

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

246

if(_mm_optimised_kernel != nullptr)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

247

{

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

248

if(_are_weights_reshaped)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

249

{

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

250

mat_weights_cols = weights_info.num_kernels();

251

mat_weights_rows = weights->info()->dimension(1);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

252

}

253

else

254

{

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

255

TensorShape reshaped_weights_shape{ mat_weights_cols, mat_weights_rows };

256

257

// Create tensor to store the reshaped weights

258

_weights_reshaped.allocator()->init(TensorInfo(reshaped_weights_shape, 1, dt, fixed_point_position));

259

_reshape_weights.configure(weights, biases, &_weights_reshaped, false /* 1xW transpose */);

260

weights = &_weights_reshaped;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

261

}

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

}

else

{

if(_are_weights_reshaped)

266

{

Georgios Pinitas

b660dcf

2017-12-13 10:48:06 +0000

[diff] [blame]

267

if(_is_fully_connected_convolution)

268

{

269

mat_weights_cols = weights_info.num_kernels();

270

mat_weights_rows = weights->info()->dimension(1);

}

else

{

const unsigned int transpose_width = 16 / input->info()->element_size();

275

mat_weights_cols = weights_info.num_kernels();

276

mat_weights_rows = weights->info()->dimension(0) / transpose_width + (_has_bias ? 1 : 0);

277

}

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

}

else

{

TensorShape reshaped_weights_shape;

282

283

if(_is_fully_connected_convolution)

284

{

285

reshaped_weights_shape = TensorShape{ mat_weights_cols, mat_weights_rows };

}

else

{

// Create tensor to store transposed weights

290

const float transpose_width = 16.0f / input->info()->element_size();

291

reshaped_weights_shape = TensorShape{ mat_weights_rows *static_cast<unsigned int>(transpose_width),

292

static_cast<unsigned int>(std::ceil(mat_weights_cols / transpose_width)) };

293

}

294

295

// Create tensor to store the reshaped weights

296

_weights_reshaped.allocator()->init(TensorInfo(reshaped_weights_shape, 1, dt, fixed_point_position));

297

_reshape_weights.configure(weights, biases, &_weights_reshaped, !_is_fully_connected_convolution /* 1xW transpose */);

298

weights = &_weights_reshaped;

299

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

300

}

301

302

// Create tensor to store im2col reshaped inputs

303

const unsigned int mat_input_cols = mat_weights_rows;

304

const unsigned int mat_input_rows = conv_w * conv_h;

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

305

306

TensorShape shape_im2col(input->info()->tensor_shape());

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

307

shape_im2col.set(0, mat_input_cols);

308

shape_im2col.set(1, mat_input_rows);

309

shape_im2col.set(2, 1);

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

310

_input_im2col_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col));

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

311

_memory_group.manage(&_input_im2col_reshaped);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

312

313

// Create tensor (interleave) to prepare input tensor for GEMM

Georgios Pinitas

1b2e2e5

2017-09-28 11:30:27 +0100

[diff] [blame]

314

if(!_is_fully_connected_convolution && _mm_optimised_kernel == nullptr)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

315

{

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

316

TensorShape shape_interleaved(shape_im2col);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

317

shape_interleaved.set(0, shape_interleaved.x() * 4);

318

shape_interleaved.set(1, std::ceil(shape_interleaved.y() / 4.f));

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

319

_input_interleaved_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_interleaved));

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

320

_memory_group.manage(&_input_interleaved_reshaped);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

321

}

322

323

// Create GEMM output tensor

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

324

TensorShape shape_gemm(_input_im2col_reshaped.info()->tensor_shape());

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

325

shape_gemm.set(0, mat_weights_cols);

326

shape_gemm.set(1, mat_input_rows);

Giorgio Arena

2017-11-30 15:08:38 +0000

[diff] [blame^]

327

_gemm_output.allocator()->init(_input_im2col_reshaped.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_gemm));

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

328

_memory_group.manage(&_gemm_output);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

329

330

// Configure kernels

Gian Marco Iodice

13edbff

2017-06-26 17:20:16 +0100

[diff] [blame]

331

_input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

332

Moritz Pflanzer

2017-09-15 10:42:58 +0100

[diff] [blame]

333

#if defined(__arm__) || defined(__aarch64__)

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

334

if(_mm_optimised_kernel != nullptr)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

335

{

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

336

struct CPUInfo ci = NEScheduler::get().cpu_info();

337

338

const int M = _gemm_output.info()->tensor_shape().y();

339

const int N = _gemm_output.info()->tensor_shape().x();

340

const int K = _input_im2col_reshaped.info()->tensor_shape().x();

341

Moritz Pflanzer

2017-09-15 10:42:58 +0100

[diff] [blame]

342

#if defined(__arm__)

343

GemmInterleaved<sgemm_8x6, float, float> gemm(&ci, M, N, K, false, false);

344

#elif defined(__aarch64__)

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

345

GemmInterleaved<sgemm_12x8, float, float> gemm(&ci, M, N, K, false, false);

Moritz Pflanzer

2017-09-15 10:42:58 +0100

[diff] [blame]

346

#endif /* defined(__arm__) || defined(__aarch64__) */

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

347

348

constexpr size_t alignment = 4096;

349

_workspace.allocator()->init(TensorInfo(TensorShape{ (gemm.get_working_size() + alignment - 1) * NEScheduler::get().num_threads() }, 1, DataType::U8));

350

_memory_group.manage(&_workspace);

351

352

// Configure matrix multiplication kernel

Georgios Pinitas

08c5a06

2017-12-14 17:53:39 +0000

[diff] [blame]

353

_mm_optimised_kernel->configure(&_input_im2col_reshaped, weights, &_gemm_output, &_workspace);

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

354

355

_workspace.allocator()->allocate();

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

356

}

357

else

Moritz Pflanzer

2017-09-15 10:42:58 +0100

[diff] [blame]

358

#endif /* defined(__arm__) || defined(__aarch64__) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

359

{

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

360

if(_is_fully_connected_convolution)

361

{

362

_mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f);

}

else

{

_input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped);

367

_mm_kernel.configure(&_input_interleaved_reshaped, weights, &_gemm_output, 1.0f);

368

_input_interleaved_reshaped.allocator()->allocate();

369

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

370

}

Moritz Pflanzer

2017-08-31 14:56:32 +0100

[diff] [blame]

371

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

372

_input_im2col_reshaped.allocator()->allocate();

Georgios Pinitas

d912fd8

2017-11-27 21:00:13 +0000

[diff] [blame]

373

_output_col2im_kernel.configure(&_gemm_output, output, Size2D(conv_w, conv_h));

Georgios Pinitas

2017-09-08 19:47:30 +0100

[diff] [blame]

374

_gemm_output.allocator()->allocate();

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

375

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

376

// Allocate intermediate tensor

377

if(!_are_weights_reshaped)

378

{

379

_weights_reshaped.allocator()->allocate();

380

}

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

381

}

382

Giorgio Arena