Blame - src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp - ml/ComputeLibrary

2017-10-18 17:58:22 +0100

[diff] [blame]

{

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

48

void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor *input,

49

const ITensor *weights,

50

const ITensor *biases,

51

ITensor *output,

52

const PadStrideInfo &conv_info,

53

unsigned int depth_multiplier,

54

const ActivationLayerInfo &act_info)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

55

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

56

ARM_COMPUTE_UNUSED(act_info);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

57

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

58

PixelValue zero_value(0.f);

59

Georgios Pinitas

a799ce0

2018-09-12 20:11:34 +0100

[diff] [blame]

60

// Initialize the intermediate accumulator tensor in case of quantized input

61

if(_is_quantized)

62

{

63

TensorShape accum_shape = output->info()->tensor_shape();

64

DataLayout accum_layout = output->info()->data_layout();

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

65

if(!_is_nchw)

Georgios Pinitas

a799ce0

2018-09-12 20:11:34 +0100

[diff] [blame]

66

{

67

permute(accum_shape, PermutationVector(1U, 2U, 0U));

68

accum_layout = DataLayout::NCHW;

69

}

70

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

71

_memory_group.manage(&_accumulator);

Georgios Pinitas

2481d46

2019-02-19 18:47:46 +0000

[diff] [blame]

72

_accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));

Georgios Pinitas

a799ce0

2018-09-12 20:11:34 +0100

[diff] [blame]

73

_accumulator.info()->set_data_layout(accum_layout);

74

zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().offset));

75

}

76

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

77

if(!_is_nchw)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

78

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

79

_memory_group.manage(&_permuted_input);

80

_memory_group.manage(&_permuted_output);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

81

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

82

// Configure the function to transform the input tensor from NHWC -> NCHW

83

_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));

84

_permuted_input.info()->set_data_layout(DataLayout::NCHW);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

85

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

86

// Configure the function to transform the weights tensor from HWI -> IHW

87

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));

88

_permuted_weights.info()->set_data_layout(DataLayout::NCHW);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

89

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

90

// Configure optimized depthwise

91

_dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

92

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

93

// Configure border handler

94

_border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

95

96

// Allocate tensors

97

_permuted_input.allocator()->allocate();

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

98

}

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

99

else

100

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

101

// Configure depthwise convolution kernel

102

_dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier);

Georgios Pinitas

9be0c5a

2018-02-19 12:46:29 +0000

[diff] [blame]

103

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

104

// Configure border handler

105

_border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

106

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

107

108

// Configure biases accumulation

109

if(_is_quantized)

110

{

111

const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info();

112

113

float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;

114

int output_multiplier, output_shift;

115

quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

116

_output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, output_quant_info.offset);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

117

_accumulator.allocator()->allocate();

118

}

119

else if(_has_bias)

120

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

121

_output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

122

}

123

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

124

// Permute output

125

if(!_is_nchw)

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

126

{

127

// Configure the function to transform the convoluted output to NHWC

128

_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

129

_permuted_output.allocator()->allocate();

130

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

131

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

132

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

133

void NEDepthwiseConvolutionLayer3x3::configure_optimized(const ITensor *input,

134

const ITensor *weights,

135

const ITensor *biases,

136

ITensor *output,

137

const PadStrideInfo &conv_info,

138

unsigned int depth_multiplier,

139

const ActivationLayerInfo &act_info)

140

{

141

ActivationLayerInfo act_info_to_use = ActivationLayerInfo();

142

const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);

143

const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);

144

_is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);

145

if(!_is_activationlayer_enabled)

146

{

147

act_info_to_use = act_info;

}

if(_is_nchw)

{

_memory_group.manage(&_permuted_input);

153

_memory_group.manage(&_permuted_output);

154

155

// Configure the function to transform the input tensor from NCHW -> NHWC

156

_permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));

157

_permuted_input.info()->set_data_layout(DataLayout::NHWC);

158

159

// Configure the function to transform the weights tensor from IHW -> HWI

160

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));

161

_permuted_weights.info()->set_data_layout(DataLayout::NHWC);

162

163

// Configure optimized depthwise

164

_dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use);

165

166

// Configure the function to transform the convoluted output to ACL's native ordering format NCHW

167

_permuted_output.info()->set_data_layout(DataLayout::NHWC);

168

_permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

169

170

// Allocate tensors

171

_permuted_input.allocator()->allocate();

172

_permuted_output.allocator()->allocate();

}

else

{

_dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use);

}

}

void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input,

181

const ITensor *weights,

182

const ITensor *biases,

183

ITensor *output, const PadStrideInfo &conv_info,

184

unsigned int depth_multiplier,

185

const ActivationLayerInfo &act_info)

186

{

187

ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);

188

ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

189

190

_original_weights = weights;

191

_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());

192

_has_bias = biases != nullptr;

193

_is_optimized = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),

weights->info(),

conv_info,

depth_multiplier);

_is_nchw = input->info()->data_layout() == DataLayout::NCHW;

198

_permute = _is_optimized == _is_nchw;

199

_is_prepared = false;

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

200

_is_activationlayer_enabled = act_info.enabled();

201

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

202

// Configure appropriate pipeline

203

if(_is_optimized)

204

{

205

configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info);

}

else

{

configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info);

210

}

211

212

// Configure activation

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

213

if(_is_activationlayer_enabled)

214

{

215

_activationlayer_function.configure(output, nullptr, act_info);

216

}

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

217

}

218

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

219

// Static validation of a 3x3 depthwise convolution configuration.
//
// Checks the biases (if any), then validates whichever pipeline would be selected for the
// given tensor infos (assembly dispatch, or depthwise kernel plus output stage for quantized
// data), and finally the activation layer when one is enabled.
//
// Returns an empty Status on success, or the first error encountered.
Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo         *input,
                                                const ITensorInfo         *weights,
                                                const ITensorInfo         *biases,
                                                const ITensorInfo         *output,
                                                const PadStrideInfo       &conv_info,
                                                unsigned int               depth_multiplier,
                                                const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);

    // Biases must be 1D and match the channel dimension of the weights
    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    const bool use_assembly = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier);
    if(use_assembly)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier));
    }
    else
    {
        // S32 stand-in for the accumulator used with quantized data
        TensorInfo accumulator  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

        const ITensorInfo *kernel_output = is_quantized ? &accumulator : output;
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, kernel_output, conv_info, depth_multiplier));

        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
        }
    }

    // Activation layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

261

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

262

void NEDepthwiseConvolutionLayer3x3::run_generic()

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

263

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

264

// Fill border

265

NEScheduler::get().schedule(&_border_handler, Window::DimX);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

266

267

// Execute depthwise convolution

268

NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

269

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

270

// Add biases

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

271

if(_has_bias || _is_quantized)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

272

{

Michalis Spyrou

b91e34c

2017-12-20 15:50:55 +0000

[diff] [blame]

273

NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

274

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

275

276

// Permute output

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

277

if(!_is_nchw)

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

278

{

279

_permute_output.run();

280

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

281

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

282

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

283

void NEDepthwiseConvolutionLayer3x3::run_optimized()

284

{

285

// Run assembly function

286

_dwc_optimized_func.run();

// Permute output

if(_is_nchw)

{

_permute_output.run();

}

}

void NEDepthwiseConvolutionLayer3x3::run()

{

prepare();

Georgios Pinitas

2019-04-02 17:27:03 +0100

[diff] [blame^]

299

MemoryGroupResourceScope scope_mg(_memory_group);

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

// Permute input

if(_permute)

{

_permute_input.run();

305

}

306

307

_is_optimized ? run_optimized() : run_generic();

308

309

// Run activation

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

310

if(_is_activationlayer_enabled)

311

{

312

_activationlayer_function.run();

313

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

314

}

315

316

void NEDepthwiseConvolutionLayer3x3::prepare()

{

if(!_is_prepared)

{

// Permute weights

if(_permute)

{

_permuted_weights.allocator()->allocate();

324

_permute_weights.run();

325

_original_weights->mark_as_unused();

326

}

327

328

// Prepare optimized function

329

if(_is_optimized)

330

{

331

_dwc_optimized_func.prepare();

332

if(!_permuted_weights.is_used())

333

{

334

_permuted_weights.allocator()->free();

}

}

_is_prepared = true;

}

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

340

}

341

Giorgio Arena

04a8f8c

2017-11-23 11:45:24 +0000

[diff] [blame]

342

// Default constructor: default-constructs every kernel, function and intermediate tensor,
// clears all state flags and leaves the original weights pointer null.
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
    : _im2col_kernel(),
      _weights_reshape_kernel(),
      _v2mm_kernel(),
      _vector_to_tensor_kernel(),
      _output_stage_kernel(),
      _v2mm_input_fill_border(),
      _v2mm_weights_fill_border(),
      _permute_input(),
      _permute_weights(),
      _permute_output(),
      _activationlayer_function(),
      _input_reshaped(),
      _weights_reshaped(),
      _v2mm_output(),
      _output_reshaped(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_output(),
      _is_prepared(false),
      _is_quantized(false),
      _is_nhwc(false),
      _is_activationlayer_enabled(false),
      _original_weights(nullptr)
{
}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

349

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

350

unsigned int depth_multiplier, const ActivationLayerInfo &act_info)

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

351

{

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

352

const unsigned int channel_idx = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);

353

ARM_COMPUTE_UNUSED(channel_idx);

354

Georgios Pinitas

8cffcd6

2018-11-16 17:11:50 +0000

[diff] [blame]

355

ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

356

ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

357

ARM_COMPUTE_ERROR_ON((input->info()->dimension(channel_idx) * depth_multiplier) != weights->info()->dimension(channel_idx));

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

358

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

359

_is_nhwc = input->info()->data_layout() == DataLayout::NHWC;

360

361

ITensor *input_to_use = input;

362

const ITensor *weights_to_use = weights;

363

ITensor *output_to_use = output;

if(_is_nhwc)

{

_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));

368

_permuted_input.info()->set_data_layout(DataLayout::NCHW);

369

input_to_use = &_permuted_input;

370

371

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));

372

_permuted_weights.info()->set_data_layout(DataLayout::NCHW);

373

weights_to_use = &_permuted_weights;

374

}

375

376

const size_t weights_w = weights_to_use->info()->dimension(0);

377

const size_t weights_h = weights_to_use->info()->dimension(1);

378

const size_t weights_z = weights_to_use->info()->dimension(2);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

379

Georgios Pinitas

1562be3

2018-03-08 19:09:19 +0000

[diff] [blame]

380

_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

381

_is_prepared = false;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

382

_original_weights = weights_to_use;

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

383

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

384

// Should bias be appended ?

385

bool append_bias = (biases != nullptr) && !_is_quantized;

386

387

// Calculate output shape

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

388

TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier);

389

390

// Output auto inizialitation if not yet initialized

391

auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));

392

ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

393

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

394

if(_is_nhwc)

395

{

396

permute(output_shape, PermutationVector(1U, 2U, 0U));

397

_permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));

398

_permuted_output.info()->set_data_layout(DataLayout::NCHW);

399

output_to_use = &_permuted_output;

400

}

401

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

402

// Output width and height

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

403

const unsigned int conv_w = output_shape.x();

404

const unsigned int conv_h = output_shape.y();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

405

406

// Set up intermediate tensors

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

407

const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

408

const size_t conv_size = conv_w * conv_h;

409

410

// Im2Col configuration

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

411

TensorShape shape_im2col = input_to_use->info()->tensor_shape();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

412

shape_im2col.set(0, patch_size);

413

shape_im2col.set(1, conv_size);

414

shape_im2col.set(2, weights_z);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

415

_input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));

416

_im2col_kernel.configure(input_to_use, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

417

418

// Weights reshape configuration

419

const TensorShape shape_weights_reshape(patch_size, weights_z);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

420

_weights_reshaped.allocator()->init(weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));

421

_weights_reshape_kernel.configure(weights_to_use, &_weights_reshaped, append_bias ? biases : nullptr);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

422

423

// GEMV configuration

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

424

DataType v2mm_dt = (input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : input->info()->data_type();

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

425

TensorShape shape_v2mm_out = input_to_use->info()->tensor_shape();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

426

shape_v2mm_out.set(0, conv_size * weights_z);

427

shape_v2mm_out.set(1, 1);

428

shape_v2mm_out.set(2, 1);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

429

_v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

430

_v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

431

_output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

432

_vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output_to_use, conv_w, conv_h);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

433

434

// Output staged configuration

435

if(_is_quantized)

436

{

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

437

const QuantizationInfo output_quant_info = output->info()->quantization_info();

Georgios Pinitas

9be0c5a

2018-02-19 12:46:29 +0000

[diff] [blame]

438

439

float multiplier = input->info()->quantization_info().scale * weights->info()->quantization_info().scale / output_quant_info.scale;

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

440

int output_multiplier, output_shift;

441

quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

442

_output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, output_quant_info.offset);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

443

_output_reshaped.allocator()->allocate();

444

}

445

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

446

if(_is_nhwc)

447

{

448

_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

449

450

_permuted_input.allocator()->allocate();

451

_permuted_weights.allocator()->allocate();

452

_permuted_output.allocator()->allocate();

453

}

454

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

455

// Fill borders on inputs

Anthony Barbier

fb8dda2

2018-01-30 09:27:05 +0000

[diff] [blame]

456

PixelValue zero_in(static_cast<int32_t>(0));

457

PixelValue zero_w(static_cast<int32_t>(0));

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

458

if(_is_quantized)

459

{

460

zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().offset));

461

zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().offset));

462

}

463

BorderSize border_size = _v2mm_kernel.border_size();

464

_v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);

465

466

border_size.bottom = 0;

467

_v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, zero_w);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

468

469

// Allocate intermediate tensors

470

_input_reshaped.allocator()->allocate();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

471

_v2mm_output.allocator()->allocate();

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

472

473

//Configure Activation Layer

474

_is_activationlayer_enabled = act_info.enabled();

475

476

if(_is_activationlayer_enabled)

477

{

478

_activationlayer_function.configure(output, nullptr, act_info);

479

}

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

480

}

481

Georgios Pinitas

2018-08-17 17:16:06 +0100

[diff] [blame]

482

// Static validation of a generic depthwise convolution configuration.
//
// Mirrors configure(): builds the tensor infos of every intermediate (permuted tensors for
// NHWC, im2col output, reshaped weights, matrix-vector output, reshaped output) and validates
// each kernel against them, followed by the activation layer when enabled.
//
// The output info is cloned and the clone is auto-initialised from the input info and the
// computed output shape, so an empty (not yet initialised) output is validated exactly as
// configure() would initialise it. All output-related checks therefore run on the clone;
// for an already initialised output the clone is identical to the original.
//
// input            - source tensor info
// weights          - weights tensor info
// biases           - biases tensor info, may be nullptr
// output           - destination tensor info (may be empty)
// conv_info        - padding/stride information
// depth_multiplier - depth multiplier
// act_info         - activation to validate when enabled
//
// Returns an empty Status on success, or the first error encountered.
Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);

    const unsigned int width_idx  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const unsigned int height_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // Clone output to use auto init
    auto output_clone = output->clone();

    const ITensorInfo *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;
    const ITensorInfo *output_to_use  = output_clone.get();

    TensorShape permuted_input_shape   = input->tensor_shape();
    TensorShape permuted_weights_shape = weights->tensor_shape();
    TensorInfo  permuted_input;
    TensorInfo  permuted_weights;

    // NHWC tensors are validated through their NCHW-permuted counterparts
    if(input->data_layout() == DataLayout::NHWC)
    {
        permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));
        permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));

        permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NCHW));
        permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NCHW));

        input_to_use   = &permuted_input;
        weights_to_use = &permuted_weights;
    }

    const bool         is_quantized = is_data_type_quantized_asymmetric(input->data_type());
    const bool         append_bias  = (biases != nullptr) && !is_quantized;
    TensorShape        output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier);
    const size_t       weights_w    = weights_to_use->dimension(0);
    const size_t       weights_h    = weights_to_use->dimension(1);
    const size_t       weights_z    = weights_to_use->dimension(2);
    const unsigned int conv_w       = output_shape[width_idx];
    const unsigned int conv_h       = output_shape[height_idx];
    const size_t       patch_size   = weights_w * weights_h + (append_bias ? 1 : 0);
    const size_t       conv_size    = conv_w * conv_h;

    // Output auto initialization if not yet initialized.
    // The shape check must look at the auto-initialised clone: checking the original output
    // would reject every empty output even though configure() accepts and initialises it.
    auto_init_if_empty(*output_clone, input->clone()->set_tensor_shape(output_shape));
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output_clone->tensor_shape(), output_shape);

    TensorInfo permuted_output;
    if(input->data_layout() == DataLayout::NHWC)
    {
        // From here on output_shape is expressed in NCHW
        permute(output_shape, PermutationVector(1U, 2U, 0U));
        permuted_output = TensorInfo(output_clone->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_layout(DataLayout::NCHW));
        output_to_use   = &permuted_output;
    }

    // Im2Col configuration
    TensorShape shape_im2col = input_to_use->tensor_shape();
    shape_im2col.set(0, patch_size);
    shape_im2col.set(1, conv_size);
    shape_im2col.set(2, weights_z);
    TensorInfo input_reshaped(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseIm2ColKernel::validate(input_to_use, &input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier));

    // Weights reshape configuration
    const TensorShape shape_weights_reshape(patch_size, weights_z);
    TensorInfo        weights_reshaped(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseWeightsReshapeKernel::validate(weights_to_use, &weights_reshaped, append_bias ? biases : nullptr));

    // GEMV configuration (S32 result for QASYMM8 input)
    DataType    v2mm_dt        = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
    TensorShape shape_v2mm_out = input_to_use->tensor_shape();
    shape_v2mm_out.set(0, conv_size * weights_z);
    shape_v2mm_out.set(1, 1);
    shape_v2mm_out.set(2, 1);
    TensorInfo v2mm_output(input->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixVectorMultiplyKernel::validate(&input_reshaped, &weights_reshaped, &v2mm_output));

    // Vector to tensor (into an intermediate when an output stage follows)
    TensorInfo output_reshaped(v2mm_output.clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_to_use->tensor_shape()));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseVectorToTensorKernel::validate(&v2mm_output, (is_quantized) ? &output_reshaped : output_to_use, conv_w, conv_h));

    // Output stage (quantized data only)
    if(is_quantized)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output_to_use));
    }

    // Validate Activation Layer on the auto-initialised clone, for the same reason as the shape check
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output_clone.get(), nullptr, act_info));
    }

    return Status{};
}

Giorgio Arena

2017-11-23 11:45:24 +0000

[diff] [blame]

577

void NEDepthwiseConvolutionLayer::run()

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

578

{

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

579

prepare();

Georgios Pinitas

1562be3

2018-03-08 19:09:19 +0000

[diff] [blame]

580

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

581

if(_is_nhwc)

582

{

583

_permute_input.run();

584

}

585

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

586

NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

587

NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

588

NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);

589

NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

590

if(_is_quantized)

591

{

592

NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);

593

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

if(_is_nhwc)

{

_permute_output.run();

598

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

599

600

if(_is_activationlayer_enabled)

601

{

602

_activationlayer_function.run();

603

}

Anthony Barbier

fb8dda2

2018-01-30 09:27:05 +0000

[diff] [blame]

604

}

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

605

606

void NEDepthwiseConvolutionLayer::prepare()

{

if(!_is_prepared)

{

ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

611

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

612

if(_is_nhwc)

613

{

614

_permute_weights.run();

615

}

616

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

617

// Run reshape and mark original weights as unused

618

_weights_reshaped.allocator()->allocate();

619

NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);

620

NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);

621

_original_weights->mark_as_unused();

_is_prepared = true;

}

}

Georgios Pinitas