Blame - src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp - ml/ComputeLibrary

2017-10-18 17:58:22 +0100

[diff] [blame]

{

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

48

void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor *input,

49

const ITensor *weights,

50

const ITensor *biases,

51

ITensor *output,

52

const PadStrideInfo &conv_info,

53

unsigned int depth_multiplier,

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

54

const ActivationLayerInfo &act_info,

55

const Size2D &dilation)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

56

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

57

ARM_COMPUTE_UNUSED(act_info);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

58

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

59

PixelValue zero_value(0.f);

60

Georgios Pinitas

2018-09-12 20:11:34 +0100

[diff] [blame]

61

// Initialize the intermediate accumulator tensor in case of quantized input

62

if(_is_quantized)

63

{

64

TensorShape accum_shape = output->info()->tensor_shape();

65

DataLayout accum_layout = output->info()->data_layout();

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

66

if(!_is_nchw)

Georgios Pinitas

2018-09-12 20:11:34 +0100

[diff] [blame]

67

{

68

permute(accum_shape, PermutationVector(1U, 2U, 0U));

69

accum_layout = DataLayout::NCHW;

70

}

71

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

72

_memory_group.manage(&_accumulator);

Georgios Pinitas

2481d46

2019-02-19 18:47:46 +0000

[diff] [blame]

73

_accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));

Georgios Pinitas

2018-09-12 20:11:34 +0100

[diff] [blame]

74

_accumulator.info()->set_data_layout(accum_layout);

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

75

zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));

Georgios Pinitas

2018-09-12 20:11:34 +0100

[diff] [blame]

76

}

77

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

78

if(!_is_nchw)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

79

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

80

_memory_group.manage(&_permuted_input);

81

_memory_group.manage(&_permuted_output);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

82

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

83

// Configure the function to transform the input tensor from NHWC -> NCHW

84

_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));

85

_permuted_input.info()->set_data_layout(DataLayout::NCHW);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

86

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

87

// Configure the function to transform the weights tensor from HWI -> IHW

88

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));

89

_permuted_weights.info()->set_data_layout(DataLayout::NCHW);

Pablo Tello

a28aebc

2019-06-03 14:59:48 +0100

[diff] [blame]

90

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

91

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

92

// Configure depthwise

93

_dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

94

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

95

// Configure border handler

96

_border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

97

98

// Allocate tensors

99

_permuted_input.allocator()->allocate();

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

100

}

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

101

else

102

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

103

// Configure depthwise convolution kernel

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

104

_dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

Georgios Pinitas

9be0c5a

2018-02-19 12:46:29 +0000

[diff] [blame]

105

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

106

// Configure border handler

107

_border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

108

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

109

110

// Configure biases accumulation

111

if(_is_quantized)

112

{

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

113

const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();

114

const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();

115

const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

116

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

117

float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;

Michalis Spyrou

a4f378d

2019-04-26 14:54:54 +0100

[diff] [blame]

118

int output_multiplier;

119

int output_shift;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

120

quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

121

_output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

122

_accumulator.allocator()->allocate();

123

}

124

else if(_has_bias)

125

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

126

_output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

127

}

128

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

129

// Permute output

130

if(!_is_nchw)

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

131

{

132

// Configure the function to transform the convoluted output to NHWC

133

_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

134

_permuted_output.allocator()->allocate();

135

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

136

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

137

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

138

void NEDepthwiseConvolutionLayer3x3::configure_optimized(const ITensor *input,

139

const ITensor *weights,

140

const ITensor *biases,

141

ITensor *output,

142

const PadStrideInfo &conv_info,

143

unsigned int depth_multiplier,

144

const ActivationLayerInfo &act_info)

145

{

146

ActivationLayerInfo act_info_to_use = ActivationLayerInfo();

147

const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);

148

const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);

149

_is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);

150

if(!_is_activationlayer_enabled)

151

{

152

act_info_to_use = act_info;

}

if(_is_nchw)

{

_memory_group.manage(&_permuted_input);

158

_memory_group.manage(&_permuted_output);

159

160

// Configure the function to transform the input tensor from NCHW -> NHWC

161

_permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));

162

_permuted_input.info()->set_data_layout(DataLayout::NHWC);

163

164

// Configure the function to transform the weights tensor from IHW -> HWI

165

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));

166

_permuted_weights.info()->set_data_layout(DataLayout::NHWC);

167

Pablo Tello

a28aebc

2019-06-03 14:59:48 +0100

[diff] [blame]

168

_permuted_output.info()->set_data_layout(DataLayout::NHWC);

169

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

170

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

171

// Configure optimized depthwise

172

_dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use);

173

174

// Configure the function to transform the convoluted output to ACL's native ordering format NCHW

175

_permuted_output.info()->set_data_layout(DataLayout::NHWC);

176

_permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

177

178

// Allocate tensors

179

_permuted_input.allocator()->allocate();

180

_permuted_output.allocator()->allocate();

}

else

{

_dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use);

}

}

void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input,

189

const ITensor *weights,

190

const ITensor *biases,

191

ITensor *output, const PadStrideInfo &conv_info,

192

unsigned int depth_multiplier,

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

193

const ActivationLayerInfo &act_info,

194

const Size2D &dilation)

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

195

{

196

ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);

197

ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

198

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

199

// idx_w and idx_h only used for validation

200

const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);

201

const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);

202

ARM_COMPUTE_UNUSED(idx_w);

203

ARM_COMPUTE_UNUSED(idx_h);

204

205

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_w) + (weights->info()->dimension(idx_w) - 1) * (dilation.x() - 1) > input->info()->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());

206

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_h) + (weights->info()->dimension(idx_h) - 1) * (dilation.y() - 1) > input->info()->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

207

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

208

_original_weights = weights;

209

_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());

210

_has_bias = biases != nullptr;

211

_is_optimized = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),

212

weights->info(),

213

conv_info,

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

214

depth_multiplier, dilation);

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

215

_is_nchw = input->info()->data_layout() == DataLayout::NCHW;

216

_permute = _is_optimized == _is_nchw;

217

_is_prepared = false;

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

218

_is_activationlayer_enabled = act_info.enabled();

219

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

220

// Configure appropriate pipeline

221

if(_is_optimized)

222

{

223

configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info);

224

}

225

else

226

{

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

227

configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

228

}

229

230

// Configure activation

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

231

if(_is_activationlayer_enabled)

232

{

233

_activationlayer_function.configure(output, nullptr, act_info);

234

}

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

235

}

236

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

237

/** Static check of whether the given tensor infos describe a valid 3x3 depthwise convolution configuration.
 *
 * The generic path validates the depthwise kernel with the caller's @p dilation, matching what
 * configure_generic() passes to the kernel at configuration time; previously the dilation was dropped here
 * and the kernel was validated with its default dilation.
 *
 * NOTE(review): the assembly-dispatch branch is still validated without dilation, mirroring
 * configure_optimized() of this class, which does not receive one - confirm this is intended.
 *
 * @param[in] input            Source tensor info. Must not be nullptr and must have a known data layout.
 * @param[in] weights          Weights tensor info. Must not be nullptr.
 * @param[in] biases           Biases tensor info. May be nullptr; otherwise must be 1D with as many elements as
 *                             the channel dimension of @p weights.
 * @param[in] output           Destination tensor info. Must not be nullptr.
 * @param[in] conv_info        Padding and stride information.
 * @param[in] depth_multiplier Depth multiplier.
 * @param[in] act_info         Activation to apply to the result; validated when enabled.
 * @param[in] dilation         Dilation of the kernel along x and y; both must be at least 1.
 *
 * @return An empty Status on success, otherwise the first error encountered.
 */
Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo         *input,
                                                const ITensorInfo         *weights,
                                                const ITensorInfo         *biases,
                                                const ITensorInfo         *output,
                                                const PadStrideInfo       &conv_info,
                                                unsigned int               depth_multiplier,
                                                const ActivationLayerInfo &act_info,
                                                const Size2D              &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);

    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // The dilated kernel extent, k + (k - 1) * (d - 1), must fit inside the padded input along each axis
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
    {
        // Generic path: quantized inputs go through an intermediate S32 accumulator followed by the output stage
        const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
        TensorInfo accumulator  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));

        // Validate the kernel with the same dilation configure_generic() configures it with
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier, dilation));

        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier));
    }

    // Validate the activation layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

285

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

286

void NEDepthwiseConvolutionLayer3x3::run_generic()

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

287

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

288

// Fill border

289

NEScheduler::get().schedule(&_border_handler, Window::DimX);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

290

291

// Execute depthwise convolution

292

NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

293

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

294

// Add biases

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

295

if(_has_bias || _is_quantized)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

296

{

Michalis Spyrou

b91e34c

2017-12-20 15:50:55 +0000

[diff] [blame]

297

NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

298

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

299

300

// Permute output

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

301

if(!_is_nchw)

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

302

{

303

_permute_output.run();

304

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

305

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

306

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

307

void NEDepthwiseConvolutionLayer3x3::run_optimized()

308

{

309

// Run assembly function

310

_dwc_optimized_func.run();

// Permute output

if(_is_nchw)

{

_permute_output.run();

}

}

void NEDepthwiseConvolutionLayer3x3::run()

{

prepare();

Georgios Pinitas

2019-04-02 17:27:03 +0100

[diff] [blame]

323

MemoryGroupResourceScope scope_mg(_memory_group);

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

// Permute input

if(_permute)

{

_permute_input.run();

329

}

330

331

_is_optimized ? run_optimized() : run_generic();

332

333

// Run activation

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

334

if(_is_activationlayer_enabled)

335

{

336

_activationlayer_function.run();

337

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

338

}

339

340

void NEDepthwiseConvolutionLayer3x3::prepare()

{

if(!_is_prepared)

{

// Permute weights

if(_permute)

{

_permuted_weights.allocator()->allocate();

348

_permute_weights.run();

349

_original_weights->mark_as_unused();

350

}

351

352

// Prepare optimized function

353

if(_is_optimized)

354

{

355

_dwc_optimized_func.prepare();

356

if(!_permuted_weights.is_used())

357

{

358

_permuted_weights.allocator()->free();

}

}

_is_prepared = true;

}

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

364

}

365

Georgios Pinitas

30271c7

2019-06-24 14:56:34 +0100

[diff] [blame]

366

/** Constructor.
 *
 * The memory manager is handed to both the internal memory group and the assembly depthwise function; every
 * other member is default-constructed, with the layout flag starting as NCHW and all remaining flags cleared.
 *
 * @param[in] memory_manager Memory manager shared by the memory group and the assembly function.
 */
NEDepthwiseConvolutionLayerOptimized::NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager),
      _dwc_kernel(),
      _dwc_optimized_func(memory_manager),
      _output_stage_kernel(),
      _border_handler(),
      _permute_input(),
      _permute_weights(),
      _permute_output(),
      _activationlayer_function(),
      _accumulator(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_output(),
      _original_weights(nullptr),
      _has_bias(false),
      _is_quantized(false),
      _is_optimized(false),
      _is_nchw(true),
      _permute(false),
      _is_activationlayer_enabled(false),
      _is_prepared(false)
{
}

void NEDepthwiseConvolutionLayerOptimized::configure_generic(ITensor *input,

374

const ITensor *weights,

375

const ITensor *biases,

376

ITensor *output,

377

const PadStrideInfo &conv_info,

378

unsigned int depth_multiplier,

379

const ActivationLayerInfo &act_info,

380

const Size2D &dilation)

381

{

382

ARM_COMPUTE_UNUSED(act_info);

383

384

PixelValue zero_value(0.f);

385

386

// Initialize the intermediate accumulator tensor in case of quantized input

387

if(_is_quantized)

388

{

389

TensorShape accum_shape = output->info()->tensor_shape();

390

DataLayout accum_layout = output->info()->data_layout();

391

if(!_is_nchw)

392

{

393

permute(accum_shape, PermutationVector(1U, 2U, 0U));

394

accum_layout = DataLayout::NCHW;

395

}

396

397

_memory_group.manage(&_accumulator);

398

_accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));

399

_accumulator.info()->set_data_layout(accum_layout);

400

zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));

}

if(!_is_nchw)

{

_memory_group.manage(&_permuted_input);

406

_memory_group.manage(&_permuted_output);

407

408

// Configure the function to transform the input tensor from NHWC -> NCHW

409

_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));

410

_permuted_input.info()->set_data_layout(DataLayout::NCHW);

411

412

// Configure the function to transform the weights tensor from HWI -> IHW

413

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));

414

_permuted_weights.info()->set_data_layout(DataLayout::NCHW);

415

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

416

417

// Configure depthwise

418

_dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

419

420

// Configure border handler

421

_border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

422

423

// Allocate tensors

424

_permuted_input.allocator()->allocate();

}

else

{

// Configure depthwise convolution kernel

429

_dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

430

431

// Configure border handler

432

_border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

433

}

434

435

// Configure biases accumulation

436

if(_is_quantized)

437

{

438

const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();

439

const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();

440

const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

441

442

float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;

443

int output_multiplier;

444

int output_shift;

445

quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);

446

_output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);

447

_accumulator.allocator()->allocate();

}

else if(_has_bias)

{

_output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);

}

// Permute output

if(!_is_nchw)

{

// Configure the function to transform the convoluted output to NHWC

458

_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

459

_permuted_output.allocator()->allocate();

}

}

void NEDepthwiseConvolutionLayerOptimized::configure_optimized(const ITensor *input,

464

const ITensor *weights,

465

const ITensor *biases,

466

ITensor *output,

467

const PadStrideInfo &conv_info,

468

unsigned int depth_multiplier,

469

const ActivationLayerInfo &act_info,

470

const Size2D &dilation)

471

{

472

ActivationLayerInfo act_info_to_use = ActivationLayerInfo();

473

const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);

474

const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);

475

_is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);

476

if(!_is_activationlayer_enabled)

477

{

478

act_info_to_use = act_info;

}

if(_is_nchw)

{

_memory_group.manage(&_permuted_input);

484

_memory_group.manage(&_permuted_output);

485

486

// Configure the function to transform the input tensor from NCHW -> NHWC

487

_permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));

488

_permuted_input.info()->set_data_layout(DataLayout::NHWC);

489

490

// Configure the function to transform the weights tensor from IHW -> HWI

491

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));

492

_permuted_weights.info()->set_data_layout(DataLayout::NHWC);

493

494

_permuted_output.info()->set_data_layout(DataLayout::NHWC);

495

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

496

497

// Configure optimized depthwise

498

_dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);

499

500

// Configure the function to transform the convoluted output to ACL's native ordering format NCHW

501

_permuted_output.info()->set_data_layout(DataLayout::NHWC);

502

_permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

503

504

// Allocate tensors

505

_permuted_input.allocator()->allocate();

506

_permuted_output.allocator()->allocate();

}

else

{

_dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);

}

}

void NEDepthwiseConvolutionLayerOptimized::configure(ITensor *input,

515

const ITensor *weights,

516

const ITensor *biases,

517

ITensor *output, const PadStrideInfo &conv_info,

518

unsigned int depth_multiplier,

519

const ActivationLayerInfo &act_info,

520

const Size2D &dilation)

521

{

522

ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);

523

ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

524

525

// idx_w and idx_h only used for validation

526

const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);

527

const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);

528

ARM_COMPUTE_UNUSED(idx_w);

529

ARM_COMPUTE_UNUSED(idx_h);

530

531

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_w) + (weights->info()->dimension(idx_w) - 1) * (dilation.x() - 1) > input->info()->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());

532

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_h) + (weights->info()->dimension(idx_h) - 1) * (dilation.y() - 1) > input->info()->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

533

534

_original_weights = weights;

535

_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());

536

_has_bias = biases != nullptr;

537

_is_optimized = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),

weights->info(),

conv_info,

depth_multiplier,

dilation);

_is_nchw = input->info()->data_layout() == DataLayout::NCHW;

543

_permute = _is_optimized == _is_nchw;

544

_is_prepared = false;

545

_is_activationlayer_enabled = act_info.enabled();

546

547

// Configure appropriate pipeline

548

if(_is_optimized)

549

{

550

configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);

}

else

{

configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);

555

}

556

557

// Configure activation

558

if(_is_activationlayer_enabled)

559

{

560

_activationlayer_function.configure(output, nullptr, act_info);

}

}

/** Static check of whether the given tensor infos describe a valid optimized depthwise convolution configuration.
 *
 * @param[in] input            Source tensor info. Must not be nullptr and must have a known data layout.
 * @param[in] weights          Weights tensor info. Must not be nullptr.
 * @param[in] biases           Biases tensor info. May be nullptr; otherwise must be 1D with as many elements as
 *                             the channel dimension of @p weights.
 * @param[in] output           Destination tensor info. Must not be nullptr.
 * @param[in] conv_info        Padding and stride information.
 * @param[in] depth_multiplier Depth multiplier.
 * @param[in] act_info         Activation to apply to the result; validated when enabled.
 * @param[in] dilation         Dilation of the kernel along x and y; both must be at least 1.
 *
 * @return An empty Status on success, otherwise the first error encountered.
 */
Status NEDepthwiseConvolutionLayerOptimized::validate(const ITensorInfo         *input,
                                                      const ITensorInfo         *weights,
                                                      const ITensorInfo         *biases,
                                                      const ITensorInfo         *output,
                                                      const PadStrideInfo       &conv_info,
                                                      unsigned int               depth_multiplier,
                                                      const ActivationLayerInfo &act_info,
                                                      const Size2D              &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);

    const size_t width_idx  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t height_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // The dilated kernel extent, k + (k - 1) * (d - 1), must fit inside the padded input along each axis
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) + (weights->dimension(width_idx) - 1) * (dilation.x() - 1) > input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(height_idx) + (weights->dimension(height_idx) - 1) * (dilation.y() - 1) > input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    const bool use_assembly = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation);
    if(use_assembly)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
    }
    else
    {
        // Generic path: quantized inputs go through an intermediate S32 accumulator followed by the output stage
        const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
        TensorInfo accumulator  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier, dilation));

        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
        }
    }

    // Validate the activation layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayerOptimized::run_generic()

614

{

615

// Fill border

616

NEScheduler::get().schedule(&_border_handler, Window::DimX);

617

618

// Execute depthwise convolution

619

NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

620

621

// Add biases

622

if(_has_bias || _is_quantized)

623

{

624

NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);

}

// Permute output

if(!_is_nchw)

{

_permute_output.run();

}

}

void NEDepthwiseConvolutionLayerOptimized::run_optimized()

635

{

636

// Run assembly function

637

_dwc_optimized_func.run();

// Permute output

if(_is_nchw)

{

_permute_output.run();

}

}

void NEDepthwiseConvolutionLayerOptimized::run()

{

prepare();

MemoryGroupResourceScope scope_mg(_memory_group);

// Permute input

if(_permute)

{

_permute_input.run();

656

}

657

658

_is_optimized ? run_optimized() : run_generic();

659

660

// Run activation

661

if(_is_activationlayer_enabled)

662

{

663

_activationlayer_function.run();

}

}

void NEDepthwiseConvolutionLayerOptimized::prepare()

{

if(!_is_prepared)

{

// Permute weights

if(_permute)

{

_permuted_weights.allocator()->allocate();

675

_permute_weights.run();

676

_original_weights->mark_as_unused();

677

}

678

679

// Prepare optimized function

680

if(_is_optimized)

681

{

682

_dwc_optimized_func.prepare();

683

if(!_permuted_weights.is_used())

684

{

685

_permuted_weights.allocator()->free();

}

}

_is_prepared = true;

}

}

Giorgio Arena

2017-11-23 11:45:24 +0000

[diff] [blame]

693

/** Default constructor: default-constructs every kernel, function and tensor member and clears all state flags. */
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
    : _im2col_kernel(),
      _weights_reshape_kernel(),
      _v2mm_kernel(),
      _vector_to_tensor_kernel(),
      _output_stage_kernel(),
      _v2mm_input_fill_border(),
      _v2mm_weights_fill_border(),
      _permute_input(),
      _permute_weights(),
      _permute_output(),
      _activationlayer_function(),
      _input_reshaped(),
      _weights_reshaped(),
      _v2mm_output(),
      _output_reshaped(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_output(),
      _is_prepared(false),
      _is_quantized(false),
      _is_nhwc(false),
      _is_activationlayer_enabled(false),
      _original_weights(nullptr)
{
}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

700

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

701

unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

702

{

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

703

const unsigned int channel_idx = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);

704

ARM_COMPUTE_UNUSED(channel_idx);

Georgios Pinitas

8cffcd6

2018-11-16 17:11:50 +0000

[diff] [blame]

705

ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

706

ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

707

ARM_COMPUTE_ERROR_ON((input->info()->dimension(channel_idx) * depth_multiplier) != weights->info()->dimension(channel_idx));

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

708

// idx_w and idx_h only used for validation

709

const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);

710

const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);

711

ARM_COMPUTE_UNUSED(idx_w);

712

ARM_COMPUTE_UNUSED(idx_h);

713

714

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_w) + (weights->info()->dimension(idx_w) - 1) * (dilation.x() - 1) > input->info()->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());

715

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_h) + (weights->info()->dimension(idx_h) - 1) * (dilation.y() - 1) > input->info()->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

716

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

717

_is_nhwc = input->info()->data_layout() == DataLayout::NHWC;

718

719

ITensor *input_to_use = input;

720

const ITensor *weights_to_use = weights;

721

ITensor *output_to_use = output;

if(_is_nhwc)

{

_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));

726

_permuted_input.info()->set_data_layout(DataLayout::NCHW);

727

input_to_use = &_permuted_input;

728

729

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));

730

_permuted_weights.info()->set_data_layout(DataLayout::NCHW);

731

weights_to_use = &_permuted_weights;

732

}

733

734

const size_t weights_w = weights_to_use->info()->dimension(0);

735

const size_t weights_h = weights_to_use->info()->dimension(1);

736

const size_t weights_z = weights_to_use->info()->dimension(2);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

737

Georgios Pinitas

1562be3

2018-03-08 19:09:19 +0000

[diff] [blame]

738

_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

739

_is_prepared = false;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

740

_original_weights = weights_to_use;

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

741

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

742

// Should bias be appended ?

743

bool append_bias = (biases != nullptr) && !_is_quantized;

744

745

// Calculate output shape

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

746

TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier, dilation);

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

747

748

// Output auto inizialitation if not yet initialized

749

auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));

750

ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

751

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

752

if(_is_nhwc)

753

{

754

permute(output_shape, PermutationVector(1U, 2U, 0U));

755

_permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));

756

_permuted_output.info()->set_data_layout(DataLayout::NCHW);

Pablo Tello

a28aebc

2019-06-03 14:59:48 +0100

[diff] [blame]

757

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

758

output_to_use = &_permuted_output;

759

}

760

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

761

// Output width and height

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

762

const unsigned int conv_w = output_shape.x();

763

const unsigned int conv_h = output_shape.y();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

764

765

// Set up intermediate tensors

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

766

const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

767

const size_t conv_size = conv_w * conv_h;

768

769

// Im2Col configuration

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

770

TensorShape shape_im2col = input_to_use->info()->tensor_shape();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

771

shape_im2col.set(0, patch_size);

772

shape_im2col.set(1, conv_size);

773

shape_im2col.set(2, weights_z);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

774

_input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

775

_im2col_kernel.configure(input_to_use, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier, dilation);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

776

777

// Weights reshape configuration

778

const TensorShape shape_weights_reshape(patch_size, weights_z);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

779

_weights_reshaped.allocator()->init(weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));

780

_weights_reshape_kernel.configure(weights_to_use, &_weights_reshaped, append_bias ? biases : nullptr);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

781

782

// GEMV configuration

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

783

DataType v2mm_dt = (input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : input->info()->data_type();

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

784

TensorShape shape_v2mm_out = input_to_use->info()->tensor_shape();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

785

shape_v2mm_out.set(0, conv_size * weights_z);

786

shape_v2mm_out.set(1, 1);

787

shape_v2mm_out.set(2, 1);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

788

_v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

789

_v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

790

_output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

791

_vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output_to_use, conv_w, conv_h);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

792

793

// Output staged configuration

794

if(_is_quantized)

795

{

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

796

const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();

797

const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();

798

const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

Georgios Pinitas

9be0c5a

2018-02-19 12:46:29 +0000

[diff] [blame]

799

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

800

float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;

Michalis Spyrou

a4f378d

2019-04-26 14:54:54 +0100

[diff] [blame]

801

int output_multiplier;

802

int output_shift;

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

803

quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

804

_output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

805

_output_reshaped.allocator()->allocate();

806

}

807

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

808

if(_is_nhwc)

809

{

810

_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

811

812

_permuted_input.allocator()->allocate();

813

_permuted_weights.allocator()->allocate();

814

_permuted_output.allocator()->allocate();

815

}

816

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

817

// Fill borders on inputs

Anthony Barbier

fb8dda2

2018-01-30 09:27:05 +0000

[diff] [blame]

818

PixelValue zero_in(static_cast<int32_t>(0));

819

PixelValue zero_w(static_cast<int32_t>(0));

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

820

if(_is_quantized)

821

{

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

822

zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().uniform().offset));

823

zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().uniform().offset));

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

824

}

825

BorderSize border_size = _v2mm_kernel.border_size();

826

_v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);

827

828

border_size.bottom = 0;

829

_v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, zero_w);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

830

831

// Allocate intermediate tensors

832

_input_reshaped.allocator()->allocate();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

833

_v2mm_output.allocator()->allocate();

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

834

835

//Configure Activation Layer

836

_is_activationlayer_enabled = act_info.enabled();

837

838

if(_is_activationlayer_enabled)

839

{

840

_activationlayer_function.configure(output, nullptr, act_info);

841

}

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

842

}

843

Georgios Pinitas

2018-08-17 17:16:06 +0100

[diff] [blame]

844

/** Static check of whether the given tensor infos would produce a valid configuration.
 *
 * Builds the tensor infos of the intermediate tensors used by the generic
 * path and validates each kernel of the chain against them. The checks run
 * on a clone of @p output that is auto-initialised from @p input when empty,
 * so @p output itself is never modified.
 *
 * @param[in] input            Source tensor info. Its data layout must not be UNKNOWN.
 * @param[in] weights          Weights tensor info.
 * @param[in] biases           Biases tensor info, may be nullptr.
 * @param[in] output           Destination tensor info. Must not be nullptr.
 * @param[in] conv_info        Padding and stride information.
 * @param[in] depth_multiplier Depth multiplier forwarded to the shape calculator and the im2col kernel.
 * @param[in] act_info         Activation to validate against the output, if enabled.
 * @param[in] dilation         Dilation of the kernel in x and y; both components must be at least 1.
 *
 * @return An empty Status when every check passes, otherwise the first error encountered.
 */
Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);

    const unsigned int width_idx  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const unsigned int height_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // The dilated kernel extent must fit inside the padded input along both spatial dimensions
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) + (weights->dimension(width_idx) - 1) * (dilation.x() - 1) > input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(height_idx) + (weights->dimension(height_idx) - 1) * (dilation.y() - 1) > input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom());

    // Clone output to use auto init
    auto output_clone = output->clone();

    const ITensorInfo *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;
    const ITensorInfo *output_to_use  = output_clone.get();

    TensorShape permuted_input_shape   = input->tensor_shape();
    TensorShape permuted_weights_shape = weights->tensor_shape();
    TensorInfo  permuted_input;
    TensorInfo  permuted_weights;

    if(input->data_layout() == DataLayout::NHWC)
    {
        // Validate the kernels against permuted infos tagged as NCHW
        permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));
        permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));

        permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NCHW));
        permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NCHW));

        input_to_use   = &permuted_input;
        weights_to_use = &permuted_weights;
    }

    const bool         is_quantized = is_data_type_quantized_asymmetric(input->data_type());
    const bool         append_bias  = (biases != nullptr) && !is_quantized;
    TensorShape        output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
    const size_t       weights_w    = weights_to_use->dimension(0);
    const size_t       weights_h    = weights_to_use->dimension(1);
    const size_t       weights_z    = weights_to_use->dimension(2);
    const unsigned int conv_w       = output_shape[width_idx];
    const unsigned int conv_h       = output_shape[height_idx];
    const size_t       patch_size   = weights_w * weights_h + (append_bias ? 1 : 0);
    const size_t       conv_size    = conv_w * conv_h;

    // Output auto initialization if not yet initialized
    auto_init_if_empty(*output_clone, input->clone()->set_tensor_shape(output_shape));
    // Check the (possibly auto-initialised) clone rather than the raw output, so that an
    // empty output info is accepted here just as configure() accepts an empty output tensor
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output_clone->tensor_shape(), output_shape);

    TensorInfo permuted_output;
    if(input->data_layout() == DataLayout::NHWC)
    {
        permute(output_shape, PermutationVector(1U, 2U, 0U));
        permuted_output = TensorInfo(output_clone->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_layout(DataLayout::NCHW));
        output_to_use   = &permuted_output;
    }

    // Im2Col configuration
    TensorShape shape_im2col = input_to_use->tensor_shape();
    shape_im2col.set(0, patch_size);
    shape_im2col.set(1, conv_size);
    shape_im2col.set(2, weights_z);
    TensorInfo input_reshaped(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseIm2ColKernel::validate(input_to_use, &input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier, dilation));

    // Weights reshape configuration
    const TensorShape shape_weights_reshape(patch_size, weights_z);
    TensorInfo        weights_reshaped(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseWeightsReshapeKernel::validate(weights_to_use, &weights_reshaped, append_bias ? biases : nullptr));

    // GEMV configuration
    DataType    v2mm_dt        = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
    TensorShape shape_v2mm_out = input_to_use->tensor_shape();
    shape_v2mm_out.set(0, conv_size * weights_z);
    shape_v2mm_out.set(1, 1);
    shape_v2mm_out.set(2, 1);
    TensorInfo v2mm_output(input->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixVectorMultiplyKernel::validate(&input_reshaped, &weights_reshaped, &v2mm_output));

    TensorInfo output_reshaped(v2mm_output.clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_to_use->tensor_shape()));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseVectorToTensorKernel::validate(&v2mm_output, (is_quantized) ? &output_reshaped : output_to_use, conv_w, conv_h));

    if(is_quantized)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output_to_use));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        // Use the auto-initialised clone so that an empty output info does not fail this check
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output_clone.get(), nullptr, act_info));
    }

    return Status{};
}

Giorgio Arena

2017-11-23 11:45:24 +0000

[diff] [blame]

942

void NEDepthwiseConvolutionLayer::run()

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

943

{

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

944

prepare();

Georgios Pinitas

1562be3

2018-03-08 19:09:19 +0000

[diff] [blame]

945

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

946

if(_is_nhwc)

947

{

948

_permute_input.run();

949

}

950

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

951

NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

952

NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

953

NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);

954

NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

955

if(_is_quantized)

956

{

957

NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);

958

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

if(_is_nhwc)

{

_permute_output.run();

963

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

964

965

if(_is_activationlayer_enabled)

966

{

967

_activationlayer_function.run();

968

}

Anthony Barbier

fb8dda2

2018-01-30 09:27:05 +0000

[diff] [blame]

969

}

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

970

971

void NEDepthwiseConvolutionLayer::prepare()

{

if(!_is_prepared)

{

ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

976

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

977

if(_is_nhwc)

978

{

979

_permute_weights.run();

980

}

981

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

982

// Run reshape and mark original weights as unused

983

_weights_reshaped.allocator()->allocate();

984

NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);

985

NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);

986

_original_weights->mark_as_unused();

_is_prepared = true;

}

}

Georgios Pinitas