Blame - src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp - ml/ComputeLibrary

2017-10-18 17:58:22 +0100

[diff] [blame]

{

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

48

void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor *input,

49

const ITensor *weights,

50

const ITensor *biases,

51

ITensor *output,

52

const PadStrideInfo &conv_info,

53

unsigned int depth_multiplier,

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

54

const ActivationLayerInfo &act_info,

55

const Size2D &dilation)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

56

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

57

ARM_COMPUTE_UNUSED(act_info);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

58

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

59

PixelValue zero_value(0.f);

60

Georgios Pinitas

2018-09-12 20:11:34 +0100

[diff] [blame]

61

// Initialize the intermediate accumulator tensor in case of quantized input

62

if(_is_quantized)

63

{

64

TensorShape accum_shape = output->info()->tensor_shape();

65

DataLayout accum_layout = output->info()->data_layout();

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

66

if(!_is_nchw)

Georgios Pinitas

2018-09-12 20:11:34 +0100

[diff] [blame]

67

{

68

permute(accum_shape, PermutationVector(1U, 2U, 0U));

69

accum_layout = DataLayout::NCHW;

70

}

71

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

72

_memory_group.manage(&_accumulator);

Georgios Pinitas

2481d46

2019-02-19 18:47:46 +0000

[diff] [blame]

73

_accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));

Georgios Pinitas

2018-09-12 20:11:34 +0100

[diff] [blame]

74

_accumulator.info()->set_data_layout(accum_layout);

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

75

zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));

Georgios Pinitas

2018-09-12 20:11:34 +0100

[diff] [blame]

76

}

77

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

78

if(!_is_nchw)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

79

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

80

_memory_group.manage(&_permuted_input);

81

_memory_group.manage(&_permuted_output);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

82

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

83

// Configure the function to transform the input tensor from NHWC -> NCHW

84

_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));

85

_permuted_input.info()->set_data_layout(DataLayout::NCHW);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

86

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

87

// Configure the function to transform the weights tensor from HWI -> IHW

88

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));

89

_permuted_weights.info()->set_data_layout(DataLayout::NCHW);

Pablo Tello

a28aebc

2019-06-03 14:59:48 +0100

[diff] [blame^]

90

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

91

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

92

// Configure depthwise

93

_dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

94

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

95

// Configure border handler

96

_border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

97

98

// Allocate tensors

99

_permuted_input.allocator()->allocate();

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

100

}

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

101

else

102

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

103

// Configure depthwise convolution kernel

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

104

_dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

Georgios Pinitas

9be0c5a

2018-02-19 12:46:29 +0000

[diff] [blame]

105

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

106

// Configure border handler

107

_border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

108

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

109

110

// Configure biases accumulation

111

if(_is_quantized)

112

{

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

113

const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();

114

const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();

115

const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

116

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

117

float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;

Michalis Spyrou

a4f378d

2019-04-26 14:54:54 +0100

[diff] [blame]

118

int output_multiplier;

119

int output_shift;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

120

quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

121

_output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

122

_accumulator.allocator()->allocate();

123

}

124

else if(_has_bias)

125

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

126

_output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

127

}

128

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

129

// Permute output

130

if(!_is_nchw)

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

131

{

132

// Configure the function to transform the convoluted output to NHWC

133

_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

134

_permuted_output.allocator()->allocate();

135

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

136

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

137

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

138

void NEDepthwiseConvolutionLayer3x3::configure_optimized(const ITensor *input,

139

const ITensor *weights,

140

const ITensor *biases,

141

ITensor *output,

142

const PadStrideInfo &conv_info,

143

unsigned int depth_multiplier,

144

const ActivationLayerInfo &act_info)

145

{

146

ActivationLayerInfo act_info_to_use = ActivationLayerInfo();

147

const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);

148

const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);

149

_is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);

150

if(!_is_activationlayer_enabled)

151

{

152

act_info_to_use = act_info;

}

if(_is_nchw)

{

_memory_group.manage(&_permuted_input);

158

_memory_group.manage(&_permuted_output);

159

160

// Configure the function to transform the input tensor from NCHW -> NHWC

161

_permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));

162

_permuted_input.info()->set_data_layout(DataLayout::NHWC);

163

164

// Configure the function to transform the weights tensor from IHW -> HWI

165

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));

166

_permuted_weights.info()->set_data_layout(DataLayout::NHWC);

167

Pablo Tello

a28aebc

2019-06-03 14:59:48 +0100

[diff] [blame^]

168

_permuted_output.info()->set_data_layout(DataLayout::NHWC);

169

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

170

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

171

// Configure optimized depthwise

172

_dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use);

173

174

// Configure the function to transform the convoluted output to ACL's native ordering format NCHW

175

_permuted_output.info()->set_data_layout(DataLayout::NHWC);

176

_permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

177

178

// Allocate tensors

179

_permuted_input.allocator()->allocate();

180

_permuted_output.allocator()->allocate();

}

else

{

_dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use);

}

}

void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input,

189

const ITensor *weights,

190

const ITensor *biases,

191

ITensor *output, const PadStrideInfo &conv_info,

192

unsigned int depth_multiplier,

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

193

const ActivationLayerInfo &act_info,

194

const Size2D &dilation)

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

195

{

196

ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);

197

ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

198

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

199

// idx_w and idx_h only used for validation

200

const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);

201

const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);

202

ARM_COMPUTE_UNUSED(idx_w);

203

ARM_COMPUTE_UNUSED(idx_h);

204

205

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_w) + (weights->info()->dimension(idx_w) - 1) * (dilation.x() - 1) > input->info()->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());

206

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_h) + (weights->info()->dimension(idx_h) - 1) * (dilation.y() - 1) > input->info()->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

207

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

208

_original_weights = weights;

209

_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());

210

_has_bias = biases != nullptr;

211

_is_optimized = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),

212

weights->info(),

213

conv_info,

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

214

depth_multiplier, dilation);

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

215

_is_nchw = input->info()->data_layout() == DataLayout::NCHW;

216

_permute = _is_optimized == _is_nchw;

217

_is_prepared = false;

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

218

_is_activationlayer_enabled = act_info.enabled();

219

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

220

// Configure appropriate pipeline

221

if(_is_optimized)

222

{

223

configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info);

224

}

225

else

226

{

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

227

configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

228

}

229

230

// Configure activation

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

231

if(_is_activationlayer_enabled)

232

{

233

_activationlayer_function.configure(output, nullptr, act_info);

234

}

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

235

}

236

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

237

/** Static check of whether the given configuration is valid for the 3x3 depthwise layer.
 *
 * @param[in] input            Source tensor info.
 * @param[in] weights          Weights tensor info.
 * @param[in] biases           Biases tensor info; may be nullptr.
 * @param[in] output           Destination tensor info.
 * @param[in] conv_info        Padding and stride information.
 * @param[in] depth_multiplier Depth multiplier.
 * @param[in] act_info         Requested activation; validated when enabled.
 * @param[in] dilation         Dilation; each component must be at least 1.
 *
 * @return An empty Status when every check passes, otherwise the first error encountered.
 */
Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo         *input,
                                                const ITensorInfo         *weights,
                                                const ITensorInfo         *biases,
                                                const ITensorInfo         *output,
                                                const PadStrideInfo       &conv_info,
                                                unsigned int               depth_multiplier,
                                                const ActivationLayerInfo &act_info,
                                                const Size2D              &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);

    // The dilated kernel extent must fit inside the padded input, along both width and height
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    // Biases, when given, must be 1D with one entry per weights channel
    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    const bool use_assembly = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation);
    if(use_assembly)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier));
    }
    else
    {
        // Generic path: quantized inputs go through an S32 accumulator followed by the output stage
        const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
        TensorInfo accumulator  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        // NOTE(review): dilation is not forwarded to the kernel validation here - confirm against the kernel's validate() signature
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier));

        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
        }
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

285

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

286

void NEDepthwiseConvolutionLayer3x3::run_generic()

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

287

{

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

288

// Fill border

289

NEScheduler::get().schedule(&_border_handler, Window::DimX);

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

290

291

// Execute depthwise convolution

292

NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

293

Georgios Pinitas

2018-01-30 18:13:46 +0000

[diff] [blame]

294

// Add biases

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

295

if(_has_bias || _is_quantized)

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

296

{

Michalis Spyrou

b91e34c

2017-12-20 15:50:55 +0000

[diff] [blame]

297

NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

298

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

299

300

// Permute output

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

301

if(!_is_nchw)

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

302

{

303

_permute_output.run();

304

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

305

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

306

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

307

void NEDepthwiseConvolutionLayer3x3::run_optimized()

308

{

309

// Run assembly function

310

_dwc_optimized_func.run();

// Permute output

if(_is_nchw)

{

_permute_output.run();

}

}

void NEDepthwiseConvolutionLayer3x3::run()

{

prepare();

Georgios Pinitas

2019-04-02 17:27:03 +0100

[diff] [blame]

323

MemoryGroupResourceScope scope_mg(_memory_group);

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

// Permute input

if(_permute)

{

_permute_input.run();

329

}

330

331

_is_optimized ? run_optimized() : run_generic();

332

333

// Run activation

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

334

if(_is_activationlayer_enabled)

335

{

336

_activationlayer_function.run();

337

}

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

338

}

339

340

void NEDepthwiseConvolutionLayer3x3::prepare()

{

if(!_is_prepared)

{

// Permute weights

if(_permute)

{

_permuted_weights.allocator()->allocate();

348

_permute_weights.run();

349

_original_weights->mark_as_unused();

350

}

351

352

// Prepare optimized function

353

if(_is_optimized)

354

{

355

_dwc_optimized_func.prepare();

356

if(!_permuted_weights.is_used())

357

{

358

_permuted_weights.allocator()->free();

}

}

_is_prepared = true;

}

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

364

}

365

Giorgio Arena

04a8f8c

2017-11-23 11:45:24 +0000

[diff] [blame]

366

/** Default constructor: value-initializes every kernel, function and tensor member and clears all state flags. */
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
    : _im2col_kernel(),
      _weights_reshape_kernel(),
      _v2mm_kernel(),
      _vector_to_tensor_kernel(),
      _output_stage_kernel(),
      _v2mm_input_fill_border(),
      _v2mm_weights_fill_border(),
      _permute_input(),
      _permute_weights(),
      _permute_output(),
      _activationlayer_function(),
      _input_reshaped(),
      _weights_reshaped(),
      _v2mm_output(),
      _output_reshaped(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_output(),
      _is_prepared(false),
      _is_quantized(false),
      _is_nhwc(false),
      _is_activationlayer_enabled(false),
      _original_weights(nullptr)
{
}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

373

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

374

unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

375

{

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

376

const unsigned int channel_idx = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);

377

ARM_COMPUTE_UNUSED(channel_idx);

Georgios Pinitas

8cffcd6

2018-11-16 17:11:50 +0000

[diff] [blame]

378

ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

379

ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

380

ARM_COMPUTE_ERROR_ON((input->info()->dimension(channel_idx) * depth_multiplier) != weights->info()->dimension(channel_idx));

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

381

// idx_w and idx_h only used for validation

382

const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);

383

const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);

384

ARM_COMPUTE_UNUSED(idx_w);

385

ARM_COMPUTE_UNUSED(idx_h);

386

387

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_w) + (weights->info()->dimension(idx_w) - 1) * (dilation.x() - 1) > input->info()->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());

388

ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_h) + (weights->info()->dimension(idx_h) - 1) * (dilation.y() - 1) > input->info()->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

389

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

390

_is_nhwc = input->info()->data_layout() == DataLayout::NHWC;

391

392

ITensor *input_to_use = input;

393

const ITensor *weights_to_use = weights;

394

ITensor *output_to_use = output;

if(_is_nhwc)

{

_permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));

399

_permuted_input.info()->set_data_layout(DataLayout::NCHW);

400

input_to_use = &_permuted_input;

401

402

_permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));

403

_permuted_weights.info()->set_data_layout(DataLayout::NCHW);

404

weights_to_use = &_permuted_weights;

405

}

406

407

const size_t weights_w = weights_to_use->info()->dimension(0);

408

const size_t weights_h = weights_to_use->info()->dimension(1);

409

const size_t weights_z = weights_to_use->info()->dimension(2);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

410

Georgios Pinitas

1562be3

2018-03-08 19:09:19 +0000

[diff] [blame]

411

_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

412

_is_prepared = false;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

413

_original_weights = weights_to_use;

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

414

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

415

// Should bias be appended ?

416

bool append_bias = (biases != nullptr) && !_is_quantized;

417

418

// Calculate output shape

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

419

TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier, dilation);

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

420

421

// Output auto inizialitation if not yet initialized

422

auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));

423

ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

424

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

425

if(_is_nhwc)

426

{

427

permute(output_shape, PermutationVector(1U, 2U, 0U));

428

_permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));

429

_permuted_output.info()->set_data_layout(DataLayout::NCHW);

Pablo Tello

a28aebc

2019-06-03 14:59:48 +0100

[diff] [blame^]

430

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

431

output_to_use = &_permuted_output;

432

}

433

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

434

// Output width and height

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

435

const unsigned int conv_w = output_shape.x();

436

const unsigned int conv_h = output_shape.y();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

437

438

// Set up intermediate tensors

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

439

const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

440

const size_t conv_size = conv_w * conv_h;

441

442

// Im2Col configuration

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

443

TensorShape shape_im2col = input_to_use->info()->tensor_shape();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

444

shape_im2col.set(0, patch_size);

445

shape_im2col.set(1, conv_size);

446

shape_im2col.set(2, weights_z);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

447

_input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

448

_im2col_kernel.configure(input_to_use, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier, dilation);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

449

450

// Weights reshape configuration

451

const TensorShape shape_weights_reshape(patch_size, weights_z);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

452

_weights_reshaped.allocator()->init(weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));

453

_weights_reshape_kernel.configure(weights_to_use, &_weights_reshaped, append_bias ? biases : nullptr);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

454

455

// GEMV configuration

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

456

DataType v2mm_dt = (input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : input->info()->data_type();

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

457

TensorShape shape_v2mm_out = input_to_use->info()->tensor_shape();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

458

shape_v2mm_out.set(0, conv_size * weights_z);

459

shape_v2mm_out.set(1, 1);

460

shape_v2mm_out.set(2, 1);

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

461

_v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

462

_v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

463

_output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

464

_vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output_to_use, conv_w, conv_h);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

465

466

// Output staged configuration

467

if(_is_quantized)

468

{

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

469

const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();

470

const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();

471

const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

Georgios Pinitas

9be0c5a

2018-02-19 12:46:29 +0000

[diff] [blame]

472

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

473

float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;

Michalis Spyrou

a4f378d

2019-04-26 14:54:54 +0100

[diff] [blame]

474

int output_multiplier;

475

int output_shift;

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

476

quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

477

_output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

478

_output_reshaped.allocator()->allocate();

479

}

480

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

481

if(_is_nhwc)

482

{

483

_permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

484

485

_permuted_input.allocator()->allocate();

486

_permuted_weights.allocator()->allocate();

487

_permuted_output.allocator()->allocate();

488

}

489

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

490

// Fill borders on inputs

Anthony Barbier

fb8dda2

2018-01-30 09:27:05 +0000

[diff] [blame]

491

PixelValue zero_in(static_cast<int32_t>(0));

492

PixelValue zero_w(static_cast<int32_t>(0));

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

493

if(_is_quantized)

494

{

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

495

zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().uniform().offset));

496

zero_w = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().uniform().offset));

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

497

}

498

BorderSize border_size = _v2mm_kernel.border_size();

499

_v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);

500

501

border_size.bottom = 0;

502

_v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, zero_w);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

503

504

// Allocate intermediate tensors

505

_input_reshaped.allocator()->allocate();

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

506

_v2mm_output.allocator()->allocate();

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

507

508

//Configure Activation Layer

509

_is_activationlayer_enabled = act_info.enabled();

510

511

if(_is_activationlayer_enabled)

512

{

513

_activationlayer_function.configure(output, nullptr, act_info);

514

}

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

515

}

516

Georgios Pinitas

2018-08-17 17:16:06 +0100

[diff] [blame]

517

/** Check whether the given tensor infos and parameters form a configuration accepted by this function.
 *
 * Temporary tensor infos are built for every intermediate stage (im2col, reshaped weights,
 * matrix-vector product, vector-to-tensor, quantized output stage) and handed to the
 * validate() step of the corresponding kernel. NHWC inputs are validated through NCHW
 * permuted copies of the input, weights and output infos.
 *
 * @param[in] input            Source tensor info. Its data layout must not be UNKNOWN.
 * @param[in] weights          Weights tensor info. The dilated kernel must fit inside the padded input.
 * @param[in] biases           Biases tensor info. May be nullptr.
 * @param[in] output           Destination tensor info. It is never modified: a clone is auto-initialised
 *                             from @p input when empty and the clone is what gets validated.
 * @param[in] conv_info        Padding and stride information.
 * @param[in] depth_multiplier Depth multiplier forwarded to the shape calculator and the im2col kernel.
 * @param[in] act_info         Activation layer information. Only validated when enabled.
 * @param[in] dilation         Dilation along x and y. Both components must be at least 1.
 *
 * @return An empty Status on success, otherwise the first error encountered.
 */
Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);

    const unsigned int width_idx  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const unsigned int height_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // The dilated kernel extent must not exceed the padded input extent
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) + (weights->dimension(width_idx) - 1) * (dilation.x() - 1) > input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(height_idx) + (weights->dimension(height_idx) - 1) * (dilation.y() - 1) > input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom());

    // Clone output to use auto init
    auto output_clone = output->clone();

    const ITensorInfo *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;
    const ITensorInfo *output_to_use  = output_clone.get();

    TensorShape permuted_input_shape   = input->tensor_shape();
    TensorShape permuted_weights_shape = weights->tensor_shape();
    TensorInfo  permuted_input;
    TensorInfo  permuted_weights;

    if(input->data_layout() == DataLayout::NHWC)
    {
        permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));
        permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));

        permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NCHW));
        permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NCHW));

        input_to_use   = &permuted_input;
        weights_to_use = &permuted_weights;
    }

    const bool         is_quantized = is_data_type_quantized_asymmetric(input->data_type());
    const bool         append_bias  = (biases != nullptr) && !is_quantized;
    TensorShape        output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
    const size_t       weights_w    = weights_to_use->dimension(0);
    const size_t       weights_h    = weights_to_use->dimension(1);
    const size_t       weights_z    = weights_to_use->dimension(2);
    const unsigned int conv_w       = output_shape[width_idx];
    const unsigned int conv_h       = output_shape[height_idx];
    const size_t       patch_size   = weights_w * weights_h + (append_bias ? 1 : 0);
    const size_t       conv_size    = conv_w * conv_h;

    // Output auto initialization if not yet initialized
    auto_init_if_empty(*output_clone, input->clone()->set_tensor_shape(output_shape));
    // Check the (possibly auto-initialised) clone rather than the caller's info, so that an
    // empty output is accepted; for an already initialised output both carry the same shape.
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output_clone->tensor_shape(), output_shape);

    TensorInfo permuted_output;
    if(input->data_layout() == DataLayout::NHWC)
    {
        permute(output_shape, PermutationVector(1U, 2U, 0U));
        permuted_output = TensorInfo(output_clone->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_layout(DataLayout::NCHW));
        output_to_use   = &permuted_output;
    }

    // Im2Col configuration
    TensorShape shape_im2col = input_to_use->tensor_shape();
    shape_im2col.set(0, patch_size);
    shape_im2col.set(1, conv_size);
    shape_im2col.set(2, weights_z);
    TensorInfo input_reshaped(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseIm2ColKernel::validate(input_to_use, &input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier, dilation));

    // Weights reshape configuration
    const TensorShape shape_weights_reshape(patch_size, weights_z);
    TensorInfo        weights_reshaped(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseWeightsReshapeKernel::validate(weights_to_use, &weights_reshaped, append_bias ? biases : nullptr));

    // GEMV configuration
    DataType    v2mm_dt        = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
    TensorShape shape_v2mm_out = input_to_use->tensor_shape();
    shape_v2mm_out.set(0, conv_size * weights_z);
    shape_v2mm_out.set(1, 1);
    shape_v2mm_out.set(2, 1);
    TensorInfo v2mm_output(input->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixVectorMultiplyKernel::validate(&input_reshaped, &weights_reshaped, &v2mm_output));

    // For quantized input the vector-to-tensor kernel writes into an intermediate info,
    // which the output stage then converts to the final output
    TensorInfo output_reshaped(v2mm_output.clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_to_use->tensor_shape()));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseVectorToTensorKernel::validate(&v2mm_output, (is_quantized) ? &output_reshaped : output_to_use, conv_w, conv_h));

    if(is_quantized)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output_to_use));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        // Use the auto-initialised clone so an empty caller-provided output does not fail here
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output_clone.get(), nullptr, act_info));
    }

    return Status{};
}

Giorgio Arena

2017-11-23 11:45:24 +0000

[diff] [blame]

615

void NEDepthwiseConvolutionLayer::run()

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

616

{

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

617

prepare();

Georgios Pinitas

1562be3

2018-03-08 19:09:19 +0000

[diff] [blame]

618

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

619

if(_is_nhwc)

620

{

621

_permute_input.run();

622

}

623

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

624

NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

625

NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

626

NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);

627

NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

628

if(_is_quantized)

629

{

630

NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);

631

}

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

if(_is_nhwc)

{

_permute_output.run();

636

}

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

637

638

if(_is_activationlayer_enabled)

639

{

640

_activationlayer_function.run();

641

}

Anthony Barbier

fb8dda2

2018-01-30 09:27:05 +0000

[diff] [blame]

642

}

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

643

644

void NEDepthwiseConvolutionLayer::prepare()

{

if(!_is_prepared)

{

ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

649

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

650

if(_is_nhwc)

651

{

652

_permute_weights.run();

653

}

654

Georgios Pinitas

2018-06-05 14:56:06 +0100

[diff] [blame]

655

// Run reshape and mark original weights as unused

656

_weights_reshaped.allocator()->allocate();

657

NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);

658

NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);

659

_original_weights->mark_as_unused();

_is_prepared = true;

}

}

Georgios Pinitas