Blame - src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp - ml/ComputeLibrary

2019-10-09 15:32:39 +0100

[diff] [blame]

53

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

54

ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);

55

56

const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

57

const bool is_nhwc = input->data_layout() == DataLayout::NHWC;

58

const bool needs_permute = is_nhwc && (depth_multiplier > 1);

59

const bool needs_weights_reshape = is_nhwc && (depth_multiplier == 1) && is_quantized;

60

const bool is_stride_1 = ((conv_info.stride().first == conv_info.stride().second) && (conv_info.stride().first == 1));

61

const bool is_stride_1_dilation_1 = (is_stride_1 && dilation.x() == 1 && dilation.y() == 1);

62

const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());

63

DepthwiseConvolutionReshapeInfo info;

64

info.c0 = 4;

65

info.transpose = is_stride_1_dilation_1 && is_dot8_supported;

66

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

67

TensorInfo output_multipliers_shifts_info(TensorInfo(TensorShape(1U), 1, DataType::S32));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

68

if(is_quantized)

69

{

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

70

if(is_data_type_quantized_per_channel(weights->data_type()))

71

{

72

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QSYMM8_PER_CHANNEL);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

73

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

74

const size_t idx_c = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);

75

output_multipliers_shifts_info.set_tensor_shape(TensorShape(weights->dimension(idx_c)));

}

else

{

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);

80

}

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

}

if(needs_permute)

{

TensorShape permuted_input_shape = input->tensor_shape();

86

TensorShape permuted_weights_shape = weights->tensor_shape();

87

TensorShape permuted_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);

88

89

permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));

90

permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));

91

permute(permuted_output_shape, PermutationVector(1U, 2U, 0U));

92

93

const TensorInfo permuted_input = input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NCHW);

94

const TensorInfo permuted_weights = weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NCHW);

95

const TensorInfo permuted_output = output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW);

96

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

97

ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output,

98

conv_info, depth_multiplier, act_info, gpu_target,

99

dilation, &output_multipliers_shifts_info, &output_multipliers_shifts_info));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

}

else if(is_nhwc)

{

if(needs_weights_reshape)

104

{

105

auto reshaped_weights_shape = arm_compute::misc::shape_calculator::compute_reshaped_depthwise_weights_shape(*weights, info);

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

106

ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(input, &weights->clone()->set_tensor_shape(reshaped_weights_shape), biases,

107

output, conv_info, depth_multiplier, act_info,

108

dilation, &output_multipliers_shifts_info, &output_multipliers_shifts_info));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

109

}

110

else

111

{

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

112

ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info,

113

dilation, &output_multipliers_shifts_info, &output_multipliers_shifts_info));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

}

}

else

{

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

118

ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target,

119

dilation, &output_multipliers_shifts_info, &output_multipliers_shifts_info));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

}

return Status{};

}

} // namespace

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

125

/** Construct the generic depthwise convolution function.
 *
 * Takes ownership of a share of @p memory_manager for the internal memory group,
 * creates the native depthwise kernel up front and leaves every tensor pointer
 * null and every state flag cleared until configure() is called.
 *
 * @param[in] memory_manager Memory manager handed to the internal memory group.
 */
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _dwc_native_kernel(std::make_unique<CLDepthwiseConvolutionLayerNativeKernel>()),
      _permute_input_to_nhwc(),
      _permute_weights_to_nhwc(),
      _permute_output_to_nchw(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_output(),
      _output_multipliers(),
      _output_shifts(),
      _original_weights(nullptr), // non-owning, assigned in configure()
      _input(nullptr),            // non-owning, assigned in configure()
      _output(nullptr),           // non-owning, assigned in configure()
      _needs_permute(false),
      _is_prepared(false),
      _is_quantized(false)
{
    // Nothing else to do: all state is established by the initializer list.
}

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

145

// Destructor is defaulted here, out of line, in this translation unit.
// NOTE(review): presumably required so that std::unique_ptr members holding kernel types
// that are only forward-declared in the header can be destroyed - confirm against the header.
CLDepthwiseConvolutionLayer::~CLDepthwiseConvolutionLayer() = default;

146

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

147

void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,

148

unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)

149

{

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

150

configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);

151

}

152

153

void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,

154

ICLTensor *output, const PadStrideInfo &conv_info,

155

unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)

156

{

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

157

ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

158

ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayer::validate(input->info(),

159

weights->info(),

160

biases != nullptr ? biases->info() : nullptr,

output->info(),

conv_info,

depth_multiplier,

act_info,

dilation));

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

167

_is_quantized = is_data_type_quantized(input->info()->data_type());

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

168

_is_prepared = false;

169

_original_weights = weights;

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

170

_input = input;

171

_output = output;

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

172

_needs_permute = input->info()->data_layout() == DataLayout::NCHW;

173

174

ICLTensor *input_to_use = input;

175

const ICLTensor *weights_to_use = weights;

176

ICLTensor *output_to_use = output;

177

if(_needs_permute)

178

{

179

_memory_group.manage(&_permuted_input);

180

_memory_group.manage(&_permuted_output);

181

182

// Configure the function to transform the input tensor from NCHW -> NHWC

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

183

_permute_input_to_nhwc.configure(compile_context, input, &_permuted_input, PermutationVector(2U, 0U, 1U));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

184

_permuted_input.info()->set_data_layout(DataLayout::NHWC);

185

186

// Configure the function to transform the weights tensor from IHW -> HWI

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

187

_permute_weights_to_nhwc.configure(compile_context, weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

188

_permuted_weights.info()->set_data_layout(DataLayout::NHWC);

189

190

// Set output quantization info before dwc kernel configure

191

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

192

193

input_to_use = &_permuted_input;

194

weights_to_use = &_permuted_weights;

195

output_to_use = &_permuted_output;

196

}

197

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

198

CLTensor *output_multipliers_to_use = nullptr;

199

CLTensor *output_shifts_to_use = nullptr;

200

if(_is_quantized)

201

{

202

const size_t idx_c = get_data_layout_dimension_index(weights->info()->data_layout(), DataLayoutDimension::CHANNEL);

203

const size_t num_filters = (is_data_type_quantized_per_channel(weights->info()->data_type())) ? weights->info()->dimension(idx_c) : 1;

204

205

_output_multipliers.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

206

_output_shifts.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

207

208

output_multipliers_to_use = &_output_multipliers;

209

output_shifts_to_use = &_output_shifts;

210

}

211

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

212

DWCWeightsKernelInfo dwc_weights_info;

213

dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1;

214

DWCKernelInfo dwc_info;

215

dwc_info.activation_info = act_info;

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

216

_dwc_native_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,

217

dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation,

218

output_multipliers_to_use, output_shifts_to_use);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

if(_needs_permute)

{

_permuted_input.allocator()->allocate();

223

224

// Configure the function to transform the convoluted output to NCHW format

225

_permuted_output.info()->set_data_layout(DataLayout::NCHW);

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

226

_permute_output_to_nchw.configure(compile_context, &_permuted_output, output, PermutationVector(1U, 2U, 0U));

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

227

_permuted_output.allocator()->allocate();

228

}

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

if(_is_quantized)

{

_output_multipliers.allocator()->allocate();

233

_output_shifts.allocator()->allocate();

234

}

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

235

}

236

237

/** Check whether the generic depthwise convolution path can run with the given configuration.
 *
 * Mirrors configure(): checks that input and output share a data layout, that the dilated
 * kernel fits inside the padded input, that quantized weights have a supported type, and then
 * validates the permute functions (NCHW case) and the native depthwise kernel.
 *
 * @param[in] input            Source tensor info.
 * @param[in] weights          Weights tensor info.
 * @param[in] biases           Optional biases tensor info, forwarded to the kernel validation.
 * @param[in] output           Destination tensor info.
 * @param[in] conv_info        Padding and stride information.
 * @param[in] depth_multiplier Depth multiplier.
 * @param[in] act_info         Activation forwarded to the kernel validation.
 * @param[in] dilation         Dilation in x and y.
 *
 * @return An empty Status on success, otherwise the first error encountered.
 */
Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                  const PadStrideInfo &conv_info,
                                                                                  unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);

    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // The dilated kernel extent must not exceed the padded input extent in either dimension
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    // Same kernel descriptors that configure() builds
    DWCWeightsKernelInfo dwc_weights_info;
    dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1;

    DWCKernelInfo dwc_info;
    dwc_info.activation_info = act_info;

    const bool needs_permute = (input->data_layout() == DataLayout::NCHW);
    const bool is_quantized  = is_data_type_quantized(input->data_type());

    // Shared info describing both the multipliers and the shifts tensor; single element unless
    // the weights are quantized per channel
    TensorInfo output_multipliers_shifts_info(TensorShape(1U), 1, DataType::S32);
    if(is_quantized)
    {
        if(is_data_type_quantized_per_channel(weights->data_type()))
        {
            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QSYMM8_PER_CHANNEL);

            const size_t idx_c = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
            output_multipliers_shifts_info.set_tensor_shape(TensorShape(weights->dimension(idx_c)));
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
        }
    }

    if(!needs_permute)
    {
        // NHWC input: the native kernel consumes the tensors directly
        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, dwc_weights_info, dwc_info, conv_info, depth_multiplier,
                                                                                       dilation, &output_multipliers_shifts_info, &output_multipliers_shifts_info));
        return Status{};
    }

    // NCHW input: build the NHWC tensor infos the kernel would see after permutation
    TensorShape nhwc_input_shape   = input->tensor_shape();
    TensorShape nhwc_weights_shape = weights->tensor_shape();
    TensorShape nhwc_output_shape  = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);

    permute(nhwc_input_shape, PermutationVector(2U, 0U, 1U));
    permute(nhwc_weights_shape, PermutationVector(2U, 0U, 1U));
    permute(nhwc_output_shape, PermutationVector(2U, 0U, 1U));

    const TensorInfo permuted_input   = input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(nhwc_input_shape).set_data_layout(DataLayout::NHWC);
    const TensorInfo permuted_weights = weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(nhwc_weights_shape).set_data_layout(DataLayout::NHWC);
    const TensorInfo permuted_output  = output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(nhwc_output_shape).set_data_layout(DataLayout::NHWC);

    ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, dwc_weights_info,
                                                                                   dwc_info, conv_info, depth_multiplier, dilation,
                                                                                   &output_multipliers_shifts_info, &output_multipliers_shifts_info));
    ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));

    return Status{};
}

void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::run()

{

prepare();

MemoryGroupResourceScope scope_mg(_memory_group);

if(_needs_permute)

{

_permute_input_to_nhwc.run();

311

}

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

312

CLScheduler::get().enqueue(*_dwc_native_kernel);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

313

if(_needs_permute)

314

{

315

_permute_output_to_nchw.run();

}

}

void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::prepare()

320

{

321

if(!_is_prepared)

322

{

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

323

if(_is_quantized)

324

{

325

_output_multipliers.map();

326

_output_shifts.map();

Vidhya Sudhan Loganathan

951b8a4

2019-11-04 14:42:08 +0000

[diff] [blame]

327

const unsigned int idx_ofms = get_data_layout_dimension_index(_output->info()->data_layout(), DataLayoutDimension::CHANNEL);

328

quantization::compute_quantized_multipliers_and_shifts(_input->info(),

329

_original_weights->info(),

330

_output->info(),

331

idx_ofms,

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

332

reinterpret_cast<int32_t *>(_output_multipliers.ptr_to_element(Coordinates(0))),

333

reinterpret_cast<int32_t *>(_output_shifts.ptr_to_element(Coordinates(0))));

334

_output_multipliers.unmap();

335

_output_shifts.unmap();

336

}

337

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

338

if(_needs_permute)

339

{

340

ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

341

342

_permuted_weights.allocator()->allocate();

343

_permute_weights_to_nhwc.run();

344

_original_weights->mark_as_unused();

}

_is_prepared = true;

}

}

/** Construct the optimized 3x3 depthwise convolution function.
 *
 * The border handler and the weights reshape kernel are created immediately; the
 * convolution kernel itself stays null until configure() selects the concrete kernel type.
 *
 * @param[in] memory_manager Memory manager handed to the internal memory group.
 */
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _kernel(nullptr), // created in configure()
      _border_handler(std::make_unique<CLFillBorderKernel>()),
      _permute_input_to_nchw(),
      _permute_weights_to_nchw(),
      _permute_output_to_nhwc(),
      _reshape_weights(std::make_unique<CLDepthwiseConvolutionLayerReshapeWeightsKernel>()),
      _permuted_input(),
      _permuted_weights(),
      _permuted_output(),
      _output_multipliers(),
      _output_shifts(),
      _original_weights(nullptr), // non-owning, assigned in configure()
      _input(nullptr),            // non-owning, assigned in configure()
      _output(nullptr),           // non-owning, assigned in configure()
      _needs_permute(false),
      _needs_weights_reshape(false),
      _is_prepared(false),
      _is_quantized(false)
{
    // Nothing else to do: all state is established by the initializer list.
}

/** Configure the 3x3 path using the compile context of the global CLKernelLibrary.
 *
 * Convenience overload: forwards every argument unchanged to the overload that
 * takes an explicit compile context.
 */
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                                                                     const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
{
    const auto &default_context = CLKernelLibrary::get().get_compile_context();
    configure(default_context, input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

378

379

void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,

380

ICLTensor *output,

381

const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)

382

{

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

383

const GPUTarget gpu_target = CLScheduler::get().target();

Usama Arif

881f2de

2019-04-12 10:29:17 +0100

[diff] [blame]

384

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

385

// Perform validation step

386

ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

Manuel Bottini

387259a

2020-05-21 17:14:36 +0100

[diff] [blame]

387

ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayerInternal3x3::validate(input->info(),

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

388

weights->info(),

389

biases != nullptr ? biases->info() : nullptr,

output->info(),

conv_info,

depth_multiplier,

act_info,

gpu_target,

dilation));

Giorgio Arena

2017-08-23 16:36:24 +0100

[diff] [blame]

396

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

397

const bool is_nhwc = input->info()->data_layout() == DataLayout::NHWC;

398

_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());

giuros01

2019-01-07 17:47:19 +0000

[diff] [blame]

399

_needs_permute = is_nhwc && (depth_multiplier > 1);

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

400

_needs_weights_reshape = is_nhwc && (depth_multiplier == 1) && _is_quantized;

401

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

402

_is_prepared = false;

403

_original_weights = weights;

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

404

_input = input;

405

_output = output;

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

406

407

ICLTensor *input_to_use = input;

408

const ICLTensor *weights_to_use = weights;

409

ICLTensor *output_to_use = output;

410

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

411

const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights->info()->data_type());

412

const bool is_stride_1 = ((conv_info.stride().first == conv_info.stride().second) && (conv_info.stride().first == 1));

413

const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()) && !is_quantized_per_channel;

414

const bool is_stride_1_dilation_1 = (is_stride_1 && dilation.x() == 1 && dilation.y() == 1);

Usama Arif

e73686a

2019-04-08 17:30:48 +0100

[diff] [blame]

415

giuros01

2019-01-07 17:47:19 +0000

[diff] [blame]

416

DepthwiseConvolutionReshapeInfo info;

417

info.c0 = 4;

Usama Arif

e73686a

2019-04-08 17:30:48 +0100

[diff] [blame]

418

info.transpose = is_stride_1_dilation_1 && is_dot8_supported;

giuros01

2019-01-07 17:47:19 +0000

[diff] [blame]

419

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

420

if(_needs_permute)

Giorgio Arena

2018-01-31 10:30:59 +0000

[diff] [blame]

421

{

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

422

_memory_group.manage(&_permuted_input);

423

_memory_group.manage(&_permuted_output);

424

425

// Configure the function to transform the input tensor from NHWC -> NCHW

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

426

_permute_input_to_nchw.configure(compile_context, input, &_permuted_input, PermutationVector(1U, 2U, 0U));

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

427

_permuted_input.info()->set_data_layout(DataLayout::NCHW);

428

429

// Configure the function to transform the weights tensor from HWI -> IHW

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

430

_permute_weights_to_nchw.configure(compile_context, weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

431

_permuted_weights.info()->set_data_layout(DataLayout::NCHW);

Pablo Tello

a28aebc

2019-06-03 14:59:48 +0100

[diff] [blame]

432

_permuted_output.info()->set_quantization_info(output->info()->quantization_info());

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

433

434

input_to_use = &_permuted_input;

435

weights_to_use = &_permuted_weights;

436

output_to_use = &_permuted_output;

437

Georgios Pinitas

2020-11-21 03:04:18 +0000

[diff] [blame]

438

_kernel = std::make_unique<CLDepthwiseConvolutionLayer3x3NCHWKernel>();

Giorgio Arena

2018-01-31 10:30:59 +0000

[diff] [blame]

439

}

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

440

else if(is_nhwc)

Giorgio Arena

2018-01-31 10:30:59 +0000

[diff] [blame]

441

{

giuros01

2019-01-07 17:47:19 +0000

[diff] [blame]

442

if(_needs_weights_reshape)

443

{

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

444

_reshape_weights->configure(compile_context, weights, &_permuted_weights, info);

giuros01

2019-01-07 17:47:19 +0000

[diff] [blame]

445

weights_to_use = &_permuted_weights;

446

}

Georgios Pinitas

2020-11-21 03:04:18 +0000

[diff] [blame]

447

_kernel = std::make_unique<CLDepthwiseConvolutionLayer3x3NHWCKernel>();

Giorgio Arena

2018-01-31 10:30:59 +0000

[diff] [blame]

448

}

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

449

else

450

{

Georgios Pinitas

2020-11-21 03:04:18 +0000

[diff] [blame]

451

_kernel = std::make_unique<CLDepthwiseConvolutionLayer3x3NCHWKernel>();

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

452

}

Giorgio Arena

2018-01-31 10:30:59 +0000

[diff] [blame]

453

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

454

CLTensor *output_multipliers_to_use = nullptr;

455

CLTensor *output_shifts_to_use = nullptr;

456

if(_is_quantized)

457

{

458

const size_t idx_c = get_data_layout_dimension_index(weights->info()->data_layout(), DataLayoutDimension::CHANNEL);

459

const size_t num_filters = (is_quantized_per_channel) ? weights->info()->dimension(idx_c) : 1;

460

461

_output_multipliers.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

462

_output_shifts.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32));

463

464

output_multipliers_to_use = &_output_multipliers;

465

output_shifts_to_use = &_output_shifts;

466

}

467

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

468

// Configure kernel

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

469

_kernel->set_target(gpu_target);

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

470

_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier,

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

471

act_info, dilation, output_multipliers_to_use, output_shifts_to_use);

if(_is_quantized)

{

_output_multipliers.allocator()->allocate();

476

_output_shifts.allocator()->allocate();

477

}

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

478

479

// Permute output if needed

480

if(_needs_permute)

481

{

482

// Configure the function to transform the convoluted output to ACL's native ordering format NCHW

Georgios Pinitas

3f8aac4

2018-12-24 13:09:02 +0000

[diff] [blame]

483

_permuted_output.info()->set_data_layout(DataLayout::NCHW);

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

484

_permute_output_to_nhwc.configure(compile_context, &_permuted_output, output, PermutationVector(2U, 0U, 1U));

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

485

486

// Allocate tensors

487

_permuted_input.allocator()->allocate();

488

_permuted_output.allocator()->allocate();

489

}

Diego Lopez Recas

fa0add1

2017-11-28 16:44:52 +0000

[diff] [blame]

490

// Configure border handler

491

PixelValue &&zero_value(0.f);

492

if(is_data_type_quantized_asymmetric(input->info()->data_type()))

493

{

Georgios Pinitas

4c5469b

2019-05-21 13:32:43 +0100

[diff] [blame]

494

zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));

Diego Lopez Recas

fa0add1

2017-11-28 16:44:52 +0000

[diff] [blame]

495

}

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

496

_border_handler->configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);

Giorgio Arena

2017-08-23 16:36:24 +0100

[diff] [blame]

497

}

498

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

499

/** Check whether the optimized 3x3 path can run with the given configuration.
 *
 * Thin wrapper: all checks are delegated to validate_arguments_3x3().
 *
 * @return The Status produced by validate_arguments_3x3().
 */
Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                      const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, GPUTarget gpu_target, const Size2D &dilation)
{
    Status status = validate_arguments_3x3(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target, dilation);
    return status;
}

504

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

505

void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::run()

Giorgio Arena

2017-08-23 16:36:24 +0100

[diff] [blame]

506

{

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

507

prepare();

508

Georgios Pinitas

da953f2

2019-04-02 17:27:03 +0100

[diff] [blame]

509

MemoryGroupResourceScope scope_mg(_memory_group);

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

if(_needs_permute)

{

_permute_input_to_nchw.run();

514

}

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

515

CLScheduler::get().enqueue(*_border_handler);

Giorgio Arena

2018-01-31 10:30:59 +0000

[diff] [blame]

516

CLScheduler::get().enqueue(*_kernel);

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

if(_needs_permute)

{

_permute_output_to_nhwc.run();

521

}

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

522

}

523

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

524

void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::prepare()

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

525

{

526

if(!_is_prepared)

527

{

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

528

if(_is_quantized)

529

{

530

_output_multipliers.map();

531

_output_shifts.map();

Vidhya Sudhan Loganathan

951b8a4

2019-11-04 14:42:08 +0000

[diff] [blame]

532

const unsigned int idx_ofms = get_data_layout_dimension_index(_output->info()->data_layout(), DataLayoutDimension::CHANNEL);

533

quantization::compute_quantized_multipliers_and_shifts(_input->info(),

534

_original_weights->info(),

535

_output->info(),

536

idx_ofms,

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

537

reinterpret_cast<int32_t *>(_output_multipliers.ptr_to_element(Coordinates(0))),

538

reinterpret_cast<int32_t *>(_output_shifts.ptr_to_element(Coordinates(0))));

539

_output_multipliers.unmap();

540

_output_shifts.unmap();

541

}

542

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

543

if(_needs_permute)

544

{

545

ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

546

547

_permuted_weights.allocator()->allocate();

548

_permute_weights_to_nchw.run();

549

_original_weights->mark_as_unused();

550

}

giuros01

2019-01-07 17:47:19 +0000

[diff] [blame]

551

552

if(_needs_weights_reshape)

553

{

554

ARM_COMPUTE_ERROR_ON(_needs_permute);

555

ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

556

_permuted_weights.allocator()->allocate();

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

557

CLScheduler::get().enqueue(*_reshape_weights);

giuros01

2019-01-07 17:47:19 +0000

[diff] [blame]

558

_original_weights->mark_as_unused();

559

}

Georgios Pinitas

2018-12-07 18:31:47 +0000

[diff] [blame]

560

_is_prepared = true;

561

}

Giorgio Arena

2017-08-23 16:36:24 +0100

[diff] [blame]

562

}

563

Michele Di Giorgio

a046e16

2019-10-08 09:36:26 +0100

[diff] [blame]

564

/** Construct the depthwise convolution function.
 *
 * Stores the memory manager and default-constructs both internal implementations;
 * the selected implementation defaults to GENERIC until configure() picks one.
 *
 * @param[in] memory_manager Memory manager kept for the internal functions.
 */
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_manager(std::move(memory_manager)),
      _depth_conv_func(DepthwiseConvolutionFunction::GENERIC),
      _func_3x3(),
      _func_generic()
{
    // Nothing else to do: all state is established by the initializer list.
}

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

569

void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,

570

ActivationLayerInfo act_info, const Size2D &dilation)

Giorgio Arena

93a690e

2017-08-01 16:09:33 +0100

[diff] [blame]

571

{

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

572

configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);

573

}

574

575

void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,

576

const PadStrideInfo &conv_info,

Sang-Hoon Park

2020-10-21 15:58:54 +0100

[diff] [blame]

577

unsigned int depth_multiplier,

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

578

ActivationLayerInfo act_info, const Size2D &dilation)

579

{

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

580

const GPUTarget gpu_target = CLScheduler::get().target();

581

_depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info,

582

dilation, gpu_target);

583

switch(_depth_conv_func)

Georgios Pinitas

de5a1cc

2018-02-02 12:52:07 +0000

[diff] [blame]

584

{

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

585

case DepthwiseConvolutionFunction::OPTIMIZED:

586

_func_3x3.set_memory_group(_memory_manager);

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

587

_func_3x3.configure(compile_context, input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

588

break;

589

case DepthwiseConvolutionFunction::GENERIC:

Pablo Tello

8bf622a

2018-12-03 15:54:49 +0000

[diff] [blame]

590

{

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

591

_func_generic.set_memory_group(_memory_manager);

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

592

_func_generic.configure(compile_context, input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);

Pablo Tello

8bf622a

2018-12-03 15:54:49 +0000

[diff] [blame]

593

}

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

594

break;

595

default:

596

ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");

Georgios Pinitas

60e9825

2018-10-22 16:17:20 +0100

[diff] [blame]

597

}

Giorgio Arena

93a690e

2017-08-01 16:09:33 +0100

[diff] [blame]

598

}

599

Giorgio Arena

2018-04-23 16:16:21 +0100

[diff] [blame]

600

// Static validation entry point: runs the same backend selection as
// configure() and returns the Status produced by the selected backend's own
// validate(). Note the OPTIMIZED backend additionally receives the GPU target.
// Any selection result other than OPTIMIZED or GENERIC is reported through
// ARM_COMPUTE_ERROR.
Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo   *input,
                                             const ITensorInfo   *weights,
                                             const ITensorInfo   *biases,
                                             const ITensorInfo   *output,
                                             const PadStrideInfo &conv_info,
                                             unsigned int         depth_multiplier,
                                             ActivationLayerInfo  act_info,
                                             const Size2D        &dilation)
{
    const GPUTarget gpu_target = CLScheduler::get().target();

    const DepthwiseConvolutionFunction selected_func =
        get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, gpu_target);

    if(selected_func == DepthwiseConvolutionFunction::OPTIMIZED)
    {
        return CLDepthwiseConvolutionLayerInternal3x3::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target, dilation);
    }

    if(selected_func == DepthwiseConvolutionFunction::GENERIC)
    {
        return CLDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
}

Georgios Pinitas

60e9825

2018-10-22 16:17:20 +0100

[diff] [blame]

615

Manuel Bottini