Blame - arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h - ml/ComputeLibrary

2019-10-09 15:32:39 +0100

[diff] [blame]

77

* @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].

78

* Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

79

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

80

* Data type supported: Same as @p input, S32 when input is QASYMM8.

81

* @param[in] output Destination tensor. Data type supported: same as @p input.

82

* @param[in] conv_info Padding and stride information to use for the convolution.

83

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

84

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

85

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

90

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

91

92

// Inherited methods overridden:

93

void run() override;

94

void prepare() override;

95

96

private:

97

/** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer

98

*

99

* @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layout supported: NHWC, NCHW

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

100

* @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].

101

* Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

102

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

103

* Data type supported: Same as @p input, S32 when input is QASYMM8.

104

* @param[in] output Destination tensor. Data type supported: same as @p input.

105

* @param[in] conv_info Padding and stride information to use for the convolution.

106

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

107

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

108

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

109

* @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.

110

*

111

* @return a Depthwise Convolution Function

112

*/

113

static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,

114

const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

115

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD);

116

117

/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:

118

*

119

* -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)

120

* -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)

121

* -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)

122

* -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)

123

*

124

*/

125

class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction

126

{

127

public:

128

/** Default constructor */

129

CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);

130

/** Prevent instances of this class from being copied (As this class contains pointers) */

131

CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;

132

/** Default move constructor */

133

CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;

134

/** Prevent instances of this class from being copied (As this class contains pointers) */

135

CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;

136

/** Default move assignment operator */

137

CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;

138

/** Initialize the function's source, destination, conv and border_size.

139

*

140

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

141

* @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].

142

* Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

143

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

144

* Data type supported: Same as @p input.

145

* @param[out] output Destination tensor. Data type supported: same as @p input.

146

* @param[in] conv_info Padding and stride information to use for the convolution.

147

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

148

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

149

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

150

*/

151

void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

152

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

153

154

/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerInternal3x3

155

*

156

* @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

157

* @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM].

158

* Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

159

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

160

* Data type supported: Same as @p input, S32 when input is QASYMM8.

161

* @param[in] output Destination tensor. Data type supported: same as @p input.

162

* @param[in] conv_info Padding and stride information to use for the convolution.

163

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

164

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

165

* @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

170

ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));

171

172

// Inherited methods overridden:

173

void run() override;

174

void prepare() override;

175

176

/** Replace the memory group held by this function.
*
* Assigns to _memory_group a new MemoryGroup constructed from the given memory manager.
*
* @param[in] memory_manager Memory manager handed to the MemoryGroup constructor; it is taken by value and moved in.
*/

void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)

177

{

178

_memory_group = MemoryGroup(std::move(memory_manager));

};

private:

MemoryGroup _memory_group;

183

std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;

184

CLFillBorderKernel _border_handler;

185

CLPermute _permute_input_to_nchw;

186

CLPermute _permute_weights_to_nchw;

187

CLPermute _permute_output_to_nhwc;

188

CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;

189

CLTensor _permuted_input;

190

CLTensor _permuted_weights;

191

CLTensor _permuted_output;

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

192

CLTensor _output_multipliers;

193

CLTensor _output_shifts;

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

194

const ITensor *_original_weights;

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

195

const ITensor *_input;

196

const ITensor *_output;

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

197

bool _needs_permute;

198

bool _needs_weights_reshape;

199

bool _is_prepared;

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

200

bool _is_quantized;

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

201

};

202

203

/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:

204

*

205

* -# @ref CLDepthwiseConvolutionLayerNativeKernel

206

* -# @ref CLPermute (x 3) if the data layout is NCHW

207

*

208

*/

209

class CLDepthwiseConvolutionLayerGeneric : public IFunction

210

{

211

public:

212

/** Default constructor */

213

CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);

214

/** Prevent instances of this class from being copied (As this class contains pointers) */

215

CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;

216

/** Default move constructor */

217

CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;

218

/** Prevent instances of this class from being copied (As this class contains pointers) */

219

CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;

220

/** Default move assignment operator */

221

CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;

222

/** Initialize the function's source, destination, weights and convolution information.

223

*

Michele Di Giorgio

2020-01-06 14:07:44 +0000

[diff] [blame]

224

* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

225

* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].

Michele Di Giorgio

2020-01-06 14:07:44 +0000

[diff] [blame]

226

* Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

227

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

Michele Di Giorgio

2020-01-06 14:07:44 +0000

[diff] [blame]

228

* Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

229

* @param[out] output Destination tensor. Data type supported: same as @p input.

230

* @param[in] conv_info Padding and stride information to use for the convolution.

231

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

232

* @param[in] act_info (Optional) Activation layer information in case of a fused activation.

233

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

234

*/

235

void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,

236

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

237

238

/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric

239

*

Michele Di Giorgio

2020-01-06 14:07:44 +0000

[diff] [blame]

240

* @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F32.

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

241

* @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].

Michele Di Giorgio

2020-01-06 14:07:44 +0000

[diff] [blame]

242

* Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

243

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

Michele Di Giorgio

2020-01-06 14:07:44 +0000

[diff] [blame]

244

* Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

245

* @param[in] output Destination tensor. Data type supported: same as @p input.

246

* @param[in] conv_info Padding and stride information to use for the convolution.

247

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

248

* @param[in] act_info (Optional) Activation layer information in case of a fused activation.

249

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,

254

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

255

256

// Inherited methods overridden:

257

void run() override;

258

void prepare() override;

259

260

/** Replace the memory group held by this function.
*
* Assigns to _memory_group a new MemoryGroup constructed from the given memory manager.
*
* @param[in] memory_manager Memory manager handed to the MemoryGroup constructor; it is taken by value and moved in.
*/

void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)

261

{

262

_memory_group = MemoryGroup(std::move(memory_manager));

};

private:

MemoryGroup _memory_group;

267

268

CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;

269

CLPermute _permute_input_to_nhwc;

270

CLPermute _permute_weights_to_nhwc;

271

CLPermute _permute_output_to_nchw;

272

273

CLTensor _permuted_input;

274

CLTensor _permuted_weights;

275

CLTensor _permuted_output;

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

276

CLTensor _output_multipliers;

277

CLTensor _output_shifts;

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

278

const ITensor *_original_weights;

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

279

const ITensor *_input;

280

const ITensor *_output;

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

281

282

bool _needs_permute;

283

bool _is_prepared;

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

284

bool _is_quantized;

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

285

};

286

287

std::shared_ptr<IMemoryManager> _memory_manager;

288

289

DepthwiseConvolutionFunction _depth_conv_func;

290

CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;

291

CLDepthwiseConvolutionLayerGeneric _func_generic;

292

};

293

Giorgio Arena

dfca60b

2018-01-31 10:30:59 +0000

[diff] [blame]

294

/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:

Giorgio Arena

2017-08-01 16:09:33 +0100

[diff] [blame]

295

*

Giorgio Arena

dfca60b

2018-01-31 10:30:59 +0000

[diff] [blame]

296

* -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)

297

* -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)

giuros01

6d10996

2019-01-07 17:47:19 +0000

[diff] [blame]

298

* -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)

Giorgio Arena

9fe4144

2017-08-23 16:36:24 +0100

[diff] [blame]

299

* -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)

Giorgio Arena

2017-08-01 16:09:33 +0100

[diff] [blame]

300

*

301

*/

Giorgio Arena

04a8f8c

2017-11-23 11:45:24 +0000

[diff] [blame]

302

class CLDepthwiseConvolutionLayer3x3 : public IFunction

Giorgio Arena

2017-08-01 16:09:33 +0100

[diff] [blame]

303

{

304

public:

305

/** Default constructor */

Georgios Pinitas

05045c1

2018-12-07 18:31:47 +0000

[diff] [blame]

306

CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);

307

/** Prevent instances of this class from being copied (As this class contains pointers) */

308

CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;

309

/** Default move constructor */

310

CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;

311

/** Prevent instances of this class from being copied (As this class contains pointers) */

312

CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;

313

/** Default move assignment operator */

314

CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;

Giorgio Arena

2017-08-01 16:09:33 +0100

[diff] [blame]

315

/** Initialize the function's source, destination, conv and border_size.

316

*

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

317

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

318

* @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].

319

* Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

320

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

321

* Data type supported: Same as @p input.

322

* @param[out] output Destination tensor. Data type supported: same as @p input.

323

* @param[in] conv_info Padding and stride information to use for the convolution.

324

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

325

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

326

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

Giorgio Arena

2017-08-01 16:09:33 +0100

[diff] [blame]

327

*/

Manuel Bottini

2019-09-26 17:18:26 +0100

[diff] [blame]

328

ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

329

void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

330

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

Giorgio Arena

2017-08-01 16:09:33 +0100

[diff] [blame]

331

Giorgio Arena

ad0c738

2018-04-23 16:16:21 +0100

[diff] [blame]

332

/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3

333

*

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

334

* @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.

Michele Di Giorgio

2019-10-09 15:32:39 +0100

[diff] [blame]

335

* @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM].

336

* Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

337

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

Giorgio Arena

ad0c738

2018-04-23 16:16:21 +0100

[diff] [blame]

338

* Data type supported: Same as @p input, S32 when input is QASYMM8.

339

* @param[in] output Destination tensor. Data type supported: same as @p input.

340

* @param[in] conv_info Padding and stride information to use for the convolution.

341

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

342

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

343

* @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

344

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

Giorgio Arena

ad0c738

2018-04-23 16:16:21 +0100

[diff] [blame]

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

Usama Arif