Blame - arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h - ml/ComputeLibrary

void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

71

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

72

73

/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer

74

*

75

* @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW

76

* @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.

77

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

78

* Data type supported: Same as @p input, S32 when input is QASYMM8.

79

* @param[in] output Destination tensor. Data type supported: same as @p input.

80

* @param[in] conv_info Padding and stride information to use for the convolution.

81

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

82

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

83

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

88

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

89

90

// Inherited methods overridden:

91

void run() override;

92

void prepare() override;

93

94

private:

95

/** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer

96

*

97

* @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW

98

* @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.

99

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

100

* Data type supported: Same as @p input, S32 when input is QASYMM8.

101

* @param[in] output Destination tensor. Data type supported: same as @p input.

102

* @param[in] conv_info Padding and stride information to use for the convolution.

103

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

104

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

105

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

106

* @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.

107

*

108

* @return a Depthwise Convolution Function

109

*/

110

static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,

111

const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

112

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD);

113

114

/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:

115

*

116

* -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)

117

* -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)

118

* -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)

119

* -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)

120

*

121

*/

122

class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction

123

{

124

public:

125

/** Default constructor */

126

CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);

127

/** Prevent instances of this class from being copied (As this class contains pointers) */

128

CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;

129

/** Default move constructor */

130

CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;

131

/** Prevent instances of this class from being copied (As this class contains pointers) */

132

CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;

133

/** Default move assignment operator */

134

CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;

135

/** Initialize the function's source, destination, conv and border_size.

136

*

137

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).

138

* @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.

139

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

140

* Data type supported: Same as @p input.

141

* @param[out] output Destination tensor. Data type supported: same as @p input.

142

* @param[in] conv_info Padding and stride information to use for the convolution.

143

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

144

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

145

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

146

*/

147

void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

148

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

149

150

/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3

151

*

152

* @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.

153

* @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.

154

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

155

* Data type supported: Same as @p input, S32 when input is QASYMM8.

156

* @param[in] output Destination tensor. Data type supported: same as @p input.

157

* @param[in] conv_info Padding and stride information to use for the convolution.

158

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

159

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

160

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

165

ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));

166

167

// Inherited methods overriden:

168

void run() override;

169

void prepare() override;

170

171

void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)

172

{

173

_memory_group = MemoryGroup(std::move(memory_manager));

};

private:

MemoryGroup _memory_group;

178

std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;

179

CLFillBorderKernel _border_handler;

180

CLPermute _permute_input_to_nchw;

181

CLPermute _permute_weights_to_nchw;

182

CLPermute _permute_output_to_nhwc;

183

CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;

184

CLTensor _permuted_input;

185

CLTensor _permuted_weights;

186

CLTensor _permuted_output;

187

const ITensor *_original_weights;

188

bool _needs_permute;

189

bool _needs_weights_reshape;

bool _is_prepared;

};

/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:

194

*

195

* -# @ref CLDepthwiseConvolutionLayerNativeKernel

196

* -# @ref CLPermute (x 3) if the data layout is NCHW

197

*

198

*/

199

class CLDepthwiseConvolutionLayerGeneric : public IFunction

200

{

201

public:

202

/** Default constructor */

203

CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);

204

/** Prevent instances of this class from being copied (As this class contains pointers) */

205

CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;

206

/** Default move constructor */

207

CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;

208

/** Prevent instances of this class from being copied (As this class contains pointers) */

209

CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;

210

/** Default move assignment operator */

211

CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;

212

/** Initialize the function's source, destination, weights and convolution information.

213

*

214

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).

215

* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.

216

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

217

* Data type supported: Same as @p input, S32 when input is QASYMM8.

218

* @param[out] output Destination tensor. Data type supported: same as @p input.

219

* @param[in] conv_info Padding and stride information to use for the convolution.

220

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

221

* @param[in] act_info (Optional) Activation layer information in case of a fused activation.

222

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

223

*/

224

void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,

225

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

226

227

/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric

228

*

229

* @param[in] input Source tensor info. Data type supported: QASYMM8/F32.

230

* @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.

231

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

232

* Data type supported: Same as @p input, S32 when input is QASYMM8.

233

* @param[in] output Destination tensor. Data type supported: same as @p input.

234

* @param[in] conv_info Padding and stride information to use for the convolution.

235

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

236

* @param[in] act_info (Optional) Activation layer information in case of a fused activation.

237

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,

242

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

243

244

// Inherited methods overriden:

245

void run() override;

246

void prepare() override;

247

248

void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)

249

{

250

_memory_group = MemoryGroup(std::move(memory_manager));

};

private:

MemoryGroup _memory_group;

255

256

CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;

257

CLPermute _permute_input_to_nhwc;

258

CLPermute _permute_weights_to_nhwc;

259

CLPermute _permute_output_to_nchw;

260

261

CLTensor _permuted_input;

262

CLTensor _permuted_weights;

263

CLTensor _permuted_output;

264

const ITensor *_original_weights;

bool _needs_permute;

bool _is_prepared;

};

std::shared_ptr<IMemoryManager> _memory_manager;

271

272

DepthwiseConvolutionFunction _depth_conv_func;

273

CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;

274

CLDepthwiseConvolutionLayerGeneric _func_generic;

275

};

276

Giorgio Arena

dfca60b

2018-01-31 10:30:59 +0000

[diff] [blame]

277

/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:

Giorgio Arena

93a690e

2017-08-01 16:09:33 +0100

[diff] [blame]

278

*

Giorgio Arena

dfca60b

2018-01-31 10:30:59 +0000

[diff] [blame]

279

* -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)

280

* -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)

giuros01

6d10996

2019-01-07 17:47:19 +0000

[diff] [blame]

281

* -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)

Giorgio Arena

9fe4144

2017-08-23 16:36:24 +0100

[diff] [blame]

282

* -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)

Giorgio Arena

93a690e

2017-08-01 16:09:33 +0100

[diff] [blame]

283

*

284

*/

Giorgio Arena

04a8f8c

2017-11-23 11:45:24 +0000

[diff] [blame]

285

class CLDepthwiseConvolutionLayer3x3 : public IFunction

Giorgio Arena

93a690e

2017-08-01 16:09:33 +0100

[diff] [blame]

286

{

287

public:

288

/** Default constructor */

Georgios Pinitas

05045c1

2018-12-07 18:31:47 +0000

[diff] [blame]

289

CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);

290

/** Prevent instances of this class from being copied (As this class contains pointers) */

291

CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;

292

/** Default move constructor */

293

CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;

294

/** Prevent instances of this class from being copied (As this class contains pointers) */

295

CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;

296

/** Default move assignment operator */

297

CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;

Giorgio Arena

93a690e

2017-08-01 16:09:33 +0100

[diff] [blame]

298

/** Initialize the function's source, destination, conv and border_size.

299

*

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

300

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).

301

* @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.

Usama Arif

e73686a

2019-04-08 17:30:48 +0100

[diff] [blame]

302

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

303

* Data type supported: Same as @p input.

304

* @param[out] output Destination tensor. Data type supported: same as @p input.

305

* @param[in] conv_info Padding and stride information to use for the convolution.

306

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

307

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

Usama Arif

e73686a

2019-04-08 17:30:48 +0100

[diff] [blame]

308

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

Giorgio Arena

93a690e

2017-08-01 16:09:33 +0100

[diff] [blame]

309

*/

Manuel Bottini

05069f0

2019-09-26 17:18:26 +0100

[diff] [blame^]

310

ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

311

void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

Usama Arif

e73686a

2019-04-08 17:30:48 +0100

[diff] [blame]

312

ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

Giorgio Arena

93a690e

2017-08-01 16:09:33 +0100

[diff] [blame]

313

Giorgio Arena

ad0c738

2018-04-23 16:16:21 +0100

[diff] [blame]

314

/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3

315

*

Usama Arif

e73686a

2019-04-08 17:30:48 +0100

[diff] [blame]

316

* @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.

317

* @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.

318

* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

Giorgio Arena

ad0c738

2018-04-23 16:16:21 +0100

[diff] [blame]

319

* Data type supported: Same as @p input, S32 when input is QASYMM8.

320

* @param[in] output Destination tensor. Data type supported: same as @p input.

321

* @param[in] conv_info Padding and stride information to use for the convolution.

322

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

323

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.

324

* @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.

Usama Arif

e73686a

2019-04-08 17:30:48 +0100

[diff] [blame]

325

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

Giorgio Arena

ad0c738

2018-04-23 16:16:21 +0100

[diff] [blame]

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,

Usama Arif

e73686a

2019-04-08 17:30:48 +0100

[diff] [blame]

330