blob: ccab6716442e886371ebe801dc55a8d5b0931936 [file] [log] [blame]
/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouf4643372019-11-29 16:17:13 +000024#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
25#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010026
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000027#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010028#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
Michalis Spyroub91e34c2017-12-20 15:50:55 +000029#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010030#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
Georgios Pinitas60e98252018-10-22 16:17:20 +010031#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
Georgios Pinitas284cfe22018-02-13 12:15:13 +000032#include "arm_compute/runtime/NEON/functions/NEPermute.h"
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000033#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010034
35namespace arm_compute
36{
Georgios Pinitas30271c72019-06-24 14:56:34 +010037// Forward declarations
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010038class ITensor;
39
/** Function to execute a depthwise convolution.
 *
 * At configure time this function selects one of two internal implementations
 * (see get_depthwiseconvolution_function()):
 *  - @ref NEDepthwiseConvolutionLayerOptimizedInternal for the optimized (3x3/assembly) path
 *  - @ref NEDepthwiseConvolutionLayerGeneric for the generic native path
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overriden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                          const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                          ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
     *
     * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
     *
     * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
     * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
     * -# @ref NEActivationLayer if fused activation is required
     *
     */
    class NEDepthwiseConvolutionLayerOptimizedInternal : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Initialize the function's source, destination, kernels and border_size.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimizedInternal
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overriden:
        void run() override;
        void prepare() override;

    private:
        /** Configure the kernels/functions for the generic pipeline.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         */
        void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
        /** Configure the kernels/functions for the optimized pipeline.
         *
         * @param[in]  input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
         * @param[in]  weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]  biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                              Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out] output           Destination tensor. Data type supported: same as @p input.
         * @param[in]  conv_info        Padding and stride information to use for the convolution.
         * @param[in]  depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]  act_info         Activation layer information in case of a fused activation.
         * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
        /** Run generic kernel */
        void run_generic();
        /** Run optimized function */
        void run_optimized();

        MemoryGroup                               _memory_group;
        NEDepthwiseConvolutionLayer3x3Kernel      _dwc_kernel;
        NEDepthwiseConvolutionAssemblyDispatch    _dwc_optimized_func;
        NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
        NEFillBorderKernel                        _border_handler;
        NEPermute                                 _permute_input;
        NEPermute                                 _permute_weights;
        NEPermute                                 _permute_output;
        NEActivationLayer                         _activationlayer_function;
        Tensor                                    _accumulator;
        Tensor                                    _permuted_input;
        Tensor                                    _permuted_weights;
        Tensor                                    _permuted_output;
        const ITensor                            *_original_weights;
        bool                                      _has_bias;
        bool                                      _is_quantized;
        bool                                      _is_optimized;
        bool                                      _is_nchw;
        bool                                      _permute;
        bool                                      _is_activationlayer_enabled;
        bool                                      _is_prepared;
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
     *
     * -# @ref NEDepthwiseConvolutionLayerNativeKernel
     *
     */
    class NEDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerGeneric();
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerGeneric(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overriden:
        void run() override;
        void prepare() override;

    private:
        NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
        NEFillBorderKernel                      _fill_border;
        NEPermute                               _permute_input;
        NEPermute                               _permute_weights;
        NEPermute                               _permute_output;
        NEActivationLayer                       _activationlayer_function;
        Tensor                                  _permuted_input;
        Tensor                                  _permuted_weights;
        Tensor                                  _permuted_output;
        bool                                    _is_prepared;
        bool                                    _is_nchw;
        bool                                    _is_activationlayer_enabled;
        const ITensor                          *_original_weights;
    };

    DepthwiseConvolutionFunction                 _depth_conv_func;  // Path chosen at configure time (optimized vs generic)
    NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;   // Runs when the optimized path is selected
    NEDepthwiseConvolutionLayerGeneric           _func_generic;     // Runs when the generic (native) path is selected
};

/** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
 *
 * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
 *
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
 * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
 * -# @ref NEActivationLayer if fused activation is required
 *
 * @deprecated Deprecated from release 20.02; use @ref NEDepthwiseConvolutionLayer instead.
 *             This class is a thin wrapper that forwards all work to an internal
 *             @ref NEDepthwiseConvolutionLayer instance.
 */
class NEDepthwiseConvolutionLayerOptimized : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayerOptimized(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized &operator=(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayerOptimized &operator=(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Initialize the function's source, destination, kernels and border_size.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, NEDepthwiseConvolutionLayer)
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimized
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overriden:
    void run() override;
    void prepare() override;

private:
    NEDepthwiseConvolutionLayer _func; // All calls are delegated to this replacement implementation
};
Georgios Pinitas8cffcd62018-11-16 17:11:50 +0000371} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000372#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */