/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__
#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__

#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Function to execute a depthwise convolution.
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
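
    // A minimal usage sketch for this function (illustrative only: the NCHW tensor shapes, the 3x3
    // kernel and the PadStrideInfo values below are assumptions made for the example, not
    // requirements of this interface):
    //
    //     Tensor src{}, weights{}, biases{}, dst{};
    //     src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
    //     weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
    //     biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    //     dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
    //
    //     NEDepthwiseConvolutionLayer dwc{};
    //     dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
    //
    //     src.allocator()->allocate();
    //     weights.allocator()->allocate();
    //     biases.allocator()->allocate();
    //     dst.allocator()->allocate();
    //
    //     // ... fill src, weights and biases, then:
    //     dwc.run();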

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
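
    // validate() can be called up front, before any tensor memory is allocated, to query whether a
    // given configuration is supported. A hedged sketch (same illustrative shapes as the configure()
    // example above):
    //
    //     const TensorInfo src_info(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    //     const TensorInfo weights_info(TensorShape(3U, 3U, 16U), 1, DataType::F32);
    //     const TensorInfo biases_info(TensorShape(16U), 1, DataType::F32);
    //     const TensorInfo dst_info(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    //
    //     const Status status = NEDepthwiseConvolutionLayer::validate(&src_info, &weights_info, &biases_info, &dst_info, PadStrideInfo(1, 1, 1, 1));
    //     if(status.error_code() != ErrorCode::OK)
    //     {
    //         // Configuration rejected; status.error_description() explains why.
    //     }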

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor info. Data type supported: QASYMM8/F16/F32
     * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for 3x3 QASYMM8.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                           const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                           ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
     *
     * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
     *
     * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
     * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
     * -# @ref NEActivationLayer if fused activation is required
     *
     */
    class NEDepthwiseConvolutionLayerOptimizedInternal : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Initialize the function's source, destination, kernels and border_size.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimizedInternal
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        /** Configure the kernels/functions for the generic pipeline.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         */
        void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
        /** Configure the kernels/functions for the optimized pipeline.
         *
         * @param[in]  input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in]  weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]  biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                              Data type supported: Same as @p input.
         * @param[out] output           Destination tensor. Data type supported: same as @p input.
         * @param[in]  conv_info        Padding and stride information to use for the convolution.
         * @param[in]  depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]  act_info         Activation layer information in case of a fused activation.
         */
        void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
        /** Run generic kernel */
        void run_generic();
        /** Run optimized function */
        void run_optimized();

        MemoryGroup                               _memory_group;
        NEDepthwiseConvolutionLayer3x3Kernel      _dwc_kernel;
        NEDepthwiseConvolutionAssemblyDispatch    _dwc_optimized_func;
        NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
        NEFillBorderKernel                        _border_handler;
        NEPermute                                 _permute_input;
        NEPermute                                 _permute_weights;
        NEPermute                                 _permute_output;
        NEActivationLayer                         _activationlayer_function;
        Tensor                                    _accumulator;
        Tensor                                    _permuted_input;
        Tensor                                    _permuted_weights;
        Tensor                                    _permuted_output;
        const ITensor                            *_original_weights;
        bool                                      _has_bias;
        bool                                      _is_quantized;
        bool                                      _is_optimized;
        bool                                      _is_nchw;
        bool                                      _permute;
        bool                                      _is_activationlayer_enabled;
        bool                                      _is_prepared;
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
     *
     * -# @ref NEDepthwiseConvolutionLayerNativeKernel
     *
     */
    class NEDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerGeneric();
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerGeneric(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
        NEFillBorderKernel                      _fill_border;
        NEPermute                               _permute_input;
        NEPermute                               _permute_weights;
        NEPermute                               _permute_output;
        NEActivationLayer                       _activationlayer_function;
        Tensor                                  _permuted_input;
        Tensor                                  _permuted_weights;
        Tensor                                  _permuted_output;
        bool                                    _is_prepared;
        bool                                    _is_nchw;
        bool                                    _is_activationlayer_enabled;
        const ITensor                          *_original_weights;
    };

    DepthwiseConvolutionFunction                 _depth_conv_func;
    NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;
    NEDepthwiseConvolutionLayerGeneric           _func_generic;
};

/** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
 *
 * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
 *
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
 * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
 * -# @ref NEActivationLayer if fused activation is required
 *
 */
class NEDepthwiseConvolutionLayerOptimized : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayerOptimized(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized &operator=(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayerOptimized &operator=(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Initialize the function's source, destination, kernels and border_size.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, NEDepthwiseConvolutionLayer)
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
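
    // Migration note (a sketch, not an official recipe): the replacement named in the deprecation
    // macro above, @ref NEDepthwiseConvolutionLayer, exposes an identical configure() signature in
    // this header, so switching is typically a drop-in rename. Here src, weights, biases, dst and
    // the convolution parameters are assumed to be set up by the caller as before:
    //
    //     NEDepthwiseConvolutionLayer dwc{};
    //     dwc.configure(&src, &weights, &biases, &dst, conv_info, depth_multiplier, act_info, dilation);
    //     dwc.run();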

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimized
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    NEDepthwiseConvolutionLayer _func;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */