Blame - arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h - ml/ComputeLibrary

2019-04-08 17:30:48 +0100

[diff] [blame]

100

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

Abe Mbise

2018-05-31 16:48:41 +0100

[diff] [blame]

101

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

102

// Inherited methods overridden:

103

void run() override;

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

104

void prepare() override;

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

105

106

private:

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

107

/** Configure the kernels/functions for the generic pipeline.

108

*

109

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).

110

* @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

111

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

112

* Data type supported: Same as @p input.

113

* @param[out] output Destination tensor. Data type supported: same as @p input.

114

* @param[in] conv_info Padding and stride information to use for the convolution.

115

* @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.

116

* @param[in] act_info Activation layer information in case of a fused activation.

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

117

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

118

*

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

119

*/

120

void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

121

unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

122

/** Configure the kernels/functions for the optimized pipeline.

123

*

124

* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.

125

* @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

126

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

127

* Data type supported: Same as @p input.

128

* @param[out] output Destination tensor. Data type supported: same as @p input.

129

* @param[in] conv_info Padding and stride information to use for the convolution.

130

* @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.

131

* @param[in] act_info Activation layer information in case of a fused activation.

132

*/

133

void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

134

unsigned int depth_multiplier, const ActivationLayerInfo &act_info);

135

/** Run generic kernel */

136

void run_generic();

137

/** Run optimized function */

138

void run_optimized();

139

140

private:

141

MemoryGroup _memory_group;

Georgios Pinitas

4074c99

2018-01-30 18:13:46 +0000

[diff] [blame]

142

NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

143

NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;

Michalis Spyrou

b91e34c

2017-12-20 15:50:55 +0000

[diff] [blame]

144

NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;

145

NEFillBorderKernel _border_handler;

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

146

NEPermute _permute_input;

147

NEPermute _permute_weights;

148

NEPermute _permute_output;

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

149

NEActivationLayer _activationlayer_function;

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

150

Tensor _accumulator;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

151

Tensor _permuted_input;

152

Tensor _permuted_weights;

153

Tensor _permuted_output;

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

154

const ITensor *_original_weights;

Michalis Spyrou

b91e34c

2017-12-20 15:50:55 +0000

[diff] [blame]

155

bool _has_bias;

Georgios Pinitas

f72f936

2018-01-12 16:29:45 +0000

[diff] [blame]

156

bool _is_quantized;

Georgios Pinitas

4074c99

2018-01-30 18:13:46 +0000

[diff] [blame]

157

bool _is_optimized;

Giorgio Arena

1ed1fc6

2018-03-26 16:20:05 +0100

[diff] [blame]

158

bool _is_nchw;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

159

bool _permute;

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

160

bool _is_activationlayer_enabled;

Georgios Pinitas

2019-03-11 14:03:23 +0000

[diff] [blame]

161

bool _is_prepared;

Michalis Spyrou

2017-10-18 17:58:22 +0100

[diff] [blame]

162

};

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

163

Georgios Pinitas

30271c7

2019-06-24 14:56:34 +0100

[diff] [blame]

164

/** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:

165

*

166

* @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported

167

*

168

* -# @ref NEFillBorderKernel (if pad_x or pad_y > 0 and no assembly kernel implementation is present)

169

* -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present

170

* -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present

171

* -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required

172

* -# @ref NEActivationLayer if fused activation is required

173

*

174

*/

175

class NEDepthwiseConvolutionLayerOptimized : public IFunction

176

{

177

public:

178

/** Default constructor */

179

NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);

180

/** Prevent instances of this class from being copied (As this class contains pointers) */

181

NEDepthwiseConvolutionLayerOptimized(const NEDepthwiseConvolutionLayerOptimized &) = delete;

182

/** Default move constructor */

183

NEDepthwiseConvolutionLayerOptimized(NEDepthwiseConvolutionLayerOptimized &&) = default;

184

/** Prevent instances of this class from being copied (As this class contains pointers) */

185

NEDepthwiseConvolutionLayerOptimized &operator=(const NEDepthwiseConvolutionLayerOptimized &) = delete;

186

/** Default move assignment operator */

187

NEDepthwiseConvolutionLayerOptimized &operator=(NEDepthwiseConvolutionLayerOptimized &&) = default;

188

/** Initialize the function's source, destination, kernels and border_size.

189

*

190

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).

191

* @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.

192

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

193

* Data type supported: Same as @p input.

194

* @param[out] output Destination tensor. Data type supported: same as @p input.

195

* @param[in] conv_info Padding and stride information to use for the convolution.

196

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

197

* @param[in] act_info (Optional) Activation layer information in case of a fused activation.

198

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

199

*/

200

void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

201

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

202

203

/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimized

204

*

205

* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.

206

* @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.

207

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

208

* Data type supported: Same as @p input.

209

* @param[in] output Destination tensor. Data type supported: same as @p input.

210

* @param[in] conv_info Padding and stride information to use for the convolution.

211

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

212

* @param[in] act_info (Optional) Activation layer information in case of a fused activation.

213

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

*

* @return a status

*/

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,

218

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

219

220

// Inherited methods overridden:

221

void run() override;

222

void prepare() override;

223

224

private:

225

/** Configure the kernels/functions for the generic pipeline.

226

*

227

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).

228

* @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.

229

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

230

* Data type supported: Same as @p input.

231

* @param[out] output Destination tensor. Data type supported: same as @p input.

232

* @param[in] conv_info Padding and stride information to use for the convolution.

233

* @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.

234

* @param[in] act_info Activation layer information in case of a fused activation.

235

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

236

*

237

*/

238

void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

239

unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));

240

/** Configure the kernels/functions for the optimized pipeline.

241

*

242

* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.

243

* @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.

244

* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

245

* Data type supported: Same as @p input.

246

* @param[out] output Destination tensor. Data type supported: same as @p input.

247

* @param[in] conv_info Padding and stride information to use for the convolution.

248

* @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.

249

* @param[in] act_info Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

250

*/

251

void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

252

unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));

253

/** Run generic kernel */

254

void run_generic();

255

/** Run optimized function */

256

void run_optimized();

257

258

private:

259

MemoryGroup _memory_group;

260

NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;

261

NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;

262

NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;

263

NEFillBorderKernel _border_handler;

264

NEPermute _permute_input;

265

NEPermute _permute_weights;

266

NEPermute _permute_output;

267

NEActivationLayer _activationlayer_function;

268

Tensor _accumulator;

269

Tensor _permuted_input;

270

Tensor _permuted_weights;

271

Tensor _permuted_output;

272

const ITensor *_original_weights;

bool _has_bias;

bool _is_quantized;

bool _is_optimized;

bool _is_nchw;

bool _permute;

bool _is_activationlayer_enabled;

bool _is_prepared;

};

Giorgio Arena

2017-12-12 15:04:43 +0000

[diff] [blame]

282

/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

283

*

Giorgio Arena

2019-07-12 14:49:49 +0100

[diff] [blame]

284

* If data type is F32 and data layout is NHWC:

Gian Marco Iodice

bd9097d

2019-07-26 15:31:02 +0100

[diff] [blame]

285

* -# @ref NEDepthwiseConvolutionLayerNativeKernel

Giorgio Arena

2019-07-12 14:49:49 +0100

[diff] [blame]

286

*

287

* Otherwise:

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

288

* -# @ref NEDepthwiseIm2ColKernel

289

* -# @ref NEDepthwiseWeightsReshapeKernel

290

* -# @ref NEGEMMMatrixVectorMultiplyKernel

291

* -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)

292

*

293

*/

Giorgio Arena

04a8f8c

2017-11-23 11:45:24 +0000

[diff] [blame]

294

class NEDepthwiseConvolutionLayer : public IFunction

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

295

{

296

public:

297

/** Default constructor */

Giorgio Arena

04a8f8c

2017-11-23 11:45:24 +0000

[diff] [blame]

298

NEDepthwiseConvolutionLayer();

Georgios Pinitas

1562be3

2018-03-08 19:09:19 +0000

[diff] [blame]

299

/** Prevent instances of this class from being copied (As this class contains pointers) */

300

NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;

301

/** Default move constructor */

302

NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;

303

/** Prevent instances of this class from being copied (As this class contains pointers) */

304

NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;

305

/** Default move assignment operator */

306

NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

307

/** Initialize the function's source, destination, weights and convolution information.

308

*

Georgios Pinitas

8cffcd6

2018-11-16 17:11:50 +0000

[diff] [blame]

309

* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).

Giorgio Arena

7657224

2018-04-04 17:44:26 +0100

[diff] [blame]

310

* @param[out] output Destination tensor. Data type supported: same as @p input.

311

* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.

312

* @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

313

* Data type supported: Same as @p input, S32 when input is QASYMM8.

314

* @param[in] conv_info Padding and stride information to use for the convolution.

315

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

316

* @param[in] act_info (Optional) Activation layer information in case of a fused activation.

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

317

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

318

*/

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

319

void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

320

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

321

Abe Mbise

2018-05-31 16:48:41 +0100

[diff] [blame]

322

/** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer

323

*

Georgios Pinitas

8cffcd6

2018-11-16 17:11:50 +0000

[diff] [blame]

324

* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.

Abe Mbise

2018-05-31 16:48:41 +0100

[diff] [blame]

325

* @param[in] output Destination tensor. Data type supported: same as @p input.

326

* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.

327

* @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.

328

* Data type supported: Same as @p input, S32 when input is QASYMM8.

329

* @param[in] conv_info Padding and stride information to use for the convolution.

330

* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

331

* @param[in] act_info (Optional) Activation layer information in case of a fused activation.

Usama Arif

2019-04-12 10:29:17 +0100

[diff] [blame]

332

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

Abe Mbise

2018-05-31 16:48:41 +0100

[diff] [blame]

333

*

334

* @return a status

335

*/

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

336

static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,

Usama Arif

2019-04-08 17:30:48 +0100

[diff] [blame]

337

unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

Abe Mbise

2018-05-31 16:48:41 +0100

[diff] [blame]

338

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

339

// Inherited methods overridden:

340

void run() override;

Georgios Pinitas

7221933

2018-06-05 14:56:06 +0100

[diff] [blame]

341

void prepare() override;

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

342

343

private:

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

344

NEDepthwiseIm2ColKernel _im2col_kernel;

345

NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel;

346

NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel;

Gian Marco Iodice

bd9097d

2019-07-26 15:31:02 +0100

[diff] [blame]

347

NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

348

NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel;

349

NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;

Giorgio Arena

2019-07-12 14:49:49 +0100

[diff] [blame]

350

NEFillBorderKernel _fill_border;

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

351

NEFillBorderKernel _v2mm_input_fill_border;

352

NEFillBorderKernel _v2mm_weights_fill_border;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

353

NEPermute _permute_input;

354

NEPermute _permute_weights;

355

NEPermute _permute_output;

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

356

NEActivationLayer _activationlayer_function;

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

357

Tensor _input_reshaped;

358

Tensor _weights_reshaped;

359

Tensor _v2mm_output;

360

Tensor _output_reshaped;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

361

Tensor _permuted_input;

362

Tensor _permuted_weights;

363

Tensor _permuted_output;

Georgios Pinitas

7221933

2018-06-05 14:56:06 +0100

[diff] [blame]

364

bool _is_prepared;

Georgios Pinitas

2018-01-22 16:29:17 +0000

[diff] [blame]

365

bool _is_quantized;

Giorgio Arena

2018-08-13 15:49:49 +0100

[diff] [blame]

366

bool _is_nhwc;

Georgios Pinitas

2018-10-22 16:17:20 +0100

[diff] [blame]

367

bool _is_activationlayer_enabled;

Giorgio Arena

2019-07-12 14:49:49 +0100

[diff] [blame]

368

bool _is_optimized;

Georgios Pinitas

1562be3

2018-03-08 19:09:19 +0000

[diff] [blame]

369

const ITensor *_original_weights;

Michalis Spyrou

2017-11-23 12:10:21 +0000

[diff] [blame]

370

};

Georgios Pinitas

8cffcd6

2018-11-16 17:11:50 +0000

[diff] [blame]

371

} // namespace arm_compute

Michalis Spyrou