/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"

namespace arm_compute
{
class CLCompileContext;
class CLFillBorderKernel;
class CLDepthwiseConvolutionLayerNativeKernel;
class CLDepthwiseConvolutionLayer3x3NCHWKernel;
class CLDepthwiseConvolutionLayer3x3NHWCKernel;
class ICLTensor;

/** Function to execute a depthwise convolution
 */
class CLDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
    /** Default destructor */
    ~CLDepthwiseConvolutionLayer();
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * Valid data layouts:
     * - NHWC
     * - NCHW
     *
     * Valid data type configurations:
     * |src0           |src1               |src2   |dst            |
     * |:--------------|:------------------|:------|:--------------|
     * |F16            |F16                |F16    |F16            |
     * |F32            |F32                |F32    |F32            |
     * |QASYMM8        |QASYMM8            |S32    |QASYMM8        |
     * |QASYMM8        |QSYMM8_PER_CHANNEL |S32    |QASYMM8        |
     * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32    |QASYMM8_SIGNED |
     * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32    |QASYMM8_SIGNED |
     *
     * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out] output Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                   ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
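    // A minimal usage sketch (illustrative assumption, not an example shipped with the library): 'src',
    // 'weights', 'biases' and 'dst' are hypothetical CLTensor objects whose TensorInfo has already been
    // set up with compatible shapes and data types, and the CL scheduler is assumed to have been
    // initialised beforehand (e.g. with CLScheduler::get().default_init()).
    //
    //     CLDepthwiseConvolutionLayer dwc;
    //     dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // stride 1x1, pad 1x1
    //     src.allocator()->allocate();
    //     weights.allocator()->allocate();
    //     biases.allocator()->allocate();
    //     dst.allocator()->allocate();
    //     dwc.run(); // enqueues the OpenCL kernels selected at configure time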
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in] compile_context The compile context to be used.
     * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out] output Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
     *
     * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
     * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                           ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
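    // Validation sketch (illustrative assumption): because validate() only needs ITensorInfo pointers, a
    // configuration can be checked before any CLTensor memory is allocated. 'src_info', 'weights_info',
    // 'biases_info' and 'dst_info' are hypothetical TensorInfo objects describing the tensors.
    //
    //     const Status st = CLDepthwiseConvolutionLayer::validate(&src_info, &weights_info, &biases_info,
    //                                                             &dst_info, PadStrideInfo(1, 1, 0, 0));
    //     ARM_COMPUTE_ERROR_THROW_ON(st); // reports an error if the configuration is not supported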

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer
     *
     * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     * Data type supported: Same as @p input, S32 when input is QASYMM8.
     * @param[in] output Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
     * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                          const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                          ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
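    // Sketch of how the selection is expected to be used (an assumption about the implementation, shown
    // for clarity only): the returned DepthwiseConvolutionFunction value (declared in arm_compute/core/Types.h)
    // picks which of the two internal functions below handles the workload.
    //
    //     switch(get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation))
    //     {
    //         case DepthwiseConvolutionFunction::OPTIMIZED:
    //             // 3x3 specialised path, handled by CLDepthwiseConvolutionLayerInternal3x3
    //             break;
    //         case DepthwiseConvolutionFunction::GENERIC:
    //         default:
    //             // native path, handled by CLDepthwiseConvolutionLayerGeneric
    //             break;
    //     }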

    /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
     *
     * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
     * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
     * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
     *
     */
    class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction
    {
    public:
        /** Default constructor */
        CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
        /** Default move constructor */
        CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
        /** Default move assignment operator */
        CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
        /** Initialize the function's source, destination, conv and border_size.
         *
         * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].
         * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input.
         * @param[out] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                       ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
        /** Initialize the function's source, destination, conv and border_size.
         *
         * @param[in] compile_context The compile context to be used.
         * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].
         * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input.
         * @param[out] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerInternal3x3
         *
         * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
         * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM].
         * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input, S32 when input is QASYMM8.
         * @param[in] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                               ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

        void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
        {
            _memory_group = MemoryGroup(std::move(memory_manager));
        };

    private:
        MemoryGroup _memory_group;
        std::unique_ptr<CLDepthwiseConvolutionLayer3x3NCHWKernel> _kernel_nchw;
        std::unique_ptr<CLDepthwiseConvolutionLayer3x3NHWCKernel> _kernel_nhwc;
        std::unique_ptr<CLFillBorderKernel> _border_handler;
        CLPermute _permute_input_to_nchw;
        CLPermute _permute_weights_to_nchw;
        CLPermute _permute_output_to_nhwc;
        CLTensor _permuted_input;
        CLTensor _permuted_weights;
        CLTensor _permuted_output;
        CLTensor _output_multipliers;
        CLTensor _output_shifts;
        const ITensor *_original_weights;
        const ITensor *_input;
        const ITensor *_output;
        bool _needs_permute;
        bool _is_prepared;
        bool _is_quantized;
        bool _is_nhwc;
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
     *
     * -# @ref CLDepthwiseConvolutionLayerNativeKernel
     * -# @ref CLPermute (x 3) if the data layout is NCHW
     *
     */
    class CLDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
         * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in] compile_context The compile context to be used.
         * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
         * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F32.
         * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

        void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
        {
            _memory_group = MemoryGroup(std::move(memory_manager));
        };

    private:
        MemoryGroup _memory_group;

        std::unique_ptr<CLDepthwiseConvolutionLayerNativeKernel> _dwc_native_kernel;
        CLPermute _permute_input_to_nhwc;
        CLPermute _permute_weights_to_nhwc;
        CLPermute _permute_output_to_nchw;

        CLTensor _permuted_input;
        CLTensor _permuted_weights;
        CLTensor _permuted_output;
        CLTensor _output_multipliers;
        CLTensor _output_shifts;
        const ITensor *_original_weights;
        const ITensor *_input;
        const ITensor *_output;

        bool _needs_permute;
        bool _is_prepared;
        bool _is_quantized;
    };

    std::shared_ptr<IMemoryManager> _memory_manager;

    DepthwiseConvolutionFunction _depth_conv_func;
    CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;
    CLDepthwiseConvolutionLayerGeneric _func_generic;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H */