/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__
#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__

#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Function to execute a depthwise convolution.
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
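
    // A minimal usage sketch for this function (illustrative only: the NCHW tensor shapes, the 3x3
    // kernel and the PadStrideInfo values below are assumptions made for the example, not
    // requirements of this interface):
    //
    //     Tensor src{}, weights{}, biases{}, dst{};
    //     src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
    //     weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
    //     biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    //     dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
    //
    //     NEDepthwiseConvolutionLayer dwc{};
    //     dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
    //
    //     src.allocator()->allocate();
    //     weights.allocator()->allocate();
    //     biases.allocator()->allocate();
    //     dst.allocator()->allocate();
    //
    //     // ... fill src, weights and biases, then:
    //     dwc.run();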

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
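
    // validate() can be called up front, before any tensor memory is allocated, to query whether a
    // given configuration is supported. A hedged sketch (same illustrative shapes as the configure()
    // example above):
    //
    //     const TensorInfo src_info(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    //     const TensorInfo weights_info(TensorShape(3U, 3U, 16U), 1, DataType::F32);
    //     const TensorInfo biases_info(TensorShape(16U), 1, DataType::F32);
    //     const TensorInfo dst_info(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    //
    //     const Status status = NEDepthwiseConvolutionLayer::validate(&src_info, &weights_info, &biases_info, &dst_info, PadStrideInfo(1, 1, 1, 1));
    //     if(status.error_code() != ErrorCode::OK)
    //     {
    //         // Configuration rejected; status.error_description() explains why.
    //     }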

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor info. Data type supported: QASYMM8/F16/F32
     * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for 3x3 QASYMM8.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                           const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                           ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
     *
     * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
     *
     * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
     * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
     * -# @ref NEActivationLayer if fused activation is required
     *
     */
    class NEDepthwiseConvolutionLayerOptimizedInternal : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Initialize the function's source, destination, kernels and border_size.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimizedInternal
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        /** Configure the kernels/functions for the generic pipeline.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         */
        void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
        /** Configure the kernels/functions for the optimized pipeline.
         *
         * @param[in]  input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in]  weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]  biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                              Data type supported: Same as @p input.
         * @param[out] output           Destination tensor. Data type supported: same as @p input.
         * @param[in]  conv_info        Padding and stride information to use for the convolution.
         * @param[in]  depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]  act_info         Activation layer information in case of a fused activation.
         */
        void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
        /** Run generic kernel */
        void run_generic();
        /** Run optimized function */
        void run_optimized();

        MemoryGroup                               _memory_group;
        NEDepthwiseConvolutionLayer3x3Kernel      _dwc_kernel;
        NEDepthwiseConvolutionAssemblyDispatch    _dwc_optimized_func;
        NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
        NEFillBorderKernel                        _border_handler;
        NEPermute                                 _permute_input;
        NEPermute                                 _permute_weights;
        NEPermute                                 _permute_output;
        NEActivationLayer                         _activationlayer_function;
        Tensor                                    _accumulator;
        Tensor                                    _permuted_input;
        Tensor                                    _permuted_weights;
        Tensor                                    _permuted_output;
        const ITensor                            *_original_weights;
        bool                                      _has_bias;
        bool                                      _is_quantized;
        bool                                      _is_optimized;
        bool                                      _is_nchw;
        bool                                      _permute;
        bool                                      _is_activationlayer_enabled;
        bool                                      _is_prepared;
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
     *
     * -# @ref NEDepthwiseConvolutionLayerNativeKernel
     *
     */
    class NEDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerGeneric();
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerGeneric(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
        NEFillBorderKernel                      _fill_border;
        NEPermute                               _permute_input;
        NEPermute                               _permute_weights;
        NEPermute                               _permute_output;
        NEActivationLayer                       _activationlayer_function;
        Tensor                                  _permuted_input;
        Tensor                                  _permuted_weights;
        Tensor                                  _permuted_output;
        bool                                    _is_prepared;
        bool                                    _is_nchw;
        bool                                    _is_activationlayer_enabled;
        const ITensor                          *_original_weights;
    };

    DepthwiseConvolutionFunction                 _depth_conv_func;
    NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;
    NEDepthwiseConvolutionLayerGeneric           _func_generic;
};

/** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
 *
 * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
 *
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
 * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
 * -# @ref NEActivationLayer if fused activation is required
 *
 */
class NEDepthwiseConvolutionLayerOptimized : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayerOptimized(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized &operator=(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayerOptimized &operator=(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Initialize the function's source, destination, kernels and border_size.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, NEDepthwiseConvolutionLayer)
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
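
    // Migration note (a sketch, not an official recipe): the replacement named in the deprecation
    // macro above, @ref NEDepthwiseConvolutionLayer, exposes an identical configure() signature in
    // this header, so switching is typically a drop-in rename. Here src, weights, biases, dst and
    // the convolution parameters are assumed to be set up by the caller as before:
    //
    //     NEDepthwiseConvolutionLayer dwc{};
    //     dwc.configure(&src, &weights, &biases, &dst, conv_info, depth_multiplier, act_info, dilation);
    //     dwc.run();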

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimized
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    NEDepthwiseConvolutionLayer _func;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */