/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__
#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__

#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Macros.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
#include "arm_compute/runtime/Tensor.h"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following NEON kernels:
 *
 * -# @ref NEDepthwiseConvolutionLayer3x3Kernel
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
 *
 */
class NEDepthwiseConvolutionLayer3x3 : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer3x3(const NEDepthwiseConvolutionLayer3x3 &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer3x3(NEDepthwiseConvolutionLayer3x3 &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer3x3 &operator=(const NEDepthwiseConvolutionLayer3x3 &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer3x3 &operator=(NEDepthwiseConvolutionLayer3x3 &&) = default;
    /** Initialize the function's source, destination, kernels and border_size.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    ARM_COMPUTE_DEPRECATED_REL_REPLACE(19.08, NEDepthwiseConvolutionLayerOptimized)
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Configure the kernels/functions for the generic pipeline.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     */
    void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
    /** Configure the kernels/functions for the optimized pipeline.
     *
     * @param[in]  input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]  weights          Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
     * @param[in]  biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                              Data type supported: Same as @p input.
     * @param[out] output           Destination tensor. Data type supported: same as @p input.
     * @param[in]  conv_info        Padding and stride information to use for the convolution.
     * @param[in]  depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]  act_info         Activation layer information in case of a fused activation.
     */
    void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info);
    /** Run generic kernel */
    void run_generic();
    /** Run optimized function */
    void run_optimized();

private:
    MemoryGroup                               _memory_group;
    NEDepthwiseConvolutionLayer3x3Kernel      _dwc_kernel;
    NEDepthwiseConvolutionAssemblyDispatch    _dwc_optimized_func;
    NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
    NEFillBorderKernel                        _border_handler;
    NEPermute                                 _permute_input;
    NEPermute                                 _permute_weights;
    NEPermute                                 _permute_output;
    NEActivationLayer                         _activationlayer_function;
    Tensor                                    _accumulator;
    Tensor                                    _permuted_input;
    Tensor                                    _permuted_weights;
    Tensor                                    _permuted_output;
    const ITensor                            *_original_weights;
    bool                                      _has_bias;
    bool                                      _is_quantized;
    bool                                      _is_optimized;
    bool                                      _is_nchw;
    bool                                      _permute;
    bool                                      _is_activationlayer_enabled;
    bool                                      _is_prepared;
};
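
/* Illustrative usage sketch (not part of the original header): how a caller would typically drive
 * NEDepthwiseConvolutionLayer3x3. The tensor shapes and the init/configure/allocate/run sequence
 * below are assumptions following the usual arm_compute function workflow; note the class is
 * deprecated in 19.08 in favour of NEDepthwiseConvolutionLayerOptimized declared below.
 *
 * @code
 * // 32x32 input with 16 channels, 3x3 depthwise weights, F32, default NCHW layout
 * Tensor input, weights, biases, output;
 * input.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
 * weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
 * biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
 * output.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
 *
 * // Stride 1, pad 1 keeps the spatial dimensions unchanged
 * NEDepthwiseConvolutionLayer3x3 depthwise_conv;
 * depthwise_conv.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1));
 *
 * input.allocator()->allocate();
 * weights.allocator()->allocate();
 * biases.allocator()->allocate();
 * output.allocator()->allocate();
 *
 * // ... fill input/weights/biases, then:
 * depthwise_conv.run();
 * @endcode
 */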

/** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
 *
 * @note At the moment only 3x3 and 5x5 convolutions with stride 1 or 2 are supported
 *
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0, and no assembly kernel implementation is present)
 * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
 * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
 * -# @ref NEActivationLayer if fused activation is required
 *
 */
class NEDepthwiseConvolutionLayerOptimized : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayerOptimized(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized &operator=(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayerOptimized &operator=(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Initialize the function's source, destination, kernels and border_size.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimized
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Configure the kernels/functions for the generic pipeline.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     */
    void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
    /** Configure the kernels/functions for the optimized pipeline.
     *
     * @param[in]  input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]  weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in]  biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                              Data type supported: Same as @p input.
     * @param[out] output           Destination tensor. Data type supported: same as @p input.
     * @param[in]  conv_info        Padding and stride information to use for the convolution.
     * @param[in]  depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]  act_info         Activation layer information in case of a fused activation.
     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
    /** Run generic kernel */
    void run_generic();
    /** Run optimized function */
    void run_optimized();

private:
    MemoryGroup                               _memory_group;
    NEDepthwiseConvolutionLayer3x3Kernel      _dwc_kernel;
    NEDepthwiseConvolutionAssemblyDispatch    _dwc_optimized_func;
    NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
    NEFillBorderKernel                        _border_handler;
    NEPermute                                 _permute_input;
    NEPermute                                 _permute_weights;
    NEPermute                                 _permute_output;
    NEActivationLayer                         _activationlayer_function;
    Tensor                                    _accumulator;
    Tensor                                    _permuted_input;
    Tensor                                    _permuted_weights;
    Tensor                                    _permuted_output;
    const ITensor                            *_original_weights;
    bool                                      _has_bias;
    bool                                      _is_quantized;
    bool                                      _is_optimized;
    bool                                      _is_nchw;
    bool                                      _permute;
    bool                                      _is_activationlayer_enabled;
    bool                                      _is_prepared;
};
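
/* Illustrative usage sketch (assumption, not part of the original header): validating the
 * configuration up front and then configuring NEDepthwiseConvolutionLayerOptimized with a fused
 * ReLU activation. Shapes, strides and padding values are examples only.
 *
 * @code
 * TensorInfo input_info(TensorShape(64U, 64U, 24U), 1, DataType::F32);
 * TensorInfo weights_info(TensorShape(3U, 3U, 24U), 1, DataType::F32);
 * TensorInfo biases_info(TensorShape(24U), 1, DataType::F32);
 * TensorInfo output_info(TensorShape(32U, 32U, 24U), 1, DataType::F32); // stride 2 halves W and H
 * PadStrideInfo      conv_info(2, 2, 1, 1);                             // stride 2, pad 1
 * ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
 *
 * Status status = NEDepthwiseConvolutionLayerOptimized::validate(&input_info, &weights_info, &biases_info, &output_info, conv_info, 1, act_info);
 * if(bool(status))
 * {
 *     Tensor input, weights, biases, output;
 *     input.allocator()->init(input_info);
 *     weights.allocator()->init(weights_info);
 *     biases.allocator()->init(biases_info);
 *     output.allocator()->init(output_info);
 *
 *     NEDepthwiseConvolutionLayerOptimized depthwise_conv;
 *     depthwise_conv.configure(&input, &weights, &biases, &output, conv_info, 1, act_info);
 *     // ... allocate the tensors, fill them, then depthwise_conv.run();
 * }
 * @endcode
 */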

/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
 *
 * If data type is F32 and data layout is NHWC:
 * -# @ref NEDepthwiseConvolutionLayerNativeKernel
 *
 * Otherwise:
 * -# @ref NEDepthwiseIm2ColKernel
 * -# @ref NEDepthwiseWeightsReshapeKernel
 * -# @ref NEGEMMMatrixVectorMultiplyKernel
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
 *
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer();
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    NEDepthwiseIm2ColKernel                   _im2col_kernel;
    NEDepthwiseWeightsReshapeKernel           _weights_reshape_kernel;
    NEGEMMMatrixVectorMultiplyKernel          _v2mm_kernel;
    NEDepthwiseConvolutionLayerNativeKernel   _depthwise_conv_kernel;
    NEDepthwiseVectorToTensorKernel           _vector_to_tensor_kernel;
    NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
    NEFillBorderKernel                        _fill_border;
    NEFillBorderKernel                        _v2mm_input_fill_border;
    NEFillBorderKernel                        _v2mm_weights_fill_border;
    NEPermute                                 _permute_input;
    NEPermute                                 _permute_weights;
    NEPermute                                 _permute_output;
    NEActivationLayer                         _activationlayer_function;
    Tensor                                    _input_reshaped;
    Tensor                                    _weights_reshaped;
    Tensor                                    _v2mm_output;
    Tensor                                    _output_reshaped;
    Tensor                                    _permuted_input;
    Tensor                                    _permuted_weights;
    Tensor                                    _permuted_output;
    bool                                      _is_prepared;
    bool                                      _is_quantized;
    bool                                      _is_nhwc;
    bool                                      _is_activationlayer_enabled;
    bool                                      _is_optimized;
    const ITensor                            *_original_weights;
};
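
/* Illustrative usage sketch (assumption, not part of the original header): running the generic
 * NEDepthwiseConvolutionLayer on F32 NHWC data, which selects the
 * NEDepthwiseConvolutionLayerNativeKernel path described above. Shapes are examples only; the
 * assumption here is that in NHWC the TensorShape dimensions are ordered (channels, width, height).
 *
 * @code
 * TensorInfo input_info(TensorShape(16U, 28U, 28U), 1, DataType::F32);
 * TensorInfo weights_info(TensorShape(16U, 5U, 5U), 1, DataType::F32);
 * TensorInfo biases_info(TensorShape(16U), 1, DataType::F32);
 * TensorInfo output_info(TensorShape(16U, 28U, 28U), 1, DataType::F32); // stride 1, pad 2 preserves W and H
 * input_info.set_data_layout(DataLayout::NHWC);
 * weights_info.set_data_layout(DataLayout::NHWC);
 * output_info.set_data_layout(DataLayout::NHWC);
 *
 * Tensor input, weights, biases, output;
 * input.allocator()->init(input_info);
 * weights.allocator()->init(weights_info);
 * biases.allocator()->init(biases_info);
 * output.allocator()->init(output_info);
 *
 * NEDepthwiseConvolutionLayer depthwise_conv;
 * depthwise_conv.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 2, 2));
 *
 * // ... allocate and fill the tensors, then:
 * depthwise_conv.run();
 * @endcode
 */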
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */