blob: ccab6716442e886371ebe801dc55a8d5b0931936 [file] [log] [blame]
/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouf4643372019-11-29 16:17:13 +000024#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
25#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010026
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000027#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010028#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
Michalis Spyroub91e34c2017-12-20 15:50:55 +000029#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010030#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
Georgios Pinitas60e98252018-10-22 16:17:20 +010031#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
Georgios Pinitas284cfe22018-02-13 12:15:13 +000032#include "arm_compute/runtime/NEON/functions/NEPermute.h"
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000033#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010034
35namespace arm_compute
36{
Georgios Pinitas30271c72019-06-24 14:56:34 +010037// Forward declarations
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010038class ITensor;
39
/** Function to execute a depthwise convolution.
 *
 * At configure time this function selects one of two internal implementations
 * (see get_depthwiseconvolution_function()):
 *  - @ref NEDepthwiseConvolutionLayerOptimizedInternal for the optimized (3x3/assembly) path
 *  - @ref NEDepthwiseConvolutionLayerGeneric for the generic native path
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overriden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                          const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                          ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
     *
     * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
     *
     * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
     * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
     * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
     * -# @ref NEActivationLayer if fused activation is required
     *
     */
    class NEDepthwiseConvolutionLayerOptimizedInternal : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Initialize the function's source, destination, kernels and border_size.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimizedInternal
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overriden:
        void run() override;
        void prepare() override;

    private:
        /** Configure the kernels/functions for the generic pipeline.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         */
        void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
        /** Configure the kernels/functions for the optimized pipeline.
         *
         * @param[in]  input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
         * @param[in]  weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]  biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                              Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out] output           Destination tensor. Data type supported: same as @p input.
         * @param[in]  conv_info        Padding and stride information to use for the convolution.
         * @param[in]  depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]  act_info         Activation layer information in case of a fused activation.
         * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
        /** Run generic kernel */
        void run_generic();
        /** Run optimized function */
        void run_optimized();

        MemoryGroup                               _memory_group;
        NEDepthwiseConvolutionLayer3x3Kernel      _dwc_kernel;
        NEDepthwiseConvolutionAssemblyDispatch    _dwc_optimized_func;
        NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
        NEFillBorderKernel                        _border_handler;
        NEPermute                                 _permute_input;
        NEPermute                                 _permute_weights;
        NEPermute                                 _permute_output;
        NEActivationLayer                         _activationlayer_function;
        Tensor                                    _accumulator;
        Tensor                                    _permuted_input;
        Tensor                                    _permuted_weights;
        Tensor                                    _permuted_output;
        const ITensor                            *_original_weights;
        bool                                      _has_bias;
        bool                                      _is_quantized;
        bool                                      _is_optimized;
        bool                                      _is_nchw;
        bool                                      _permute;
        bool                                      _is_activationlayer_enabled;
        bool                                      _is_prepared;
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
     *
     * -# @ref NEDepthwiseConvolutionLayerNativeKernel
     *
     */
    class NEDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerGeneric();
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerGeneric(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overriden:
        void run() override;
        void prepare() override;

    private:
        NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
        NEFillBorderKernel                      _fill_border;
        NEPermute                               _permute_input;
        NEPermute                               _permute_weights;
        NEPermute                               _permute_output;
        NEActivationLayer                       _activationlayer_function;
        Tensor                                  _permuted_input;
        Tensor                                  _permuted_weights;
        Tensor                                  _permuted_output;
        bool                                    _is_prepared;
        bool                                    _is_nchw;
        bool                                    _is_activationlayer_enabled;
        const ITensor                          *_original_weights;
    };

    DepthwiseConvolutionFunction                 _depth_conv_func;  // Path chosen at configure time (optimized vs generic)
    NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;   // Runs when the optimized path is selected
    NEDepthwiseConvolutionLayerGeneric           _func_generic;     // Runs when the generic (native) path is selected
};

/** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
 *
 * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
 *
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
 * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
 * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
 * -# @ref NEActivationLayer if fused activation is required
 *
 * @deprecated Deprecated from release 20.02; use @ref NEDepthwiseConvolutionLayer instead.
 *             This class is a thin wrapper that forwards all work to an internal
 *             @ref NEDepthwiseConvolutionLayer instance.
 */
class NEDepthwiseConvolutionLayerOptimized : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayerOptimized(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayerOptimized &operator=(const NEDepthwiseConvolutionLayerOptimized &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayerOptimized &operator=(NEDepthwiseConvolutionLayerOptimized &&) = default;
    /** Initialize the function's source, destination, kernels and border_size.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, NEDepthwiseConvolutionLayer)
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimized
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overriden:
    void run() override;
    void prepare() override;

private:
    NEDepthwiseConvolutionLayer _func; // All calls are delegated to this replacement implementation
};
Georgios Pinitas8cffcd62018-11-16 17:11:50 +0000371} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000372#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */