/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__
#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__

#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
#include "arm_compute/runtime/Tensor.h"

namespace arm_compute
{
class ITensor;

/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following NEON kernels:
 *
 * -# @ref NEDepthwiseConvolutionLayer3x3Kernel
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
 *
 */
class NEDepthwiseConvolutionLayer3x3 : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer3x3(const NEDepthwiseConvolutionLayer3x3 &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer3x3(NEDepthwiseConvolutionLayer3x3 &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer3x3 &operator=(const NEDepthwiseConvolutionLayer3x3 &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer3x3 &operator=(NEDepthwiseConvolutionLayer3x3 &&) = default;
    /** Initialize the function's source, destination, kernels and border_size.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
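
    // Example: a minimal usage sketch for the 3x3 variant, kept as a comment so the header
    // stays compilable. The tensor names, the stride/padding values, the depth multiplier
    // and the fused ReLU below are illustrative assumptions, not part of this API.
    //
    //   Tensor src, weights, biases, dst;
    //   // ... set up TensorInfo objects and allocate backing memory (omitted) ...
    //   NEDepthwiseConvolutionLayer3x3 dwconv;
    //   dwconv.configure(&src, &weights, &biases, &dst,
    //                    PadStrideInfo(1, 1, 1, 1) /* stride_x, stride_y, pad_x, pad_y */,
    //                    1 /* depth_multiplier */,
    //                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    //   dwconv.run();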

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
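
    // Example: a hedged validate() pre-check sketch (the shapes and data type below are
    // assumptions chosen for illustration). validate() only inspects ITensorInfo objects,
    // so no memory has to be allocated before calling it.
    //
    //   const TensorInfo src(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    //   const TensorInfo wei(TensorShape(3U, 3U, 16U), 1, DataType::F32);
    //   const TensorInfo dst(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    //   const Status status = NEDepthwiseConvolutionLayer3x3::validate(&src, &wei, nullptr, &dst,
    //                                                                  PadStrideInfo(1, 1, 1, 1));
    //   if(!bool(status)) { /* fall back to another convolution function */ }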

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Configure the kernels/functions for the generic pipeline.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.
     * @param[in]      act_info         Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
    /** Configure the kernels/functions for the optimized pipeline.
     *
     * @param[in]  input            Source tensor. Data type supported: QASYMM8/F16/F32.
     * @param[in]  weights          Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
     * @param[in]  biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                              Data type supported: Same as @p input.
     * @param[out] output           Destination tensor. Data type supported: same as @p input.
     * @param[in]  conv_info        Padding and stride information to use for the convolution.
     * @param[in]  depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.
     * @param[in]  act_info         Activation layer information in case of a fused activation.
     */
    void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info);
    /** Run generic kernel */
    void run_generic();
    /** Run optimized function */
    void run_optimized();

private:
    MemoryGroup                               _memory_group;
    NEDepthwiseConvolutionLayer3x3Kernel      _dwc_kernel;
    NEDepthwiseConvolutionAssemblyDispatch    _dwc_optimized_func;
    NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
    NEFillBorderKernel                        _border_handler;
    NEPermute                                 _permute_input;
    NEPermute                                 _permute_weights;
    NEPermute                                 _permute_output;
    NEActivationLayer                         _activationlayer_function;
    Tensor                                    _accumulator;
    Tensor                                    _permuted_input;
    Tensor                                    _permuted_weights;
    Tensor                                    _permuted_output;
    const ITensor                            *_original_weights;
    bool                                      _has_bias;
    bool                                      _is_quantized;
    bool                                      _is_optimized;
    bool                                      _is_nchw;
    bool                                      _permute;
    bool                                      _is_activationlayer_enabled;
    bool                                      _is_prepared;
};

/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
 *
 * -# @ref NEDepthwiseIm2ColKernel
 * -# @ref NEDepthwiseWeightsReshapeKernel
 * -# @ref NEGEMMMatrixVectorMultiplyKernel
 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
 *
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer();
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in]      biases           (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
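
    // Example: a sketched call for a generic (non-3x3) depthwise kernel, written as a comment
    // so the header remains compilable. The 5x5 kernel, stride of 2, padding of 2 and fused
    // ReLU are illustrative assumptions; the weights tensor simply has to follow the
    // [kernel_x, kernel_y, IFM] layout documented above.
    //
    //   Tensor src, weights, biases, dst;
    //   // ... set up TensorInfo objects and allocate backing memory (omitted) ...
    //   NEDepthwiseConvolutionLayer dwconv;
    //   dwconv.configure(&src, &weights, &biases, &dst,
    //                    PadStrideInfo(2, 2, 2, 2) /* stride_x, stride_y, pad_x, pad_y */,
    //                    1 /* depth_multiplier */,
    //                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    //   dwconv.run();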

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
     * @param[in] biases           (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
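
    // Example: a hedged validate() sketch exercising the dilation parameter (the shapes, the
    // 5x5 kernel and the dilation factor below are assumptions for illustration only). With a
    // 5x5 kernel and dilation 2, the effective kernel extent is 9, so padding 4 with stride 1
    // keeps the output spatial size equal to the input.
    //
    //   const TensorInfo src(TensorShape(64U, 64U, 8U), 1, DataType::F32);
    //   const TensorInfo wei(TensorShape(5U, 5U, 8U), 1, DataType::F32);
    //   const TensorInfo dst(TensorShape(64U, 64U, 8U), 1, DataType::F32);
    //   const Status status = NEDepthwiseConvolutionLayer::validate(&src, &wei, nullptr, &dst,
    //                                                               PadStrideInfo(1, 1, 4, 4),
    //                                                               1 /* depth_multiplier */,
    //                                                               ActivationLayerInfo(),
    //                                                               Size2D(2U, 2U) /* dilation */);
    //   if(!bool(status)) { /* configuration not supported on this target */ }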

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    NEDepthwiseIm2ColKernel                   _im2col_kernel;
    NEDepthwiseWeightsReshapeKernel           _weights_reshape_kernel;
    NEGEMMMatrixVectorMultiplyKernel          _v2mm_kernel;
    NEDepthwiseVectorToTensorKernel           _vector_to_tensor_kernel;
    NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
    NEFillBorderKernel                        _v2mm_input_fill_border;
    NEFillBorderKernel                        _v2mm_weights_fill_border;
    NEPermute                                 _permute_input;
    NEPermute                                 _permute_weights;
    NEPermute                                 _permute_output;
    NEActivationLayer                         _activationlayer_function;
    Tensor                                    _input_reshaped;
    Tensor                                    _weights_reshaped;
    Tensor                                    _v2mm_output;
    Tensor                                    _output_reshaped;
    Tensor                                    _permuted_input;
    Tensor                                    _permuted_weights;
    Tensor                                    _permuted_output;
    bool                                      _is_prepared;
    bool                                      _is_quantized;
    bool                                      _is_nhwc;
    bool                                      _is_activationlayer_enabled;
    const ITensor                            *_original_weights;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */