/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__
25#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__
26
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000027#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
Michalis Spyroub7b31532017-11-23 12:10:21 +000028#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
29#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
30#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
Michalis Spyroub91e34c2017-12-20 15:50:55 +000031#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010032#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
Michalis Spyroub7b31532017-11-23 12:10:21 +000033#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010034#include "arm_compute/core/Types.h"
35#include "arm_compute/runtime/IFunction.h"
36#include "arm_compute/runtime/IMemoryManager.h"
37#include "arm_compute/runtime/MemoryGroup.h"
Georgios Pinitas60e98252018-10-22 16:17:20 +010038#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
Georgios Pinitas284cfe22018-02-13 12:15:13 +000039#include "arm_compute/runtime/NEON/functions/NEPermute.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010040#include "arm_compute/runtime/Tensor.h"
41
42namespace arm_compute
43{
44class ITensor;
45
46/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following NEON kernels:
47 *
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000048 * -# @ref NEDepthwiseConvolutionLayer3x3
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010049 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
50 *
51 */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000052class NEDepthwiseConvolutionLayer3x3 : public IFunction
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010053{
54public:
55 /** Default constructor */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000056 NEDepthwiseConvolutionLayer3x3();
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010057 /** Initialize the function's source, destination, kernels and border_size.
58 *
Georgios Pinitas20c246a2018-09-12 16:45:53 +010059 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
Giorgio Arena76572242018-04-04 17:44:26 +010060 * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
61 * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
62 * Data type supported: Same as @p input.
63 * @param[out] output Destination tensor. Data type supported: same as @p input.
64 * @param[in] conv_info Padding and stride information to use for the convolution.
65 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
Georgios Pinitas60e98252018-10-22 16:17:20 +010066 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010067 */
Georgios Pinitas60e98252018-10-22 16:17:20 +010068 void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
69 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010070
Abe Mbise7784c832018-05-31 16:48:41 +010071 /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
72 *
Georgios Pinitas20c246a2018-09-12 16:45:53 +010073 * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
Abe Mbise7784c832018-05-31 16:48:41 +010074 * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
75 * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
76 * Data type supported: Same as @p input.
77 * @param[in] output Destination tensor. Data type supported: same as @p input.
78 * @param[in] conv_info Padding and stride information to use for the convolution.
79 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
Georgios Pinitas60e98252018-10-22 16:17:20 +010080 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
Abe Mbise7784c832018-05-31 16:48:41 +010081 *
82 * @return a status
83 */
Georgios Pinitas60e98252018-10-22 16:17:20 +010084 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
85 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
Abe Mbise7784c832018-05-31 16:48:41 +010086
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010087 // Inherited methods overriden:
88 void run() override;
89
90private:
Georgios Pinitas4074c992018-01-30 18:13:46 +000091 NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;
Michalis Spyroub91e34c2017-12-20 15:50:55 +000092 NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
93 NEFillBorderKernel _border_handler;
Georgios Pinitas284cfe22018-02-13 12:15:13 +000094 NEPermute _permute_input;
95 NEPermute _permute_weights;
96 NEPermute _permute_output;
Georgios Pinitas60e98252018-10-22 16:17:20 +010097 NEActivationLayer _activationlayer_function;
Georgios Pinitasf72f9362018-01-12 16:29:45 +000098 Tensor _accumulator;
Giorgio Arena26b22162018-08-13 15:49:49 +010099 Tensor _permuted_input;
100 Tensor _permuted_weights;
101 Tensor _permuted_output;
Michalis Spyroub91e34c2017-12-20 15:50:55 +0000102 bool _has_bias;
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000103 bool _is_quantized;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000104 bool _is_optimized;
105 bool _are_weights_reshaped;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100106 bool _is_nchw;
107 bool _is_first_run;
Giorgio Arena26b22162018-08-13 15:49:49 +0100108 bool _permute;
Georgios Pinitas60e98252018-10-22 16:17:20 +0100109 bool _is_activationlayer_enabled;
Michalis Spyrou7362f0d2017-10-18 17:58:22 +0100110};
Michalis Spyroub7b31532017-11-23 12:10:21 +0000111
Giorgio Arena39725282017-12-12 15:04:43 +0000112/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
Michalis Spyroub7b31532017-11-23 12:10:21 +0000113 *
114 * -# @ref NEDepthwiseIm2ColKernel
115 * -# @ref NEDepthwiseWeightsReshapeKernel
116 * -# @ref NEGEMMMatrixVectorMultiplyKernel
117 * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
118 *
119 */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000120class NEDepthwiseConvolutionLayer : public IFunction
Michalis Spyroub7b31532017-11-23 12:10:21 +0000121{
122public:
123 /** Default constructor */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000124 NEDepthwiseConvolutionLayer();
Georgios Pinitas1562be32018-03-08 19:09:19 +0000125 /** Prevent instances of this class from being copied (As this class contains pointers) */
126 NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
127 /** Default move constructor */
128 NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
129 /** Prevent instances of this class from being copied (As this class contains pointers) */
130 NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
131 /** Default move assignment operator */
132 NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
Michalis Spyroub7b31532017-11-23 12:10:21 +0000133 /** Initialize the function's source, destination, weights and convolution information.
134 *
Giorgio Arena76572242018-04-04 17:44:26 +0100135 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
136 * @param[out] output Destination tensor. Data type supported: same as @p input.
137 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
138 * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
139 * Data type supported: Same as @p input, S32 when input is QASYMM8.
140 * @param[in] conv_info Padding and stride information to use for the convolution.
141 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
Georgios Pinitas60e98252018-10-22 16:17:20 +0100142 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
Michalis Spyroub7b31532017-11-23 12:10:21 +0000143 */
Georgios Pinitas60e98252018-10-22 16:17:20 +0100144 void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
145 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
Michalis Spyroub7b31532017-11-23 12:10:21 +0000146
Abe Mbise7784c832018-05-31 16:48:41 +0100147 /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
148 *
149 * @param[in] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
150 * @param[in] output Destination tensor. Data type supported: same as @p input.
151 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
152 * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
153 * Data type supported: Same as @p input, S32 when input is QASYMM8.
154 * @param[in] conv_info Padding and stride information to use for the convolution.
155 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
Georgios Pinitas60e98252018-10-22 16:17:20 +0100156 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
Abe Mbise7784c832018-05-31 16:48:41 +0100157 *
158 * @return a status
159 */
Georgios Pinitas60e98252018-10-22 16:17:20 +0100160 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
161 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo());
Abe Mbise7784c832018-05-31 16:48:41 +0100162
Michalis Spyroub7b31532017-11-23 12:10:21 +0000163 // Inherited methods overriden:
164 void run() override;
Georgios Pinitas72219332018-06-05 14:56:06 +0100165 void prepare() override;
Michalis Spyroub7b31532017-11-23 12:10:21 +0000166
167private:
Georgios Pinitasd05dce42018-01-22 16:29:17 +0000168 NEDepthwiseIm2ColKernel _im2col_kernel;
169 NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel;
170 NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
171 NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel;
172 NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
173 NEFillBorderKernel _v2mm_input_fill_border;
174 NEFillBorderKernel _v2mm_weights_fill_border;
Giorgio Arena26b22162018-08-13 15:49:49 +0100175 NEPermute _permute_input;
176 NEPermute _permute_weights;
177 NEPermute _permute_output;
Georgios Pinitas60e98252018-10-22 16:17:20 +0100178 NEActivationLayer _activationlayer_function;
Georgios Pinitasd05dce42018-01-22 16:29:17 +0000179 Tensor _input_reshaped;
180 Tensor _weights_reshaped;
181 Tensor _v2mm_output;
182 Tensor _output_reshaped;
Giorgio Arena26b22162018-08-13 15:49:49 +0100183 Tensor _permuted_input;
184 Tensor _permuted_weights;
185 Tensor _permuted_output;
Georgios Pinitas72219332018-06-05 14:56:06 +0100186 bool _is_prepared;
Georgios Pinitasd05dce42018-01-22 16:29:17 +0000187 bool _is_quantized;
Giorgio Arena26b22162018-08-13 15:49:49 +0100188 bool _is_nhwc;
Georgios Pinitas60e98252018-10-22 16:17:20 +0100189 bool _is_activationlayer_enabled;
Georgios Pinitas1562be32018-03-08 19:09:19 +0000190 const ITensor *_original_weights;
Michalis Spyroub7b31532017-11-23 12:10:21 +0000191};
Michalis Spyrou7362f0d2017-10-18 17:58:22 +0100192}
193#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */