/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouf4643372019-11-29 16:17:13 +000024#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
25#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
Giorgio Arena93a690e2017-08-01 16:09:33 +010026
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"

#include <memory>
#include <utility>
Giorgio Arena93a690e2017-08-01 16:09:33 +010038
Giorgio Arena93a690e2017-08-01 16:09:33 +010039namespace arm_compute
40{
41class ICLTensor;
42
Manuel Bottini05069f02019-09-26 17:18:26 +010043/** Function to execute a depthwise convolution
44 */
45class CLDepthwiseConvolutionLayer : public IFunction
46{
47public:
48 /** Default constructor */
49 CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
50 /** Prevent instances of this class from being copied (As this class contains pointers) */
51 CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
52 /** Default move constructor */
53 CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
54 /** Prevent instances of this class from being copied (As this class contains pointers) */
55 CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
56 /** Default move assignment operator */
57 CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
58 /** Initialize the function's source, destination, weights and convolution information.
59 *
60 * @param[in, out] input Source tensor. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +010061 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
62 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +010063 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
64 * Data type supported: Same as @p input, S32 when input is QASYMM8.
65 * @param[out] output Destination tensor. Data type supported: same as @p input.
66 * @param[in] conv_info Padding and stride information to use for the convolution.
67 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
68 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
69 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
70 */
71 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
72 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
73
74 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
75 *
76 * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +010077 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
78 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +010079 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
80 * Data type supported: Same as @p input, S32 when input is QASYMM8.
81 * @param[in] output Destination tensor. Data type supported: same as @p input.
82 * @param[in] conv_info Padding and stride information to use for the convolution.
83 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
84 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
85 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
86 *
87 * @return a status
88 */
89 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
90 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
91
92 // Inherited methods overriden:
93 void run() override;
94 void prepare() override;
95
96private:
97 /** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer
98 *
99 * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100100 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
101 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100102 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
103 * Data type supported: Same as @p input, S32 when input is QASYMM8.
104 * @param[in] output Destination tensor. Data type supported: same as @p input.
105 * @param[in] conv_info Padding and stride information to use for the convolution.
106 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
107 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
108 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
109 * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
110 *
111 * @return a Depthwise Convolution Function
112 */
113 static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
114 const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
115 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD);
116
117 /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
118 *
119 * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
120 * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
121 * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
122 * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
123 *
124 */
125 class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction
126 {
127 public:
128 /** Default constructor */
129 CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
130 /** Prevent instances of this class from being copied (As this class contains pointers) */
131 CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
132 /** Default move constructor */
133 CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
134 /** Prevent instances of this class from being copied (As this class contains pointers) */
135 CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
136 /** Default move assignment operator */
137 CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
138 /** Initialize the function's source, destination, conv and border_size.
139 *
140 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100141 * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].
142 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100143 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
144 * Data type supported: Same as @p input.
145 * @param[out] output Destination tensor. Data type supported: same as @p input.
146 * @param[in] conv_info Padding and stride information to use for the convolution.
147 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
148 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
149 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
150 */
151 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
152 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
153
154 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
155 *
156 * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100157 * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM].
158 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100159 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
160 * Data type supported: Same as @p input, S32 when input is QASYMM8.
161 * @param[in] output Destination tensor. Data type supported: same as @p input.
162 * @param[in] conv_info Padding and stride information to use for the convolution.
163 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
164 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
165 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
166 *
167 * @return a status
168 */
169 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
170 ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
171
172 // Inherited methods overriden:
173 void run() override;
174 void prepare() override;
175
176 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
177 {
178 _memory_group = MemoryGroup(std::move(memory_manager));
179 };
180
181 private:
182 MemoryGroup _memory_group;
183 std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
184 CLFillBorderKernel _border_handler;
185 CLPermute _permute_input_to_nchw;
186 CLPermute _permute_weights_to_nchw;
187 CLPermute _permute_output_to_nhwc;
188 CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
189 CLTensor _permuted_input;
190 CLTensor _permuted_weights;
191 CLTensor _permuted_output;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100192 CLTensor _output_multipliers;
193 CLTensor _output_shifts;
Manuel Bottini05069f02019-09-26 17:18:26 +0100194 const ITensor *_original_weights;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100195 const ITensor *_input;
196 const ITensor *_output;
Manuel Bottini05069f02019-09-26 17:18:26 +0100197 bool _needs_permute;
198 bool _needs_weights_reshape;
199 bool _is_prepared;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100200 bool _is_quantized;
Manuel Bottini05069f02019-09-26 17:18:26 +0100201 };
202
203 /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
204 *
205 * -# @ref CLDepthwiseConvolutionLayerNativeKernel
206 * -# @ref CLPermute (x 3) if the data layout is NCHW
207 *
208 */
209 class CLDepthwiseConvolutionLayerGeneric : public IFunction
210 {
211 public:
212 /** Default constructor */
213 CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
214 /** Prevent instances of this class from being copied (As this class contains pointers) */
215 CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;
216 /** Default move constructor */
217 CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;
218 /** Prevent instances of this class from being copied (As this class contains pointers) */
219 CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;
220 /** Default move assignment operator */
221 CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;
222 /** Initialize the function's source, destination, weights and convolution information.
223 *
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000224 * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100225 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000226 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100227 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000228 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
Manuel Bottini05069f02019-09-26 17:18:26 +0100229 * @param[out] output Destination tensor. Data type supported: same as @p input.
230 * @param[in] conv_info Padding and stride information to use for the convolution.
231 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
232 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
233 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
234 */
235 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
236 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
237
238 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
239 *
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000240 * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F32.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100241 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000242 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100243 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000244 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
Manuel Bottini05069f02019-09-26 17:18:26 +0100245 * @param[in] output Destination tensor. Data type supported: same as @p input.
246 * @param[in] conv_info Padding and stride information to use for the convolution.
247 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
248 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
249 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
250 *
251 * @return a status
252 */
253 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
254 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
255
256 // Inherited methods overriden:
257 void run() override;
258 void prepare() override;
259
260 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
261 {
262 _memory_group = MemoryGroup(std::move(memory_manager));
263 };
264
265 private:
266 MemoryGroup _memory_group;
267
268 CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
269 CLPermute _permute_input_to_nhwc;
270 CLPermute _permute_weights_to_nhwc;
271 CLPermute _permute_output_to_nchw;
272
273 CLTensor _permuted_input;
274 CLTensor _permuted_weights;
275 CLTensor _permuted_output;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100276 CLTensor _output_multipliers;
277 CLTensor _output_shifts;
Manuel Bottini05069f02019-09-26 17:18:26 +0100278 const ITensor *_original_weights;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100279 const ITensor *_input;
280 const ITensor *_output;
Manuel Bottini05069f02019-09-26 17:18:26 +0100281
282 bool _needs_permute;
283 bool _is_prepared;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100284 bool _is_quantized;
Manuel Bottini05069f02019-09-26 17:18:26 +0100285 };
286
287 std::shared_ptr<IMemoryManager> _memory_manager;
288
289 DepthwiseConvolutionFunction _depth_conv_func;
290 CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;
291 CLDepthwiseConvolutionLayerGeneric _func_generic;
292};
293
Giorgio Arenadfca60b2018-01-31 10:30:59 +0000294/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
Giorgio Arena93a690e2017-08-01 16:09:33 +0100295 *
Giorgio Arenadfca60b2018-01-31 10:30:59 +0000296 * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
297 * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
giuros016d109962019-01-07 17:47:19 +0000298 * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
Giorgio Arena9fe41442017-08-23 16:36:24 +0100299 * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
Giorgio Arena93a690e2017-08-01 16:09:33 +0100300 *
301 */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000302class CLDepthwiseConvolutionLayer3x3 : public IFunction
Giorgio Arena93a690e2017-08-01 16:09:33 +0100303{
304public:
305 /** Default constructor */
Georgios Pinitas05045c12018-12-07 18:31:47 +0000306 CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
307 /** Prevent instances of this class from being copied (As this class contains pointers) */
308 CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;
309 /** Default move constructor */
310 CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;
311 /** Prevent instances of this class from being copied (As this class contains pointers) */
312 CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;
313 /** Default move assignment operator */
314 CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;
Giorgio Arena93a690e2017-08-01 16:09:33 +0100315 /** Initialize the function's source, destination, conv and border_size.
316 *
Giorgio Arena76572242018-04-04 17:44:26 +0100317 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100318 * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].
319 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Usama Arife73686a2019-04-08 17:30:48 +0100320 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Giorgio Arena76572242018-04-04 17:44:26 +0100321 * Data type supported: Same as @p input.
322 * @param[out] output Destination tensor. Data type supported: same as @p input.
323 * @param[in] conv_info Padding and stride information to use for the convolution.
324 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
325 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
Usama Arife73686a2019-04-08 17:30:48 +0100326 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
Giorgio Arena93a690e2017-08-01 16:09:33 +0100327 */
Manuel Bottini05069f02019-09-26 17:18:26 +0100328 ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)
Giorgio Arena76572242018-04-04 17:44:26 +0100329 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
Usama Arife73686a2019-04-08 17:30:48 +0100330 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Giorgio Arena93a690e2017-08-01 16:09:33 +0100331
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100332 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
333 *
Usama Arife73686a2019-04-08 17:30:48 +0100334 * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100335 * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM].
336 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Usama Arife73686a2019-04-08 17:30:48 +0100337 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100338 * Data type supported: Same as @p input, S32 when input is QASYMM8.
339 * @param[in] output Destination tensor. Data type supported: same as @p input.
340 * @param[in] conv_info Padding and stride information to use for the convolution.
341 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
342 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
343 * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
Usama Arife73686a2019-04-08 17:30:48 +0100344 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100345 *
346 * @return a status
347 */
348 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
Usama Arife73686a2019-04-08 17:30:48 +0100349 ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100350
Giorgio Arena9fe41442017-08-23 16:36:24 +0100351 // Inherited methods overriden:
352 void run() override;
Georgios Pinitas72219332018-06-05 14:56:06 +0100353 void prepare() override;
Giorgio Arena9fe41442017-08-23 16:36:24 +0100354
355private:
Manuel Bottini05069f02019-09-26 17:18:26 +0100356 CLDepthwiseConvolutionLayer _func;
Giorgio Arena93a690e2017-08-01 16:09:33 +0100357};
Georgios Pinitas05045c12018-12-07 18:31:47 +0000358} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000359#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H */