/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"

namespace arm_compute
{
class CLCompileContext;
class CLFillBorderKernel;
class CLDepthwiseConvolutionLayerNativeKernel;
class CLDepthwiseConvolutionLayer3x3NCHWKernel;
class CLDepthwiseConvolutionLayer3x3NHWCKernel;
class ICLTensor;

/** Function to execute a depthwise convolution
 */
class CLDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
    /** Default destructor */
    ~CLDepthwiseConvolutionLayer();
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * Valid data layouts:
     * - NHWC
     * - NCHW
     *
     * Valid data type configurations:
     * |src0           |src1               |src2   |dst            |
     * |:--------------|:------------------|:------|:--------------|
     * |F16            |F16                |F16    |F16            |
     * |F32            |F32                |F32    |F32            |
     * |QASYMM8        |QASYMM8            |S32    |QASYMM8        |
     * |QASYMM8        |QSYMM8_PER_CHANNEL |S32    |QASYMM8        |
     * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32    |QASYMM8_SIGNED |
     * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32    |QASYMM8_SIGNED |
     *
     * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out] output Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                   ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
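    // A minimal usage sketch (illustrative assumption, not an example shipped with the library): 'src',
    // 'weights', 'biases' and 'dst' are hypothetical CLTensor objects whose TensorInfo has already been
    // set up with compatible shapes and data types, and the CL scheduler is assumed to have been
    // initialised beforehand (e.g. with CLScheduler::get().default_init()).
    //
    //     CLDepthwiseConvolutionLayer dwc;
    //     dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1)); // stride 1x1, pad 1x1
    //     src.allocator()->allocate();
    //     weights.allocator()->allocate();
    //     biases.allocator()->allocate();
    //     dst.allocator()->allocate();
    //     dwc.run(); // enqueues the OpenCL kernels selected at configure time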
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * @param[in] compile_context The compile context to be used.
     * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out] output Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
     *
     * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
     * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                           ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
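    // Validation sketch (illustrative assumption): because validate() only needs ITensorInfo pointers, a
    // configuration can be checked before any CLTensor memory is allocated. 'src_info', 'weights_info',
    // 'biases_info' and 'dst_info' are hypothetical TensorInfo objects describing the tensors.
    //
    //     const Status st = CLDepthwiseConvolutionLayer::validate(&src_info, &weights_info, &biases_info,
    //                                                             &dst_info, PadStrideInfo(1, 1, 0, 0));
    //     ARM_COMPUTE_ERROR_THROW_ON(st); // reports an error if the configuration is not supported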

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer
     *
     * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     * Data type supported: Same as @p input, S32 when input is QASYMM8.
     * @param[in] output Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
     * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                          const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                          ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
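    // Sketch of how the selection is expected to be used (an assumption about the implementation, shown
    // for clarity only): the returned DepthwiseConvolutionFunction value (declared in arm_compute/core/Types.h)
    // picks which of the two internal functions below handles the workload.
    //
    //     switch(get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation))
    //     {
    //         case DepthwiseConvolutionFunction::OPTIMIZED:
    //             // 3x3 specialised path, handled by CLDepthwiseConvolutionLayerInternal3x3
    //             break;
    //         case DepthwiseConvolutionFunction::GENERIC:
    //         default:
    //             // native path, handled by CLDepthwiseConvolutionLayerGeneric
    //             break;
    //     }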

    /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
     *
     * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
     * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
     * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
     *
     */
    class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction
    {
    public:
        /** Default constructor */
        CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
        /** Default move constructor */
        CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
        /** Default move assignment operator */
        CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
        /** Initialize the function's source, destination, conv and border_size.
         *
         * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].
         * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input.
         * @param[out] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                       ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
        /** Initialize the function's source, destination, conv and border_size.
         *
         * @param[in] compile_context The compile context to be used.
         * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].
         * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input.
         * @param[out] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerInternal3x3
         *
         * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
         * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM].
         * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input, S32 when input is QASYMM8.
         * @param[in] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                               ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

        void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
        {
            _memory_group = MemoryGroup(std::move(memory_manager));
        };

    private:
        MemoryGroup _memory_group;
        std::unique_ptr<CLDepthwiseConvolutionLayer3x3NCHWKernel> _kernel_nchw;
        std::unique_ptr<CLDepthwiseConvolutionLayer3x3NHWCKernel> _kernel_nhwc;
        std::unique_ptr<CLFillBorderKernel> _border_handler;
        CLPermute _permute_input_to_nchw;
        CLPermute _permute_weights_to_nchw;
        CLPermute _permute_output_to_nhwc;
        CLTensor _permuted_input;
        CLTensor _permuted_weights;
        CLTensor _permuted_output;
        CLTensor _output_multipliers;
        CLTensor _output_shifts;
        const ITensor *_original_weights;
        const ITensor *_input;
        const ITensor *_output;
        bool _needs_permute;
        bool _is_prepared;
        bool _is_quantized;
        bool _is_nhwc;
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
     *
     * -# @ref CLDepthwiseConvolutionLayerNativeKernel
     * -# @ref CLPermute (x 3) if the data layout is NCHW
     *
     */
    class CLDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
         * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in] compile_context The compile context to be used.
         * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
         * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F32.
         * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

        void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
        {
            _memory_group = MemoryGroup(std::move(memory_manager));
        };

    private:
        MemoryGroup _memory_group;

        std::unique_ptr<CLDepthwiseConvolutionLayerNativeKernel> _dwc_native_kernel;
        CLPermute _permute_input_to_nhwc;
        CLPermute _permute_weights_to_nhwc;
        CLPermute _permute_output_to_nchw;

        CLTensor _permuted_input;
        CLTensor _permuted_weights;
        CLTensor _permuted_output;
        CLTensor _output_multipliers;
        CLTensor _output_shifts;
        const ITensor *_original_weights;
        const ITensor *_input;
        const ITensor *_output;

        bool _needs_permute;
        bool _is_prepared;
        bool _is_quantized;
    };

    std::shared_ptr<IMemoryManager> _memory_manager;

    DepthwiseConvolutionFunction _depth_conv_func;
    CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;
    CLDepthwiseConvolutionLayerGeneric _func_generic;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H */