blob: b8b11f08b201952bfe7541d263bee2f35aef3ba0 [file] [log] [blame]
/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Giorgio Arena9fe41442017-08-23 16:36:24 +010024#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
25#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
Giorgio Arena93a690e2017-08-01 16:09:33 +010026
Giorgio Arenadfca60b2018-01-31 10:30:59 +000027#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
28#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
Michele Di Giorgioa046e162019-10-08 09:36:26 +010029#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
giuros016d109962019-01-07 17:47:19 +000030#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010031#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
Giorgio Arenadfca60b2018-01-31 10:30:59 +000032#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010033#include "arm_compute/core/Types.h"
34#include "arm_compute/runtime/CL/CLTensor.h"
Georgios Pinitas05045c12018-12-07 18:31:47 +000035#include "arm_compute/runtime/CL/functions/CLPermute.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010036#include "arm_compute/runtime/IFunction.h"
Georgios Pinitas26014cf2019-09-09 19:00:57 +010037#include "arm_compute/runtime/MemoryGroup.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010038
Giorgio Arena93a690e2017-08-01 16:09:33 +010039namespace arm_compute
40{
41class ICLTensor;
42
Manuel Bottini05069f02019-09-26 17:18:26 +010043/** Function to execute a depthwise convolution
44 */
45class CLDepthwiseConvolutionLayer : public IFunction
46{
47public:
48 /** Default constructor */
49 CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
50 /** Prevent instances of this class from being copied (As this class contains pointers) */
51 CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
52 /** Default move constructor */
53 CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
54 /** Prevent instances of this class from being copied (As this class contains pointers) */
55 CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
56 /** Default move assignment operator */
57 CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
58 /** Initialize the function's source, destination, weights and convolution information.
59 *
60 * @param[in, out] input Source tensor. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
61 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
62 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
63 * Data type supported: Same as @p input, S32 when input is QASYMM8.
64 * @param[out] output Destination tensor. Data type supported: same as @p input.
65 * @param[in] conv_info Padding and stride information to use for the convolution.
66 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
67 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
68 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
69 */
70 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
71 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
72
73 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
74 *
75 * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
76 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
77 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
78 * Data type supported: Same as @p input, S32 when input is QASYMM8.
79 * @param[in] output Destination tensor. Data type supported: same as @p input.
80 * @param[in] conv_info Padding and stride information to use for the convolution.
81 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
82 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
83 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
84 *
85 * @return a status
86 */
87 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
88 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
89
90 // Inherited methods overriden:
91 void run() override;
92 void prepare() override;
93
94private:
95 /** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer
96 *
97 * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
98 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
99 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
100 * Data type supported: Same as @p input, S32 when input is QASYMM8.
101 * @param[in] output Destination tensor. Data type supported: same as @p input.
102 * @param[in] conv_info Padding and stride information to use for the convolution.
103 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
104 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
105 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
106 * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
107 *
108 * @return a Depthwise Convolution Function
109 */
110 static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
111 const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
112 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD);
113
114 /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
115 *
116 * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
117 * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
118 * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
119 * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
120 *
121 */
122 class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction
123 {
124 public:
125 /** Default constructor */
126 CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
127 /** Prevent instances of this class from being copied (As this class contains pointers) */
128 CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
129 /** Default move constructor */
130 CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
131 /** Prevent instances of this class from being copied (As this class contains pointers) */
132 CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
133 /** Default move assignment operator */
134 CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
135 /** Initialize the function's source, destination, conv and border_size.
136 *
137 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
138 * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
139 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
140 * Data type supported: Same as @p input.
141 * @param[out] output Destination tensor. Data type supported: same as @p input.
142 * @param[in] conv_info Padding and stride information to use for the convolution.
143 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
144 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
145 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
146 */
147 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
148 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
149
150 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
151 *
152 * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
153 * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
154 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
155 * Data type supported: Same as @p input, S32 when input is QASYMM8.
156 * @param[in] output Destination tensor. Data type supported: same as @p input.
157 * @param[in] conv_info Padding and stride information to use for the convolution.
158 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
159 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
160 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
161 *
162 * @return a status
163 */
164 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
165 ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
166
167 // Inherited methods overriden:
168 void run() override;
169 void prepare() override;
170
171 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
172 {
173 _memory_group = MemoryGroup(std::move(memory_manager));
174 };
175
176 private:
177 MemoryGroup _memory_group;
178 std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
179 CLFillBorderKernel _border_handler;
180 CLPermute _permute_input_to_nchw;
181 CLPermute _permute_weights_to_nchw;
182 CLPermute _permute_output_to_nhwc;
183 CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
184 CLTensor _permuted_input;
185 CLTensor _permuted_weights;
186 CLTensor _permuted_output;
187 const ITensor *_original_weights;
188 bool _needs_permute;
189 bool _needs_weights_reshape;
190 bool _is_prepared;
191 };
192
193 /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
194 *
195 * -# @ref CLDepthwiseConvolutionLayerNativeKernel
196 * -# @ref CLPermute (x 3) if the data layout is NCHW
197 *
198 */
199 class CLDepthwiseConvolutionLayerGeneric : public IFunction
200 {
201 public:
202 /** Default constructor */
203 CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
204 /** Prevent instances of this class from being copied (As this class contains pointers) */
205 CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;
206 /** Default move constructor */
207 CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;
208 /** Prevent instances of this class from being copied (As this class contains pointers) */
209 CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;
210 /** Default move assignment operator */
211 CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;
212 /** Initialize the function's source, destination, weights and convolution information.
213 *
214 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
215 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
216 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
217 * Data type supported: Same as @p input, S32 when input is QASYMM8.
218 * @param[out] output Destination tensor. Data type supported: same as @p input.
219 * @param[in] conv_info Padding and stride information to use for the convolution.
220 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
221 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
222 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
223 */
224 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
225 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
226
227 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
228 *
229 * @param[in] input Source tensor info. Data type supported: QASYMM8/F32.
230 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
231 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
232 * Data type supported: Same as @p input, S32 when input is QASYMM8.
233 * @param[in] output Destination tensor. Data type supported: same as @p input.
234 * @param[in] conv_info Padding and stride information to use for the convolution.
235 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
236 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
237 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
238 *
239 * @return a status
240 */
241 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
242 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
243
244 // Inherited methods overriden:
245 void run() override;
246 void prepare() override;
247
248 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
249 {
250 _memory_group = MemoryGroup(std::move(memory_manager));
251 };
252
253 private:
254 MemoryGroup _memory_group;
255
256 CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
257 CLPermute _permute_input_to_nhwc;
258 CLPermute _permute_weights_to_nhwc;
259 CLPermute _permute_output_to_nchw;
260
261 CLTensor _permuted_input;
262 CLTensor _permuted_weights;
263 CLTensor _permuted_output;
264 const ITensor *_original_weights;
265
266 bool _needs_permute;
267 bool _is_prepared;
268 };
269
270 std::shared_ptr<IMemoryManager> _memory_manager;
271
272 DepthwiseConvolutionFunction _depth_conv_func;
273 CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;
274 CLDepthwiseConvolutionLayerGeneric _func_generic;
275};
276
Giorgio Arenadfca60b2018-01-31 10:30:59 +0000277/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
Giorgio Arena93a690e2017-08-01 16:09:33 +0100278 *
Giorgio Arenadfca60b2018-01-31 10:30:59 +0000279 * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
280 * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
giuros016d109962019-01-07 17:47:19 +0000281 * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
Giorgio Arena9fe41442017-08-23 16:36:24 +0100282 * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
Giorgio Arena93a690e2017-08-01 16:09:33 +0100283 *
284 */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000285class CLDepthwiseConvolutionLayer3x3 : public IFunction
Giorgio Arena93a690e2017-08-01 16:09:33 +0100286{
287public:
288 /** Default constructor */
Georgios Pinitas05045c12018-12-07 18:31:47 +0000289 CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
290 /** Prevent instances of this class from being copied (As this class contains pointers) */
291 CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;
292 /** Default move constructor */
293 CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;
294 /** Prevent instances of this class from being copied (As this class contains pointers) */
295 CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;
296 /** Default move assignment operator */
297 CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;
Giorgio Arena93a690e2017-08-01 16:09:33 +0100298 /** Initialize the function's source, destination, conv and border_size.
299 *
Giorgio Arena76572242018-04-04 17:44:26 +0100300 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
301 * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
Usama Arife73686a2019-04-08 17:30:48 +0100302 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Giorgio Arena76572242018-04-04 17:44:26 +0100303 * Data type supported: Same as @p input.
304 * @param[out] output Destination tensor. Data type supported: same as @p input.
305 * @param[in] conv_info Padding and stride information to use for the convolution.
306 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
307 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
Usama Arife73686a2019-04-08 17:30:48 +0100308 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
Giorgio Arena93a690e2017-08-01 16:09:33 +0100309 */
Manuel Bottini05069f02019-09-26 17:18:26 +0100310 ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)
Giorgio Arena76572242018-04-04 17:44:26 +0100311 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
Usama Arife73686a2019-04-08 17:30:48 +0100312 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Giorgio Arena93a690e2017-08-01 16:09:33 +0100313
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100314 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
315 *
Usama Arife73686a2019-04-08 17:30:48 +0100316 * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
317 * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
318 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100319 * Data type supported: Same as @p input, S32 when input is QASYMM8.
320 * @param[in] output Destination tensor. Data type supported: same as @p input.
321 * @param[in] conv_info Padding and stride information to use for the convolution.
322 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
323 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
324 * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
Usama Arife73686a2019-04-08 17:30:48 +0100325 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100326 *
327 * @return a status
328 */
329 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
Usama Arife73686a2019-04-08 17:30:48 +0100330 ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100331
Giorgio Arena9fe41442017-08-23 16:36:24 +0100332 // Inherited methods overriden:
333 void run() override;
Georgios Pinitas72219332018-06-05 14:56:06 +0100334 void prepare() override;
Giorgio Arena9fe41442017-08-23 16:36:24 +0100335
336private:
Manuel Bottini05069f02019-09-26 17:18:26 +0100337 CLDepthwiseConvolutionLayer _func;
Giorgio Arena93a690e2017-08-01 16:09:33 +0100338};
Georgios Pinitas05045c12018-12-07 18:31:47 +0000339} // namespace arm_compute
Giorgio Arena9fe41442017-08-23 16:36:24 +0100340#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */