blob: 570b6ca38f5da9f29a9434a86f9454220e4760ea [file] [log] [blame]
Giorgio Arena93a690e2017-08-01 16:09:33 +01001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2017-2020 Arm Limited.
Giorgio Arena93a690e2017-08-01 16:09:33 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Michalis Spyrouf4643372019-11-29 16:17:13 +000024#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
25#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
Giorgio Arena93a690e2017-08-01 16:09:33 +010026
Giorgio Arenadfca60b2018-01-31 10:30:59 +000027#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
28#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
Michele Di Giorgioa046e162019-10-08 09:36:26 +010029#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
giuros016d109962019-01-07 17:47:19 +000030#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010031#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
Giorgio Arenadfca60b2018-01-31 10:30:59 +000032#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010033#include "arm_compute/core/Types.h"
34#include "arm_compute/runtime/CL/CLTensor.h"
Georgios Pinitas05045c12018-12-07 18:31:47 +000035#include "arm_compute/runtime/CL/functions/CLPermute.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010036#include "arm_compute/runtime/IFunction.h"
Georgios Pinitas26014cf2019-09-09 19:00:57 +010037#include "arm_compute/runtime/MemoryGroup.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010038
Giorgio Arena93a690e2017-08-01 16:09:33 +010039namespace arm_compute
40{
41class ICLTensor;
42
Manuel Bottini05069f02019-09-26 17:18:26 +010043/** Function to execute a depthwise convolution
44 */
45class CLDepthwiseConvolutionLayer : public IFunction
46{
47public:
48 /** Default constructor */
49 CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
50 /** Prevent instances of this class from being copied (As this class contains pointers) */
51 CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
52 /** Default move constructor */
53 CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
54 /** Prevent instances of this class from being copied (As this class contains pointers) */
55 CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
56 /** Default move assignment operator */
57 CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
58 /** Initialize the function's source, destination, weights and convolution information.
59 *
60 * @param[in, out] input Source tensor. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +010061 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
62 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +010063 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
64 * Data type supported: Same as @p input, S32 when input is QASYMM8.
65 * @param[out] output Destination tensor. Data type supported: same as @p input.
66 * @param[in] conv_info Padding and stride information to use for the convolution.
67 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
68 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
69 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
70 */
71 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
72 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Manuel Bottini2b84be52020-04-08 10:15:51 +010073 /** Initialize the function's source, destination, weights and convolution information.
74 *
75 * @param[in] compile_context The compile context to be used.
76 * @param[in, out] input Source tensor. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
77 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
78 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
79 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
80 * Data type supported: Same as @p input, S32 when input is QASYMM8.
81 * @param[out] output Destination tensor. Data type supported: same as @p input.
82 * @param[in] conv_info Padding and stride information to use for the convolution.
83 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
84 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
85 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
86 */
87 void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
88 unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Manuel Bottini05069f02019-09-26 17:18:26 +010089
90 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
91 *
92 * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +010093 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
94 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +010095 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
96 * Data type supported: Same as @p input, S32 when input is QASYMM8.
97 * @param[in] output Destination tensor. Data type supported: same as @p input.
98 * @param[in] conv_info Padding and stride information to use for the convolution.
99 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
100 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
101 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
102 *
103 * @return a status
104 */
105 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
106 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
107
108 // Inherited methods overriden:
109 void run() override;
110 void prepare() override;
111
112private:
113 /** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer
114 *
115 * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100116 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
117 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100118 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
119 * Data type supported: Same as @p input, S32 when input is QASYMM8.
120 * @param[in] output Destination tensor. Data type supported: same as @p input.
121 * @param[in] conv_info Padding and stride information to use for the convolution.
122 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
123 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
124 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
125 * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
126 *
127 * @return a Depthwise Convolution Function
128 */
129 static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
130 const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
131 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD);
132
133 /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
134 *
135 * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
136 * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
137 * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
138 * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
139 *
140 */
141 class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction
142 {
143 public:
144 /** Default constructor */
145 CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
146 /** Prevent instances of this class from being copied (As this class contains pointers) */
147 CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
148 /** Default move constructor */
149 CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
150 /** Prevent instances of this class from being copied (As this class contains pointers) */
151 CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
152 /** Default move assignment operator */
153 CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
154 /** Initialize the function's source, destination, conv and border_size.
155 *
156 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100157 * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].
158 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100159 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
160 * Data type supported: Same as @p input.
161 * @param[out] output Destination tensor. Data type supported: same as @p input.
162 * @param[in] conv_info Padding and stride information to use for the convolution.
163 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
164 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
165 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
166 */
167 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
168 ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Manuel Bottini2b84be52020-04-08 10:15:51 +0100169 /** Initialize the function's source, destination, conv and border_size.
170 *
171 * @param[in] compile_context The compile context to be used.
172 * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
173 * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM].
174 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
175 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
176 * Data type supported: Same as @p input.
177 * @param[out] output Destination tensor. Data type supported: same as @p input.
178 * @param[in] conv_info Padding and stride information to use for the convolution.
179 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
180 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
181 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
182 */
183 void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
184 unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Manuel Bottini05069f02019-09-26 17:18:26 +0100185
186 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
187 *
188 * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100189 * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM].
190 * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100191 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
192 * Data type supported: Same as @p input, S32 when input is QASYMM8.
193 * @param[in] output Destination tensor. Data type supported: same as @p input.
194 * @param[in] conv_info Padding and stride information to use for the convolution.
195 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
196 * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
197 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
198 *
199 * @return a status
200 */
201 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
202 ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
203
204 // Inherited methods overriden:
205 void run() override;
206 void prepare() override;
207
208 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
209 {
210 _memory_group = MemoryGroup(std::move(memory_manager));
211 };
212
213 private:
214 MemoryGroup _memory_group;
215 std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
216 CLFillBorderKernel _border_handler;
217 CLPermute _permute_input_to_nchw;
218 CLPermute _permute_weights_to_nchw;
219 CLPermute _permute_output_to_nhwc;
220 CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
221 CLTensor _permuted_input;
222 CLTensor _permuted_weights;
223 CLTensor _permuted_output;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100224 CLTensor _output_multipliers;
225 CLTensor _output_shifts;
Manuel Bottini05069f02019-09-26 17:18:26 +0100226 const ITensor *_original_weights;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100227 const ITensor *_input;
228 const ITensor *_output;
Manuel Bottini05069f02019-09-26 17:18:26 +0100229 bool _needs_permute;
230 bool _needs_weights_reshape;
231 bool _is_prepared;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100232 bool _is_quantized;
Manuel Bottini05069f02019-09-26 17:18:26 +0100233 };
234
235 /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
236 *
237 * -# @ref CLDepthwiseConvolutionLayerNativeKernel
238 * -# @ref CLPermute (x 3) if the data layout is NCHW
239 *
240 */
241 class CLDepthwiseConvolutionLayerGeneric : public IFunction
242 {
243 public:
244 /** Default constructor */
245 CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
246 /** Prevent instances of this class from being copied (As this class contains pointers) */
247 CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;
248 /** Default move constructor */
249 CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;
250 /** Prevent instances of this class from being copied (As this class contains pointers) */
251 CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;
252 /** Default move assignment operator */
253 CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;
254 /** Initialize the function's source, destination, weights and convolution information.
255 *
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000256 * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100257 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000258 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100259 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000260 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
Manuel Bottini05069f02019-09-26 17:18:26 +0100261 * @param[out] output Destination tensor. Data type supported: same as @p input.
262 * @param[in] conv_info Padding and stride information to use for the convolution.
263 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
264 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
265 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
266 */
267 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
268 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Manuel Bottini2b84be52020-04-08 10:15:51 +0100269 /** Initialize the function's source, destination, weights and convolution information.
270 *
271 * @param[in] compile_context The compile context to be used.
272 * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
273 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
274 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
275 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
276 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
277 * @param[out] output Destination tensor. Data type supported: same as @p input.
278 * @param[in] conv_info Padding and stride information to use for the convolution.
279 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
280 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
281 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
282 */
283 void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
284 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Manuel Bottini05069f02019-09-26 17:18:26 +0100285
286 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
287 *
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000288 * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F32.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100289 * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000290 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini05069f02019-09-26 17:18:26 +0100291 * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Michele Di Giorgio4cd4cde2020-01-06 14:07:44 +0000292 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
Manuel Bottini05069f02019-09-26 17:18:26 +0100293 * @param[in] output Destination tensor. Data type supported: same as @p input.
294 * @param[in] conv_info Padding and stride information to use for the convolution.
295 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
296 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
297 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
298 *
299 * @return a status
300 */
301 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
302 unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
303
304 // Inherited methods overriden:
305 void run() override;
306 void prepare() override;
307
308 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
309 {
310 _memory_group = MemoryGroup(std::move(memory_manager));
311 };
312
313 private:
314 MemoryGroup _memory_group;
315
316 CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
317 CLPermute _permute_input_to_nhwc;
318 CLPermute _permute_weights_to_nhwc;
319 CLPermute _permute_output_to_nchw;
320
321 CLTensor _permuted_input;
322 CLTensor _permuted_weights;
323 CLTensor _permuted_output;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100324 CLTensor _output_multipliers;
325 CLTensor _output_shifts;
Manuel Bottini05069f02019-09-26 17:18:26 +0100326 const ITensor *_original_weights;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100327 const ITensor *_input;
328 const ITensor *_output;
Manuel Bottini05069f02019-09-26 17:18:26 +0100329
330 bool _needs_permute;
331 bool _is_prepared;
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +0100332 bool _is_quantized;
Manuel Bottini05069f02019-09-26 17:18:26 +0100333 };
334
335 std::shared_ptr<IMemoryManager> _memory_manager;
336
337 DepthwiseConvolutionFunction _depth_conv_func;
338 CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;
339 CLDepthwiseConvolutionLayerGeneric _func_generic;
340};
Georgios Pinitas05045c12018-12-07 18:31:47 +0000341} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000342#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H */