/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"

namespace arm_compute
{
class CLCompileContext;
class CLFillBorderKernel;
class CLDepthwiseConvolutionLayerNativeKernel;
class CLDepthwiseConvolutionLayer3x3NCHWKernel;
class CLDepthwiseConvolutionLayer3x3NHWCKernel;
class ICLTensor;

/** Function to execute a depthwise convolution
 */
class CLDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
    /** Default destructor */
    ~CLDepthwiseConvolutionLayer();
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * Valid data layouts:
     * - NHWC
     * - NCHW
     *
     * Valid data type configurations:
     * |src0           |src1               |src2   |dst            |
     * |:--------------|:------------------|:------|:--------------|
     * |F16            |F16                |F16    |F16            |
     * |F32            |F32                |F32    |F32            |
     * |QASYMM8        |QASYMM8            |S32    |QASYMM8        |
     * |QASYMM8        |QSYMM8_PER_CHANNEL |S32    |QASYMM8        |
     * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32    |QASYMM8_SIGNED |
     * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32    |QASYMM8_SIGNED |
     *
     * @param[in]      compile_context  The compile context to be used.
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out]     output           Destination tensor. Pass in nullptr or @p input for in-place operation. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @note: For in-place support, please check @ref CLDepthwiseConvolutionLayerNativeKernel
     */
    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Initialize the function's source, destination, weights and convolution information.
     *
     * Similar to @ref CLDepthwiseConvolutionLayer::configure()
     */
    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
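    /* A minimal usage sketch (illustrative only, not part of the API contract). It assumes the CL
     * backend has already been initialised, e.g. with CLScheduler::get().default_init(), and uses
     * made-up FP32 NCHW shapes: a 32x32 input with 16 channels, one 3x3 kernel per channel and
     * stride 1 with padding 1, so the output keeps the input shape.
     *
     *   CLTensor src, weights, biases, dst;
     *   src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
     *   weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
     *   biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
     *   dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
     *
     *   CLDepthwiseConvolutionLayer dwc;
     *   dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));
     *
     *   src.allocator()->allocate();
     *   weights.allocator()->allocate();
     *   biases.allocator()->allocate();
     *   dst.allocator()->allocate();
     *
     *   // ... fill src, weights and biases, then run the function (typically once per inference)
     *   dwc.run();
     */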

    /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
     *
     * Similar to @ref CLDepthwiseConvolutionLayer::configure()
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                           ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
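    /* A sketch of calling validate() before configure(), with no device memory allocated. The
     * shapes mirror the configure() sketch above and are assumptions for illustration only;
     * validate() works purely on ITensorInfo objects, so no CLTensor is required.
     *
     *   const TensorInfo src_info(TensorShape(32U, 32U, 16U), 1, DataType::F32);
     *   const TensorInfo wei_info(TensorShape(3U, 3U, 16U), 1, DataType::F32);
     *   const TensorInfo bia_info(TensorShape(16U), 1, DataType::F32);
     *   const TensorInfo dst_info(TensorShape(32U, 32U, 16U), 1, DataType::F32);
     *
     *   const Status status = CLDepthwiseConvolutionLayer::validate(&src_info, &wei_info, &bia_info, &dst_info,
     *                                                               PadStrideInfo(1, 1, 1, 1));
     *   if(!bool(status))
     *   {
     *       // Configuration rejected: status.error_description() explains why
     *   }
     */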
    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
     * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
     * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a Depthwise Convolution Function
     */
    static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                           const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                           ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
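    /* Rough sketch of how the selection is used (an assumption about the implementation, based on
     * the members declared at the bottom of this class): configure() stores the result of
     * get_depthwiseconvolution_function() and run()/prepare() forward to the matching internal function.
     *
     *   switch(_depth_conv_func)
     *   {
     *       case DepthwiseConvolutionFunction::OPTIMIZED:
     *           _func_3x3.run();
     *           break;
     *       case DepthwiseConvolutionFunction::GENERIC:
     *           _func_generic.run();
     *           break;
     *       default:
     *           ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
     *   }
     */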

    /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
     *
     * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
     * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
     * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
     *
     */
    class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction
    {
    public:
        /** Default constructor */
        CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
        /** Default move constructor */
        CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
        /** Default move assignment operator */
        CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
        /** Initialize the function's source, destination, conv and border_size.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. A 3D tensor with shape [3, 3, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                       ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
        /** Initialize the function's source, destination, conv and border_size.
         *
         * @param[in]      compile_context  The compile context to be used.
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. A 3D tensor with shape [3, 3, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerInternal3x3
         *
         * @param[in] input            Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
         * @param[in] weights          Weights tensor info. A 3D tensor with shape [3, 3, IFM].
         *                             Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                               ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

        void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
        {
            _memory_group = MemoryGroup(std::move(memory_manager));
        };

    private:
        MemoryGroup                                               _memory_group;
        std::unique_ptr<CLDepthwiseConvolutionLayer3x3NCHWKernel> _kernel_nchw;
        std::unique_ptr<CLDepthwiseConvolutionLayer3x3NHWCKernel> _kernel_nhwc;
        std::unique_ptr<CLFillBorderKernel>                       _border_handler;
        CLPermute                                                 _permute_input_to_nchw;
        CLPermute                                                 _permute_weights_to_nchw;
        CLPermute                                                 _permute_output_to_nhwc;
        CLTensor                                                  _permuted_input;
        CLTensor                                                  _permuted_weights;
        CLTensor                                                  _permuted_output;
        CLTensor                                                  _output_multipliers;
        CLTensor                                                  _output_shifts;
        const ITensor                                            *_original_weights;
        const ITensor                                            *_input;
        const ITensor                                            *_output;
        bool                                                      _needs_permute;
        bool                                                      _is_prepared;
        bool                                                      _is_quantized;
        bool                                                      _is_nhwc;
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
     *
     * -# @ref CLDepthwiseConvolutionLayerNativeKernel
     * -# @ref CLPermute (x 3) if the data layout is NCHW
     *
     */
    class CLDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in]      compile_context  The compile context to be used.
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input            Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F32.
         * @param[in] weights          Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
         * @param[in] biases           Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

        void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
        {
            _memory_group = MemoryGroup(std::move(memory_manager));
        };

    private:
        MemoryGroup _memory_group;

        std::unique_ptr<CLDepthwiseConvolutionLayerNativeKernel> _dwc_native_kernel;
        CLPermute                                                 _permute_input_to_nhwc;
        CLPermute                                                 _permute_weights_to_nhwc;
        CLPermute                                                 _permute_output_to_nchw;

        CLTensor       _permuted_input;
        CLTensor       _permuted_weights;
        CLTensor       _permuted_output;
        CLTensor       _output_multipliers;
        CLTensor       _output_shifts;
        const ITensor *_original_weights;
        const ITensor *_input;
        const ITensor *_output;

        bool _needs_permute;
        bool _is_prepared;
        bool _is_quantized;
    };

    std::shared_ptr<IMemoryManager> _memory_manager;

    DepthwiseConvolutionFunction           _depth_conv_func;
    CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;
    CLDepthwiseConvolutionLayerGeneric     _func_generic;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H */