blob: 168d7d5c842071bb15dacaca8f1c0e9ebaa691b2 [file] [log] [blame]
Giorgio Arena93a690e2017-08-01 16:09:33 +01001/*
giuros016d109962019-01-07 17:47:19 +00002 * Copyright (c) 2017-2019 ARM Limited.
Giorgio Arena93a690e2017-08-01 16:09:33 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000024#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010025
26#include "arm_compute/core/CL/ICLTensor.h"
Giorgio Arenaad0c7382018-04-23 16:16:21 +010027#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
28#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
Georgios Pinitas05045c12018-12-07 18:31:47 +000029#include "arm_compute/core/Helpers.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010030#include "arm_compute/core/PixelValue.h"
Georgios Pinitas9be0c5a2018-02-19 12:46:29 +000031#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Georgios Pinitasde5a1cc2018-02-02 12:52:07 +000032#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010033#include "arm_compute/runtime/CL/CLScheduler.h"
34#include "support/ToolchainSupport.h"
35
giuros016d109962019-01-07 17:47:19 +000036namespace arm_compute
37{
Georgios Pinitasde5a1cc2018-02-02 12:52:07 +000038using namespace arm_compute::misc;
Georgios Pinitas9be0c5a2018-02-19 12:46:29 +000039using namespace arm_compute::misc::shape_calculator;
Giorgio Arena93a690e2017-08-01 16:09:33 +010040
namespace
{
/** Validate the arguments for the specialized 3x3 depthwise convolution path.
 *
 * Mirrors the layout decisions made at configure time (permute for NHWC with
 * depth_multiplier > 1, weights reshape for quantized NHWC) and forwards the
 * resulting tensor infos to the matching NCHW/NHWC kernel's validate().
 *
 * @param[in] input            Source tensor info. Layout must not be UNKNOWN.
 * @param[in] weights          Weights tensor info.
 * @param[in] biases           Biases tensor info (may be nullptr).
 * @param[in] output           Destination tensor info.
 * @param[in] conv_info        Padding and stride information.
 * @param[in] depth_multiplier Number of output channels per input channel.
 * @param[in] act_info         Fused activation information.
 * @param[in] gpu_target       GPU target for the NCHW kernel.
 * @param[in] dilation         Dilation of the convolution.
 *
 * @return a Status
 */
Status validate_arguments_3x3(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                              unsigned int depth_multiplier, ActivationLayerInfo act_info, GPUTarget gpu_target, const Size2D &dilation)
{
    // This function should be removed and incorporated inside CLDepthwiseConvolutionLayerInternal3x3 once CLDepthwiseConvolutionLayer3x3 is properly removed
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);

    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
    const bool is_nhwc      = input->data_layout() == DataLayout::NHWC;
    // NHWC with depth_multiplier > 1 is handled by permuting to NCHW and running the NCHW kernel.
    const bool needs_permute          = is_nhwc && (depth_multiplier > 1);
    // Quantized NHWC with depth_multiplier == 1 requires a weights reshape before the NHWC kernel.
    const bool needs_weights_reshape  = is_nhwc && (depth_multiplier == 1) && is_quantized;
    const bool is_stride_1            = ((conv_info.stride().first == conv_info.stride().second) && (conv_info.stride().first == 1));
    const bool is_stride_1_dilation_1 = (is_stride_1 && dilation.x() == 1 && dilation.y() == 1);
    const bool is_dot8_supported      = dot8_supported(CLKernelLibrary::get().get_device());
    DepthwiseConvolutionReshapeInfo info;
    info.c0 = 4;
    // Weights are transposed during reshape only when the dot8 fast path can be taken.
    info.transpose = is_stride_1_dilation_1 && is_dot8_supported;

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
        // If the output info is not yet initialized, fall back to the input quantization.
        const UniformQuantizationInfo oq_info = (output->total_size() == 0) ? iq_info : output->quantization_info().uniform();

        const float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
        ARM_COMPUTE_UNUSED(multiplier); // silences unused warning when the macro below compiles out
        ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
    }

    if(needs_permute)
    {
        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);

        // NHWC -> NCHW
        permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));
        permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));
        permute(permuted_output_shape, PermutationVector(1U, 2U, 0U));

        const TensorInfo permuted_input   = input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NCHW);
        const TensorInfo permuted_weights = weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NCHW);
        const TensorInfo permuted_output  = output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW);

        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, conv_info, depth_multiplier, act_info, gpu_target,
                                                                                       dilation));
    }
    else if(is_nhwc)
    {
        if(needs_weights_reshape)
        {
            // Validate against the weights shape the reshape kernel would produce.
            auto reshaped_weights_shape = arm_compute::misc::shape_calculator::compute_reshaped_depthwise_weights_shape(*weights, info);
            ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(input, &weights->clone()->set_tensor_shape(reshaped_weights_shape), biases, output, conv_info, depth_multiplier,
                                                                                           act_info, dilation));
        }
        else
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target, dilation));
    }
    return Status{};
}
} // namespace
109
// Thin legacy wrapper: all state lives in the internal 3x3 function, which
// receives the memory manager for its intermediate tensors.
CLDepthwiseConvolutionLayer3x3::CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager)
    : _func(std::move(memory_manager))
{
}
114
// Delegates configuration to the internal 3x3 implementation; kept for
// backward compatibility of the public CLDepthwiseConvolutionLayer3x3 API.
void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                               ActivationLayerInfo act_info, const Size2D &dilation)
{
    _func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}
120
// Static validation entry point; forwards to the shared 3x3 argument checker.
Status CLDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                                unsigned int depth_multiplier, ActivationLayerInfo act_info, GPUTarget gpu_target, const Size2D &dilation)
{
    return validate_arguments_3x3(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target, dilation);
}
126
// Executes the wrapped internal 3x3 function.
void CLDepthwiseConvolutionLayer3x3::run()
{
    _func.run();
}
131
// One-time preparation (e.g. weights transformation) of the wrapped function.
void CLDepthwiseConvolutionLayer3x3::prepare()
{
    _func.prepare();
}
136
// Generic (native-kernel) depthwise path: owns the native kernel, the three
// NCHW<->NHWC permute functions and their intermediate tensors. Permutation
// state is decided later, in configure().
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _dwc_native_kernel(),
      _permute_input_to_nhwc(),
      _permute_weights_to_nhwc(),
      _permute_output_to_nchw(),
      _permuted_input(),
      _permuted_weights(),
      _permuted_output(),
      _original_weights(),
      _needs_permute(false),
      _is_prepared(false)
{
}
151
/** Configure the generic depthwise convolution.
 *
 * The native kernel works in NHWC; NCHW inputs are routed through permute
 * functions (input/weights to NHWC before the kernel, output back to NCHW
 * after it). Intermediate tensors are managed by the memory group and
 * allocated only after every consumer has been configured.
 */
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                                                                                unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Validate the full configuration up front; throws on failure.
    ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayer::validate(input->info(),
                                                                     weights->info(),
                                                                     biases != nullptr ? biases->info() : nullptr,
                                                                     output->info(),
                                                                     conv_info,
                                                                     depth_multiplier,
                                                                     act_info,
                                                                     dilation));

    _is_prepared      = false;
    _original_weights = weights;
    _needs_permute    = input->info()->data_layout() == DataLayout::NCHW;

    ICLTensor       *input_to_use   = input;
    const ICLTensor *weights_to_use = weights;
    ICLTensor       *output_to_use  = output;
    if(_needs_permute)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input_to_nhwc.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights_to_nhwc.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        // Set output quantization info before dwc kernel configure
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        input_to_use   = &_permuted_input;
        weights_to_use = &_permuted_weights;
        output_to_use  = &_permuted_output;
    }

    DWCWeightsKernelInfo dwc_weights_info;
    // n0 = 8 vectorizes over output channels; a depth multiplier forces scalar (n0 = 1).
    dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1;
    DWCKernelInfo dwc_info;
    dwc_info.activation_info = act_info;
    _dwc_native_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation);

    if(_needs_permute)
    {
        _permuted_input.allocator()->allocate();

        // Configure the function to transform the convoluted output to NCHW format
        _permuted_output.info()->set_data_layout(DataLayout::NCHW);
        _permute_output_to_nchw.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
        _permuted_output.allocator()->allocate();
    }
}
209
/** Validate the generic depthwise convolution path.
 *
 * Checks that the dilated filter fits within the padded input, then validates
 * either the native kernel directly (NHWC) or the full permute -> native
 * kernel -> permute chain (NCHW) using synthetically permuted tensor infos.
 *
 * @return a Status
 */
Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                 const PadStrideInfo &conv_info,
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);

    // Effective (dilated) filter extent must not exceed the padded input extent.
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    DWCWeightsKernelInfo dwc_weights_info;
    // Same n0 choice as configure(): vectorize only when depth_multiplier == 1.
    dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1;
    DWCKernelInfo dwc_info;
    dwc_info.activation_info = act_info;

    const bool needs_permute = input->data_layout() == DataLayout::NCHW;

    if(needs_permute)
    {
        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);

        // NCHW -> NHWC
        permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));

        const TensorInfo permuted_input   = input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC);
        const TensorInfo permuted_weights = weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC);
        const TensorInfo permuted_output  = output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NHWC);

        // Validate each stage of the permute -> native kernel -> permute chain.
        ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, dwc_weights_info,
                                                                                      dwc_info, conv_info, depth_multiplier, dilation));
        ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation));
    }
    return Status{};
}
254
// Run the generic path: prepare weights once, then (optionally) permute the
// input, enqueue the native kernel, and permute the output back. The
// MemoryGroupResourceScope keeps the managed intermediate tensors acquired
// for the duration of the run.
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    if(_needs_permute)
    {
        _permute_input_to_nhwc.run();
    }
    CLScheduler::get().enqueue(_dwc_native_kernel);
    if(_needs_permute)
    {
        _permute_output_to_nchw.run();
    }
}
271
// One-time preparation: when a permute is needed, allocate the permuted
// weights, run the weights permutation once and release the original weights.
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::prepare()
{
    if(!_is_prepared)
    {
        if(_needs_permute)
        {
            // The original weights must still be alive the first time we run.
            ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

            _permuted_weights.allocator()->allocate();
            _permute_weights_to_nhwc.run();
            // Original weights are no longer needed after the permuted copy exists.
            _original_weights->mark_as_unused();
        }
        _is_prepared = true;
    }
}
287
// Optimized 3x3 path: the concrete kernel (NCHW or NHWC variant) is chosen in
// configure(), hence _kernel starts as nullptr.
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)), _kernel(nullptr), _border_handler(), _permute_input_to_nchw(), _permute_weights_to_nchw(), _permute_output_to_nhwc(), _reshape_weights(), _permuted_input(),
      _permuted_weights(), _permuted_output(), _original_weights(nullptr), _needs_permute(false), _needs_weights_reshape(false), _is_prepared(false)
{
}
293
/** Configure the optimized 3x3 depthwise convolution.
 *
 * Kernel selection:
 *  - NHWC with depth_multiplier > 1: permute everything to NCHW and use the
 *    NCHW kernel (output is permuted back to NHWC afterwards).
 *  - NHWC with depth_multiplier == 1: use the NHWC kernel; quantized inputs
 *    additionally reshape the weights first.
 *  - NCHW: use the NCHW kernel directly.
 * A constant border handler is configured last, filling with the input's
 * quantization offset for quantized data and zero otherwise.
 */
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                                                                    const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    // idx_w and idx_h only used for validation
    const size_t idx_w = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_UNUSED(idx_w); // silences unused warnings when the asserts below compile out
    ARM_COMPUTE_UNUSED(idx_h);

    // Effective (dilated) filter extent must not exceed the padded input extent.
    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_w) + (weights->info()->dimension(idx_w) - 1) * (dilation.x() - 1) > input->info()->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_ERROR_ON(weights->info()->dimension(idx_h) + (weights->info()->dimension(idx_h) - 1) * (dilation.y() - 1) > input->info()->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    const bool is_nhwc = input->info()->data_layout() == DataLayout::NHWC;

    _needs_permute         = is_nhwc && (depth_multiplier > 1);
    _needs_weights_reshape = is_nhwc && (depth_multiplier == 1)
                             && is_data_type_quantized_asymmetric(input->info()->data_type());
    _is_prepared      = false;
    _original_weights = weights;

    ICLTensor       *input_to_use   = input;
    const ICLTensor *weights_to_use = weights;
    ICLTensor       *output_to_use  = output;

    const bool is_stride_1            = ((conv_info.stride().first == conv_info.stride().second) && (conv_info.stride().first == 1));
    const bool is_dot8_supported      = dot8_supported(CLKernelLibrary::get().get_device());
    const bool is_stride_1_dilation_1 = (is_stride_1 && dilation.x() == 1 && dilation.y() == 1);

    DepthwiseConvolutionReshapeInfo info;
    info.c0 = 4;
    // Transpose the reshaped weights only when the dot8 fast path can be taken.
    info.transpose = is_stride_1_dilation_1 && is_dot8_supported;

    if(_needs_permute)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input_to_nchw.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights_to_nchw.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        // Set output quantization info before the kernel is configured.
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        input_to_use   = &_permuted_input;
        weights_to_use = &_permuted_weights;
        output_to_use  = &_permuted_output;

        _kernel = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3NCHWKernel>();
    }
    else if(is_nhwc)
    {
        if(_needs_weights_reshape)
        {
            _reshape_weights.configure(weights, &_permuted_weights, info);
            weights_to_use = &_permuted_weights;
        }
        _kernel = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3NHWCKernel>();
    }
    else
    {
        _kernel = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3NCHWKernel>();
    }

    // Configure kernel
    _kernel->set_target(CLScheduler::get().target());
    _kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, act_info, dilation);

    // Permute output if needed
    if(_needs_permute)
    {
        // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NCHW);
        _permute_output_to_nhwc.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    // Configure border handler
    PixelValue &&zero_value(0.f);
    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
    {
        // Quantized data pads with the zero-point offset rather than literal zero.
        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
    }
    _border_handler.configure(input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
}
385
// Static validation entry point; forwards to the shared 3x3 argument checker.
Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                     const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, GPUTarget gpu_target, const Size2D &dilation)
{
    return validate_arguments_3x3(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target, dilation);
}
391
// Run the 3x3 path: prepare weights once, then (optionally) permute the input
// to NCHW, fill the constant border, enqueue the selected kernel and permute
// the output back to NHWC if needed.
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    if(_needs_permute)
    {
        _permute_input_to_nchw.run();
    }
    // Border must be filled before the convolution kernel reads past the edges.
    CLScheduler::get().enqueue(_border_handler);
    CLScheduler::get().enqueue(*_kernel);

    if(_needs_permute)
    {
        _permute_output_to_nhwc.run();
    }
}
410
// One-time weights preparation: either permute (NHWC, depth_multiplier > 1)
// or reshape (quantized NHWC, depth_multiplier == 1) the weights into
// _permuted_weights, then release the originals. The two cases are mutually
// exclusive by construction in configure().
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::prepare()
{
    if(!_is_prepared)
    {
        if(_needs_permute)
        {
            // The original weights must still be alive the first time we run.
            ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

            _permuted_weights.allocator()->allocate();
            _permute_weights_to_nchw.run();
            _original_weights->mark_as_unused();
        }

        if(_needs_weights_reshape)
        {
            ARM_COMPUTE_ERROR_ON(_needs_permute); // cannot be set together with _needs_permute
            ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
            _permuted_weights.allocator()->allocate();
            CLScheduler::get().enqueue(_reshape_weights);
            _original_weights->mark_as_unused();
        }
        _is_prepared = true;
    }
}
435
// The memory manager is stored and handed to whichever sub-function
// (3x3 or generic) configure() selects later.
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_manager(std::move(memory_manager)), _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_3x3(), _func_generic()
{
}
440
Manuel Bottini05069f02019-09-26 17:18:26 +0100441void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
442 ActivationLayerInfo act_info, const Size2D &dilation)
Giorgio Arena93a690e2017-08-01 16:09:33 +0100443{
Manuel Bottini05069f02019-09-26 17:18:26 +0100444 const GPUTarget gpu_target = CLScheduler::get().target();
445 _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info,
446 dilation, gpu_target);
447 switch(_depth_conv_func)
Georgios Pinitasde5a1cc2018-02-02 12:52:07 +0000448 {
Manuel Bottini05069f02019-09-26 17:18:26 +0100449 case DepthwiseConvolutionFunction::OPTIMIZED:
450 _func_3x3.set_memory_group(_memory_manager);
451 _func_3x3.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
452 break;
453 case DepthwiseConvolutionFunction::GENERIC:
Pablo Tello8bf622a2018-12-03 15:54:49 +0000454 {
Manuel Bottini05069f02019-09-26 17:18:26 +0100455 _func_generic.set_memory_group(_memory_manager);
456 _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
Pablo Tello8bf622a2018-12-03 15:54:49 +0000457 }
Manuel Bottini05069f02019-09-26 17:18:26 +0100458 break;
459 default:
460 ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
Georgios Pinitas60e98252018-10-22 16:17:20 +0100461 }
Giorgio Arena93a690e2017-08-01 16:09:33 +0100462}
463
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100464Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
Manuel Bottini05069f02019-09-26 17:18:26 +0100465 unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100466{
Manuel Bottini05069f02019-09-26 17:18:26 +0100467 const GPUTarget gpu_target = CLScheduler::get().target();
468 DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, gpu_target);
469 switch(depth_conv_func)
Georgios Pinitas60e98252018-10-22 16:17:20 +0100470 {
Manuel Bottini05069f02019-09-26 17:18:26 +0100471 case DepthwiseConvolutionFunction::OPTIMIZED:
472 return CLDepthwiseConvolutionLayerInternal3x3::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target, dilation);
473 case DepthwiseConvolutionFunction::GENERIC:
474 return CLDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
475 default:
476 ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
477 }
478}
Georgios Pinitas60e98252018-10-22 16:17:20 +0100479
Manuel Bottini05069f02019-09-26 17:18:26 +0100480DepthwiseConvolutionFunction CLDepthwiseConvolutionLayer::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
481 const PadStrideInfo &conv_info,
482 unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation, GPUTarget gpu_target)
483{
484 if(bool(CLDepthwiseConvolutionLayerInternal3x3::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target, dilation)) && (is_data_type_float(input->data_type())
485 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD))
486 {
487 return DepthwiseConvolutionFunction::OPTIMIZED;
Pablo Tello8bf622a2018-12-03 15:54:49 +0000488 }
giuros016d109962019-01-07 17:47:19 +0000489 else
490 {
Manuel Bottini05069f02019-09-26 17:18:26 +0100491 return DepthwiseConvolutionFunction::GENERIC;
giuros016d109962019-01-07 17:47:19 +0000492 }
Giorgio Arenaad0c7382018-04-23 16:16:21 +0100493}
494
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000495void CLDepthwiseConvolutionLayer::run()
Giorgio Arena93a690e2017-08-01 16:09:33 +0100496{
Manuel Bottini05069f02019-09-26 17:18:26 +0100497 switch(_depth_conv_func)
Georgios Pinitasde5a1cc2018-02-02 12:52:07 +0000498 {
Manuel Bottini05069f02019-09-26 17:18:26 +0100499 case DepthwiseConvolutionFunction::OPTIMIZED:
500 _func_3x3.run();
501 break;
502 case DepthwiseConvolutionFunction::GENERIC:
503 _func_generic.run();
504 break;
505 default:
506 ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
Georgios Pinitas60e98252018-10-22 16:17:20 +0100507 }
Giorgio Arena9fe41442017-08-23 16:36:24 +0100508}
Georgios Pinitas72219332018-06-05 14:56:06 +0100509
510void CLDepthwiseConvolutionLayer::prepare()
511{
Manuel Bottini05069f02019-09-26 17:18:26 +0100512 switch(_depth_conv_func)
Georgios Pinitas72219332018-06-05 14:56:06 +0100513 {
Manuel Bottini05069f02019-09-26 17:18:26 +0100514 case DepthwiseConvolutionFunction::OPTIMIZED:
515 _func_3x3.prepare();
516 break;
517 case DepthwiseConvolutionFunction::GENERIC:
518 _func_generic.prepare();
519 break;
520 default:
521 ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
Georgios Pinitas72219332018-06-05 14:56:06 +0100522 }
523}
giuros016d109962019-01-07 17:47:19 +0000524} // namespace arm_compute