/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;

namespace arm_compute
{
namespace
{
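// Shared validation for the optimized depthwise path: checks data types, layout, dilation,
// bias shape and the requantization multiplier, then defers to either the assembly dispatch
// or the NEON 3x3 kernel (plus output stage for quantized inputs) and the activation layer.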
Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                    unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        ARM_COMPUTE_UNUSED(multiplier);
        ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
    }

    if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
    {
        TensorInfo accumulator = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier, dilation));

        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}
} // namespace

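// Thin wrapper: NEDepthwiseConvolutionLayerOptimized forwards configure/run/prepare to its
// internal function and reuses the shared optimized-path validation above.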
NEDepthwiseConvolutionLayerOptimized::NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager)
    : _func(std::move(memory_manager))
{
}

void NEDepthwiseConvolutionLayerOptimized::configure(ITensor                   *input,
                                                     const ITensor             *weights,
                                                     const ITensor             *biases,
                                                     ITensor                   *output, const PadStrideInfo &conv_info,
                                                     unsigned int               depth_multiplier,
                                                     const ActivationLayerInfo &act_info,
                                                     const Size2D              &dilation)
{
    _func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

Status NEDepthwiseConvolutionLayerOptimized::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                                      unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

void NEDepthwiseConvolutionLayerOptimized::run()
{
    _func.run();
}

void NEDepthwiseConvolutionLayerOptimized::prepare()
{
    _func.prepare();
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_kernel(), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(),
      _activationlayer_function(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_optimized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

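// Generic (non-assembly) pipeline: runs the NEON 3x3 depthwise kernel in NCHW. NHWC inputs are
// permuted to NCHW first, quantized inputs accumulate into an S32 tensor that is requantized by
// the output stage kernel, and the result is permuted back to NHWC when needed.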
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure_generic(ITensor                   *input,
                                                                                                  const ITensor             *weights,
                                                                                                  const ITensor             *biases,
                                                                                                  ITensor                   *output,
                                                                                                  const PadStrideInfo       &conv_info,
                                                                                                  unsigned int               depth_multiplier,
                                                                                                  const ActivationLayerInfo &act_info,
                                                                                                  const Size2D              &dilation)
{
    ARM_COMPUTE_UNUSED(act_info);

    PixelValue zero_value(0.f);

    // Initialize the intermediate accumulator tensor in case of quantized input
    if(_is_quantized)
    {
        TensorShape accum_shape  = output->info()->tensor_shape();
        DataLayout  accum_layout = output->info()->data_layout();
        if(!_is_nchw)
        {
            permute(accum_shape, PermutationVector(1U, 2U, 0U));
            accum_layout = DataLayout::NCHW;
        }

        _memory_group.manage(&_accumulator);
        _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
        _accumulator.info()->set_data_layout(accum_layout);
        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
    }

    if(!_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure depthwise
        _dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

        // Allocate tensors
        _permuted_input.allocator()->allocate();
    }
    else
    {
        // Configure depthwise convolution kernel
        _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
    }

    // Configure biases accumulation
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int   output_multiplier;
        int   output_shift;
        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
        _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);
        _accumulator.allocator()->allocate();
    }
    else if(_has_bias)
    {
        _output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);
    }

    // Permute output
    if(!_is_nchw)
    {
        // Configure the function to transform the convolved output to NHWC
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
        _permuted_output.allocator()->allocate();
    }
}

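// Optimized pipeline: dispatches to the assembly depthwise kernels, which operate in NHWC. NCHW
// inputs, weights and outputs are permuted around the call. ReLU and bounded-ReLU activations are
// fused into the assembly function; any other activation is run as a separate layer.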
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure_optimized(const ITensor             *input,
                                                                                                    const ITensor             *weights,
                                                                                                    const ITensor             *biases,
                                                                                                    ITensor                   *output,
                                                                                                    const PadStrideInfo       &conv_info,
                                                                                                    unsigned int               depth_multiplier,
                                                                                                    const ActivationLayerInfo &act_info,
                                                                                                    const Size2D              &dilation)
{
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
    _is_activationlayer_enabled         = act_info.enabled() && !(is_relu || is_relu6);
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);

        // Configure the function to transform the convolved output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor                   *input,
                                                                                          const ITensor             *weights,
                                                                                          const ITensor             *biases,
                                                                                          ITensor                   *output, const PadStrideInfo &conv_info,
                                                                                          unsigned int               depth_multiplier,
                                                                                          const ActivationLayerInfo &act_info,
                                                                                          const Size2D              &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                                      output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias         = biases != nullptr;
    _is_optimized     = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),
                                                                                       weights->info(),
                                                                                       conv_info,
                                                                                       depth_multiplier,
                                                                                       dilation);
    _is_nchw          = input->info()->data_layout() == DataLayout::NCHW;
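    // A layout permute is needed when the chosen path does not match the input layout:
    // the optimized path expects NHWC while the generic path expects NCHW.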
    _permute                    = _is_optimized == _is_nchw;
    _is_prepared                = false;
    _is_activationlayer_enabled = act_info.enabled();

    // Configure appropriate pipeline
    if(_is_optimized)
    {
        configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }
    else
    {
        configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo         *input,
                                                                                           const ITensorInfo         *weights,
                                                                                           const ITensorInfo         *biases,
                                                                                           const ITensorInfo         *output,
                                                                                           const PadStrideInfo       &conv_info,
                                                                                           unsigned int               depth_multiplier,
                                                                                           const ActivationLayerInfo &act_info,
                                                                                           const Size2D              &dilation)
{
    return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run_generic()
{
    // Fill border
    NEScheduler::get().schedule(&_border_handler, Window::DimX);

    // Execute depthwise convolution
    NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

    // Add biases
    if(_has_bias || _is_quantized)
    {
        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
    }

    // Permute output
    if(!_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run_optimized()
{
    // Run assembly function
    _dwc_optimized_func.run();

    // Permute output
    if(_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    _is_optimized ? run_optimized() : run_generic();

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
{
    if(!_is_prepared)
    {
        // Permute weights
        if(_permute)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare optimized function
        if(_is_optimized)
        {
            _dwc_optimized_func.prepare();
            if(!_permuted_weights.is_used())
            {
                _permuted_weights.allocator()->free();
            }
        }

        _is_prepared = true;
    }
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
    : _depthwise_conv_kernel(), _fill_border(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _permuted_input(), _permuted_weights(), _permuted_output(),
      _is_prepared(false), _is_nchw(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}

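// Fallback path built around NEDepthwiseConvolutionLayerNativeKernel, which works in NHWC and
// supports arbitrary kernel sizes. NCHW tensors are permuted to NHWC on the way in and the
// result is permuted back on the way out.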
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                                                                unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

    _is_nchw     = input->info()->data_layout() == DataLayout::NCHW;
    _is_prepared = !_is_nchw;

    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = output;
    if(_is_nchw)
    {
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);
        input_to_use = &_permuted_input;

        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
        weights_to_use = &_permuted_weights;

        _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
        output_to_use = &_permuted_output;
    }
    _original_weights = weights_to_use;

    _depthwise_conv_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
    _fill_border.configure(input_to_use, _depthwise_conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint64_t>(0), input->info()->data_type(), input->info()->quantization_info()));

    if(_is_nchw)
    {
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);

        _permuted_input.allocator()->allocate();
        _permuted_weights.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                 const PadStrideInfo &conv_info,
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    if(input->data_layout() == DataLayout::NCHW)
    {
        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
        permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));

        const TensorInfo permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_output  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));

        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));

        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, conv_info, depth_multiplier, dilation));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
{
    if(_is_nchw)
    {
        prepare();
        _permute_input.run();
    }

    NEScheduler::get().schedule(&_fill_border, Window::DimX);
    NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY);

    if(_is_nchw)
    {
        _permute_output.run();
    }

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

        _permute_weights.run();
        _original_weights->mark_as_unused();
        _is_prepared = true;
    }
}

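// A minimal usage sketch of the public interface (illustrative only; the shapes, data type and
// pad/stride values below are assumptions, not taken from this file):
//
//   Tensor src{}, weights{}, biases{}, dst{};
//   src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));    // W x H x C
//   weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));  // 3x3 depthwise
//   biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
//   dst.allocator()->init(TensorInfo(TensorShape(30U, 30U, 16U), 1, DataType::F32));    // stride 1, no padding
//
//   NEDepthwiseConvolutionLayer depthwise_conv;
//   depthwise_conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 0, 0), 1 /* depth_multiplier */);
//
//   src.allocator()->allocate();
//   weights.allocator()->allocate();
//   biases.allocator()->allocate();
//   dst.allocator()->allocate();
//   // ... fill src, weights and biases ...
//   depthwise_conv.run();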
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(std::move(memory_manager)), _func_generic()
{
}

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                            const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info, dilation);
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    switch(depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            return NEDepthwiseConvolutionLayerOptimized::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            return NEDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

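// The optimized path is selected whenever its validate() accepts the given configuration;
// otherwise the generic native-kernel path is used.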
DepthwiseConvolutionFunction NEDepthwiseConvolutionLayer::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                            const PadStrideInfo &conv_info,
                                                                                            unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
{
    if(bool(NEDepthwiseConvolutionLayerOptimized::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation)))
    {
        return DepthwiseConvolutionFunction::OPTIMIZED;
    }
    else
    {
        return DepthwiseConvolutionFunction::GENERIC;
    }
}

void NEDepthwiseConvolutionLayer::run()
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.run();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.run();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}

void NEDepthwiseConvolutionLayer::prepare()
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.prepare();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.prepare();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}
} // namespace arm_compute