/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;

namespace arm_compute
{
namespace
{
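// Validation helper for the optimized path: it mirrors the configure-time decision between the
// assembly dispatch and the NEON 3x3 kernel. When the assembly path is not supported, the 3x3
// kernel is validated against an S32 accumulator for quantized inputs, followed by the output
// stage that requantizes back to the input data type.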
Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                    unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    if(!is_data_type_quantized_per_channel(weights->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    const bool is_quantized = (!is_data_type_quantized_per_channel(weights->data_type())) && is_data_type_quantized_asymmetric(input->data_type());

    if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
    {
        TensorInfo accumulator = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier, dilation));

        if(is_quantized)
        {
            DirectConvolutionLayerOutputStageKernelInfo direct_conv_info;
            direct_conv_info.output_data_type = input->data_type();
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output, direct_conv_info));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }
    return Status{};
}
} // namespace

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_kernel(), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(),
      _activationlayer_function(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_optimized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure_generic(ITensor                   *input,
                                                                                                   const ITensor             *weights,
                                                                                                   const ITensor             *biases,
                                                                                                   ITensor                   *output,
                                                                                                   const PadStrideInfo       &conv_info,
                                                                                                   unsigned int               depth_multiplier,
                                                                                                   const ActivationLayerInfo &act_info,
                                                                                                   const Size2D              &dilation)
{
    ARM_COMPUTE_UNUSED(act_info);

    PixelValue zero_value(0.f);

    // Initialize the intermediate accumulator tensor in case of quantized input
    if(_is_quantized)
    {
        TensorShape accum_shape  = output->info()->tensor_shape();
        DataLayout  accum_layout = output->info()->data_layout();
        if(!_is_nchw)
        {
            permute(accum_shape, PermutationVector(1U, 2U, 0U));
            accum_layout = DataLayout::NCHW;
        }

        _memory_group.manage(&_accumulator);
        _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
        _accumulator.info()->set_data_layout(accum_layout);
        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
    }

    if(!_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure depthwise
        _dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

        // Allocate tensors
        _permuted_input.allocator()->allocate();
    }
    else
    {
        // Configure depthwise convolution kernel
        _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
    }

    // Configure biases accumulation
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

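        // Requantization: the S32 accumulator holds products scaled by input_scale * weight_scale, so the
        // effective rescale factor is (input_scale * weight_scale) / output_scale, decomposed here into a
        // fixed-point multiplier and shift for the output stage kernel.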
        float   multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int32_t output_multiplier;
        int32_t output_shift;
        quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);

        DirectConvolutionLayerOutputStageKernelInfo direct_conv_info;
        direct_conv_info.result_fixedpoint_multiplier = output_multiplier;
        direct_conv_info.result_shift                 = output_shift;
        direct_conv_info.result_offset_after_shift    = oq_info.offset;
        direct_conv_info.output_data_type             = input->info()->data_type();
        _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, direct_conv_info);
        _accumulator.allocator()->allocate();
    }
    else if(_has_bias)
    {
        _output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);
    }

    // Permute output
    if(!_is_nchw)
    {
        // Configure the function to transform the convoluted output to NHWC
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
        _permuted_output.allocator()->allocate();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure_optimized(const ITensor             *input,
                                                                                                     const ITensor             *weights,
                                                                                                     const ITensor             *biases,
                                                                                                     ITensor                   *output,
                                                                                                     const PadStrideInfo       &conv_info,
                                                                                                     unsigned int               depth_multiplier,
                                                                                                     const ActivationLayerInfo &act_info,
                                                                                                     const Size2D              &dilation)
{
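    // ReLU-style activations can be fused directly into the assembly kernel; any other enabled
    // activation is kept as a separate NEActivationLayer that runs on the final output.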
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
    _is_activationlayer_enabled         = act_info.enabled() && !(is_relu || is_relu6);
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);

        // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor       *input,
                                                                                           const ITensor *weights,
                                                                                           const ITensor *biases,
                                                                                           ITensor       *output, const PadStrideInfo &conv_info,
                                                                                           unsigned int   depth_multiplier,
                                                                                           const ActivationLayerInfo &act_info,
                                                                                           const Size2D              &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                                       output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias         = biases != nullptr;
    _is_optimized     = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),
                                                                                       weights->info(),
                                                                                       conv_info,
                                                                                       depth_multiplier,
                                                                                       dilation);
    _is_nchw          = input->info()->data_layout() == DataLayout::NCHW;
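    // The assembly path expects NHWC data while the generic 3x3 kernel expects NCHW, so a layout
    // permute is needed exactly when the chosen path does not match the incoming data layout.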
    _permute          = _is_optimized == _is_nchw;
    _is_prepared      = false;
    _is_activationlayer_enabled = act_info.enabled();

    // Configure appropriate pipeline
    if(_is_optimized)
    {
        configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }
    else
    {
        configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo         *input,
                                                                                            const ITensorInfo         *weights,
                                                                                            const ITensorInfo         *biases,
                                                                                            const ITensorInfo         *output,
                                                                                            const PadStrideInfo       &conv_info,
                                                                                            unsigned int               depth_multiplier,
                                                                                            const ActivationLayerInfo &act_info,
                                                                                            const Size2D              &dilation)
{
    return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run_generic()
{
    // Fill border
    NEScheduler::get().schedule(&_border_handler, Window::DimX);

    // Execute depthwise convolution
    NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

    // Add biases
    if(_has_bias || _is_quantized)
    {
        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
    }

    // Permute output
    if(!_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run_optimized()
{
    // Run assembly function
    _dwc_optimized_func.run();

    // Permute output
    if(_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    _is_optimized ? run_optimized() : run_generic();

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
{
    if(!_is_prepared)
    {
        // Permute weights
        if(_permute)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare optimized function
        if(_is_optimized)
        {
            _dwc_optimized_func.prepare();
            if(!_permuted_weights.is_used())
            {
                _permuted_weights.allocator()->free();
            }
        }

        _is_prepared = true;
    }
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
    : _depthwise_conv_kernel(), _fill_border(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _permuted_input(), _permuted_weights(), _permuted_output(),
      _is_prepared(false), _is_nchw(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

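    // The native depthwise kernel operates on NHWC data. For NCHW inputs the input/weights/output
    // tensors are permuted around the kernel, and the weights permutation is deferred to prepare().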
    _is_nchw     = input->info()->data_layout() == DataLayout::NCHW;
    _is_prepared = !_is_nchw;

    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = output;
    if(_is_nchw)
    {
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);
        input_to_use = &_permuted_input;

        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
        weights_to_use = &_permuted_weights;

        _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
        output_to_use = &_permuted_output;
    }
    _original_weights = weights_to_use;

    _depthwise_conv_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
    _fill_border.configure(input_to_use, _depthwise_conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint64_t>(0), input->info()->data_type(), input->info()->quantization_info()));

    if(_is_nchw)
    {
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);

        _permuted_input.allocator()->allocate();
        _permuted_weights.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                  const PadStrideInfo &conv_info,
                                                                                  unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    if(input->data_layout() == DataLayout::NCHW)
    {
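        // Validation for NCHW builds permuted NHWC TensorInfos to mirror the permutes configured at
        // run time, then validates the permutes and the native kernel against those shapes.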
        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
        permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));

        const TensorInfo permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_output  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));

        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));

        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, conv_info, depth_multiplier, dilation));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
{
    if(_is_nchw)
    {
        prepare();
        _permute_input.run();
    }

    NEScheduler::get().schedule(&_fill_border, Window::DimX);
    NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY);

    if(_is_nchw)
    {
        _permute_output.run();
    }

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

        _permute_weights.run();
        _original_weights->mark_as_unused();
        _is_prepared = true;
    }
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(std::move(memory_manager)), _func_generic()
{
}

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                            const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info, dilation);
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}
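
// A minimal usage sketch of the public interface above (illustrative only; the tensor shapes,
// the 3x3 kernel size and the PadStrideInfo values below are assumptions, not part of the library):
//
//     Tensor input{}, weights{}, biases{}, output{};
//     input.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));  // W x H x C (NCHW)
//     weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));  // 3x3 depthwise weights
//     biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
//     output.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
//
//     NEDepthwiseConvolutionLayer depthwise{};
//     depthwise.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1), 1 /* depth_multiplier */);
//
//     input.allocator()->allocate();
//     weights.allocator()->allocate();
//     biases.allocator()->allocate();
//     output.allocator()->allocate();
//     // ... fill input/weights/biases, then:
//     depthwise.run();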

Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    switch(depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            return NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            return NEDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

DepthwiseConvolutionFunction NEDepthwiseConvolutionLayer::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                             const PadStrideInfo &conv_info,
                                                                                             unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
{
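    // Prefer the assembly-backed path whenever its validation passes; otherwise fall back to the
    // portable native kernel.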
    if(bool(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation)))
    {
        return DepthwiseConvolutionFunction::OPTIMIZED;
    }
    else
    {
        return DepthwiseConvolutionFunction::GENERIC;
    }
}

void NEDepthwiseConvolutionLayer::run()
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.run();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.run();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}

void NEDepthwiseConvolutionLayer::prepare()
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.prepare();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.prepare();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}
} // namespace arm_compute