/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "support/ToolchainSupport.h"

using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;

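// A minimal usage sketch for these functions (illustrative only, not part of the library sources): the
// tensor shapes, the PadStrideInfo values and the surrounding Tensor setup below are assumptions chosen
// for the example, and "arm_compute/runtime/Tensor.h" would additionally need to be included.
//
//     Tensor src{}, weights{}, bias{}, dst{};
//     src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));   // W x H x C (NCHW)
//     weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32)); // 3x3 kernel per channel
//     bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));            // one bias per channel
//     dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));   // same spatial size (pad 1, stride 1)
//
//     NEDepthwiseConvolutionLayer3x3 dwc;
//     dwc.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1), 1 /* depth_multiplier */);
//
//     src.allocator()->allocate();
//     weights.allocator()->allocate();
//     bias.allocator()->allocate();
//     dst.allocator()->allocate();
//     // ... fill src, weights and bias with data ...
//     dwc.run();
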
namespace arm_compute
{
NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_kernel(), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(),
      _activationlayer_function(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_optimized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor *input,
                                                       const ITensor *weights,
                                                       const ITensor *biases,
                                                       ITensor *output,
                                                       const PadStrideInfo &conv_info,
                                                       unsigned int depth_multiplier,
                                                       const ActivationLayerInfo &act_info,
                                                       const Size2D &dilation)
{
    ARM_COMPUTE_UNUSED(act_info);

    PixelValue zero_value(0.f);

    // Initialize the intermediate accumulator tensor in case of quantized input
    if(_is_quantized)
    {
        TensorShape accum_shape  = output->info()->tensor_shape();
        DataLayout  accum_layout = output->info()->data_layout();
        if(!_is_nchw)
        {
            permute(accum_shape, PermutationVector(1U, 2U, 0U));
            accum_layout = DataLayout::NCHW;
        }

        _memory_group.manage(&_accumulator);
        _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
        _accumulator.info()->set_data_layout(accum_layout);
        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
    }

    if(!_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure depthwise
        _dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

        // Allocate tensors
        _permuted_input.allocator()->allocate();
    }
    else
    {
        // Configure depthwise convolution kernel
        _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
    }

    // Configure biases accumulation
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

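        // The effective requantization scale is (iq_info.scale * wq_info.scale) / oq_info.scale: it is
        // decomposed into an integer multiplier and a right shift so that the output stage can scale the
        // S32 accumulator back to the QASYMM8 output range.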
        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int   output_multiplier;
        int   output_shift;
        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
        _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);
        _accumulator.allocator()->allocate();
    }
    else if(_has_bias)
    {
        _output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);
    }

    // Permute output
    if(!_is_nchw)
    {
        // Configure the function to transform the convolved output to NHWC
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
        _permuted_output.allocator()->allocate();
    }
}

void NEDepthwiseConvolutionLayer3x3::configure_optimized(const ITensor *input,
                                                         const ITensor *weights,
                                                         const ITensor *biases,
                                                         ITensor *output,
                                                         const PadStrideInfo &conv_info,
                                                         unsigned int depth_multiplier,
                                                         const ActivationLayerInfo &act_info)
{
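    // ReLU and ReLU6 can be fused directly into the assembly depthwise kernel; any other enabled
    // activation is run as a separate activation layer after the convolution.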
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
    _is_activationlayer_enabled         = act_info.enabled() && !(is_relu || is_relu6);
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use);

        // Configure the function to transform the convolved output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use);
    }
}

void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input,
                                               const ITensor *weights,
                                               const ITensor *biases,
                                               ITensor *output, const PadStrideInfo &conv_info,
                                               unsigned int depth_multiplier,
                                               const ActivationLayerInfo &act_info,
                                               const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer3x3::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                        output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias         = biases != nullptr;
    _is_optimized     = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),
                                                                                       weights->info(),
                                                                                       conv_info,
                                                                                       depth_multiplier, dilation);
    _is_nchw                    = input->info()->data_layout() == DataLayout::NCHW;
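    // The assembly (optimized) path works on NHWC data while the generic path works on NCHW data, so a
    // permutation of input, weights and output is needed whenever the selected path and the source
    // layout disagree, i.e. exactly when _is_optimized == _is_nchw.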
    _permute                    = _is_optimized == _is_nchw;
    _is_prepared                = false;
    _is_activationlayer_enabled = act_info.enabled();

    // Configure appropriate pipeline
    if(_is_optimized)
    {
        configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info);
    }
    else
    {
        configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input,
                                                const ITensorInfo *weights,
                                                const ITensorInfo *biases,
                                                const ITensorInfo *output,
                                                const PadStrideInfo &conv_info,
                                                unsigned int depth_multiplier,
                                                const ActivationLayerInfo &act_info,
                                                const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
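    // The dilated kernel extent, k + (k - 1) * (dilation - 1), must fit inside the padded input in each
    // spatial dimension.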
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
    {
        const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
        TensorInfo accumulator  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier));

        if(is_quantized)
        {
            const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
            const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
            const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

            float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
            int   output_multiplier;
            int   output_shift;
            ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift));
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output, output_multiplier, output_shift, oq_info.offset));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayer3x3::run_generic()
{
    // Fill border
    NEScheduler::get().schedule(&_border_handler, Window::DimX);

    // Execute depthwise convolution
    NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

    // Add biases
    if(_has_bias || _is_quantized)
    {
        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
    }

    // Permute output
    if(!_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer3x3::run_optimized()
{
    // Run assembly function
    _dwc_optimized_func.run();

    // Permute output
    if(_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer3x3::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    _is_optimized ? run_optimized() : run_generic();

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer3x3::prepare()
{
    if(!_is_prepared)
    {
        // Permute weights
        if(_permute)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare optimized function
        if(_is_optimized)
        {
            _dwc_optimized_func.prepare();
            if(!_permuted_weights.is_used())
            {
                _permuted_weights.allocator()->free();
            }
        }

        _is_prepared = true;
    }
}

NEDepthwiseConvolutionLayerOptimized::NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_kernel(), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(),
      _activationlayer_function(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_optimized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

void NEDepthwiseConvolutionLayerOptimized::configure_generic(ITensor *input,
                                                             const ITensor *weights,
                                                             const ITensor *biases,
                                                             ITensor *output,
                                                             const PadStrideInfo &conv_info,
                                                             unsigned int depth_multiplier,
                                                             const ActivationLayerInfo &act_info,
                                                             const Size2D &dilation)
{
    ARM_COMPUTE_UNUSED(act_info);

    PixelValue zero_value(0.f);

    // Initialize the intermediate accumulator tensor in case of quantized input
    if(_is_quantized)
    {
        TensorShape accum_shape  = output->info()->tensor_shape();
        DataLayout  accum_layout = output->info()->data_layout();
        if(!_is_nchw)
        {
            permute(accum_shape, PermutationVector(1U, 2U, 0U));
            accum_layout = DataLayout::NCHW;
        }

        _memory_group.manage(&_accumulator);
        _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
        _accumulator.info()->set_data_layout(accum_layout);
        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
    }

    if(!_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure depthwise
        _dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

        // Allocate tensors
        _permuted_input.allocator()->allocate();
    }
    else
    {
        // Configure depthwise convolution kernel
        _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
    }

    // Configure biases accumulation
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int   output_multiplier;
        int   output_shift;
        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
        _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);
        _accumulator.allocator()->allocate();
    }
    else if(_has_bias)
    {
        _output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);
    }

    // Permute output
    if(!_is_nchw)
    {
        // Configure the function to transform the convolved output to NHWC
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
        _permuted_output.allocator()->allocate();
    }
}

void NEDepthwiseConvolutionLayerOptimized::configure_optimized(const ITensor *input,
                                                               const ITensor *weights,
                                                               const ITensor *biases,
                                                               ITensor *output,
                                                               const PadStrideInfo &conv_info,
                                                               unsigned int depth_multiplier,
                                                               const ActivationLayerInfo &act_info,
                                                               const Size2D &dilation)
{
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
    _is_activationlayer_enabled         = act_info.enabled() && !(is_relu || is_relu6);
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);

        // Configure the function to transform the convolved output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
    }
}

void NEDepthwiseConvolutionLayerOptimized::configure(ITensor *input,
                                                     const ITensor *weights,
                                                     const ITensor *biases,
                                                     ITensor *output, const PadStrideInfo &conv_info,
                                                     unsigned int depth_multiplier,
                                                     const ActivationLayerInfo &act_info,
                                                     const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimized::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                              output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias         = biases != nullptr;
    _is_optimized     = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),
                                                                                       weights->info(),
                                                                                       conv_info,
                                                                                       depth_multiplier,
                                                                                       dilation);
    _is_nchw                    = input->info()->data_layout() == DataLayout::NCHW;
    _permute                    = _is_optimized == _is_nchw;
    _is_prepared                = false;
    _is_activationlayer_enabled = act_info.enabled();

    // Configure appropriate pipeline
    if(_is_optimized)
    {
        configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }
    else
    {
        configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayerOptimized::validate(const ITensorInfo *input,
                                                      const ITensorInfo *weights,
                                                      const ITensorInfo *biases,
                                                      const ITensorInfo *output,
                                                      const PadStrideInfo &conv_info,
                                                      unsigned int depth_multiplier,
                                                      const ActivationLayerInfo &act_info,
                                                      const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        ARM_COMPUTE_UNUSED(multiplier);
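        // calculate_quantized_multiplier_less_than_one() can only represent requantization scales in
        // [0, 1), so configurations whose combined scale exceeds 1 are rejected here.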
        ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
    }

    if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
    {
        TensorInfo accumulator = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier, dilation));

        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayerOptimized::run_generic()
{
    // Fill border
    NEScheduler::get().schedule(&_border_handler, Window::DimX);

    // Execute depthwise convolution
    NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

    // Add biases
    if(_has_bias || _is_quantized)
    {
        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
    }

    // Permute output
    if(!_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayerOptimized::run_optimized()
{
    // Run assembly function
    _dwc_optimized_func.run();

    // Permute output
    if(_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayerOptimized::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    _is_optimized ? run_optimized() : run_generic();

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayerOptimized::prepare()
{
    if(!_is_prepared)
    {
        // Permute weights
        if(_permute)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare optimized function
        if(_is_optimized)
        {
            _dwc_optimized_func.prepare();
            if(!_permuted_weights.is_used())
            {
                _permuted_weights.allocator()->free();
            }
        }

        _is_prepared = true;
    }
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _depthwise_conv_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _fill_border(), _v2mm_input_fill_border(),
      _v2mm_weights_fill_border(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _input_reshaped(), _weights_reshaped(), _v2mm_output(), _output_reshaped(),
      _permuted_input(), _permuted_weights(), _permuted_output(), _is_prepared(false), _is_quantized(false), _is_nhwc(false), _is_activationlayer_enabled(false), _is_optimized(false),
      _original_weights(nullptr)
{
}

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                            unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

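    // The NHWC/F32 case is handled directly by the native depthwise convolution kernel; every other
    // layout/data-type combination falls back to the im2col + GEMV pipeline, which runs in NCHW.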
    _is_nhwc      = input->info()->data_layout() == DataLayout::NHWC;
    _is_optimized = _is_nhwc && input->info()->data_type() == DataType::F32;

    if(!_is_optimized)
    {
        ITensor       *input_to_use   = input;
        const ITensor *weights_to_use = weights;
        ITensor       *output_to_use  = output;

        if(_is_nhwc)
        {
            _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
            _permuted_input.info()->set_data_layout(DataLayout::NCHW);
            input_to_use = &_permuted_input;

            _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
            _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
            weights_to_use = &_permuted_weights;
        }

        const size_t weights_w = weights_to_use->info()->dimension(0);
        const size_t weights_h = weights_to_use->info()->dimension(1);
        const size_t weights_z = weights_to_use->info()->dimension(2);

        _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
        _is_prepared      = false;
        _original_weights = weights_to_use;

        // Should the bias be appended?
        bool append_bias = (biases != nullptr) && !_is_quantized;

        // Calculate output shape
        TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier, dilation);

        // Output auto initialization if not yet initialized
        auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
        ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);

        if(_is_nhwc)
        {
            permute(output_shape, PermutationVector(1U, 2U, 0U));
            _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
            _permuted_output.info()->set_data_layout(DataLayout::NCHW);
            _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
            output_to_use = &_permuted_output;
        }

        // Output width and height
        const unsigned int conv_w = output_shape.x();
        const unsigned int conv_h = output_shape.y();

        // Set up intermediate tensors
        const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);
        const size_t conv_size  = conv_w * conv_h;

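        // The generic pipeline lowers the depthwise convolution to an im2col transform followed by a
        // per-channel matrix-vector multiply (GEMV): every output position contributes a row of
        // weights_w * weights_h input samples (plus one if the bias is appended), and the resulting
        // vector is reshaped back into the output tensor.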
        // Im2Col configuration
        TensorShape shape_im2col = input_to_use->info()->tensor_shape();
        shape_im2col.set(0, patch_size);
        shape_im2col.set(1, conv_size);
        shape_im2col.set(2, weights_z);
        _input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
        _im2col_kernel.configure(input_to_use, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier, dilation);

        // Weights reshape configuration
        const TensorShape shape_weights_reshape(patch_size, weights_z);
        _weights_reshaped.allocator()->init(weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
        _weights_reshape_kernel.configure(weights_to_use, &_weights_reshaped, append_bias ? biases : nullptr);

        // GEMV configuration
        DataType    v2mm_dt        = (input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : input->info()->data_type();
        TensorShape shape_v2mm_out = input_to_use->info()->tensor_shape();
        shape_v2mm_out.set(0, conv_size * weights_z);
        shape_v2mm_out.set(1, 1);
        shape_v2mm_out.set(2, 1);
        _v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
        _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
        _output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
        _vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output_to_use, conv_w, conv_h);

        // Output staged configuration
        if(_is_quantized)
        {
            const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
            const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
            const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

            float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
            int   output_multiplier;
            int   output_shift;
            quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
            _output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset);
            _output_reshaped.allocator()->allocate();
        }

        if(_is_nhwc)
        {
            _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

            _permuted_input.allocator()->allocate();
            _permuted_weights.allocator()->allocate();
            _permuted_output.allocator()->allocate();
        }

        // Fill borders on inputs
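        // For quantized tensors the constant border value is the zero-point offset of the respective
        // tensor, so that the padded samples represent a true zero after dequantization.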
        PixelValue zero_in(static_cast<int32_t>(0));
        PixelValue zero_w(static_cast<int32_t>(0));
        if(_is_quantized)
        {
            zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().uniform().offset));
            zero_w  = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().uniform().offset));
        }
        BorderSize border_size = _v2mm_kernel.border_size();
        _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);

        border_size.bottom = 0;
        _v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, zero_w);

        // Allocate intermediate tensors
        _input_reshaped.allocator()->allocate();
        _v2mm_output.allocator()->allocate();
    }
    else
    {
        // Configure kernel
        _depthwise_conv_kernel.configure(input, weights, biases, output, conv_info, depth_multiplier, dilation);

        // Fill input borders
        _fill_border.configure(input, _depthwise_conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint64_t>(0), input->info()->data_type()));
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);

    const unsigned int width_idx   = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const unsigned int height_idx  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);

    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) + (weights->dimension(width_idx) - 1) * (dilation.x() - 1) > input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(height_idx) + (weights->dimension(height_idx) - 1) * (dilation.y() - 1) > input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom());
    ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(channel_idx) * depth_multiplier) != weights->dimension(channel_idx));

    if(input->data_layout() != DataLayout::NHWC || input->data_type() != DataType::F32)
    {
        // Clone output to use auto init
        auto output_clone = output->clone();

        const ITensorInfo *input_to_use   = input;
        const ITensorInfo *weights_to_use = weights;
        const ITensorInfo *output_to_use  = output_clone.get();

        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorInfo  permuted_input;
        TensorInfo  permuted_weights;

        if(input->data_layout() == DataLayout::NHWC)
        {
            permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));
            permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));

            permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NCHW));
            permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NCHW));

            input_to_use   = &permuted_input;
            weights_to_use = &permuted_weights;
        }

        const bool         is_quantized = is_data_type_quantized_asymmetric(input->data_type());
        const bool         append_bias  = (biases != nullptr) && !is_quantized;
        TensorShape        output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
        const size_t       weights_w    = weights_to_use->dimension(0);
        const size_t       weights_h    = weights_to_use->dimension(1);
        const size_t       weights_z    = weights_to_use->dimension(2);
        const unsigned int conv_w       = output_shape[width_idx];
        const unsigned int conv_h       = output_shape[height_idx];
        const size_t       patch_size   = weights_w * weights_h + (append_bias ? 1 : 0);
        const size_t       conv_size    = conv_w * conv_h;

        // Output auto initialization if not yet initialized
        auto_init_if_empty(*output_clone, input->clone()->set_tensor_shape(output_shape));
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);

        TensorInfo permuted_output;
        if(input->data_layout() == DataLayout::NHWC)
        {
            permute(output_shape, PermutationVector(1U, 2U, 0U));
            permuted_output = TensorInfo(output_clone->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_layout(DataLayout::NCHW));
            output_to_use   = &permuted_output;
        }

        // Im2Col configuration
        TensorShape shape_im2col = input_to_use->tensor_shape();
        shape_im2col.set(0, patch_size);
        shape_im2col.set(1, conv_size);
        shape_im2col.set(2, weights_z);
        TensorInfo input_reshaped(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseIm2ColKernel::validate(input_to_use, &input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier, dilation));

        // Weights reshape configuration
        const TensorShape shape_weights_reshape(patch_size, weights_z);
        TensorInfo        weights_reshaped(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseWeightsReshapeKernel::validate(weights_to_use, &weights_reshaped, append_bias ? biases : nullptr));

        // GEMV configuration
        DataType    v2mm_dt        = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
        TensorShape shape_v2mm_out = input_to_use->tensor_shape();
        shape_v2mm_out.set(0, conv_size * weights_z);
        shape_v2mm_out.set(1, 1);
        shape_v2mm_out.set(2, 1);
        TensorInfo v2mm_output(input->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixVectorMultiplyKernel::validate(&input_reshaped, &weights_reshaped, &v2mm_output));

        TensorInfo output_reshaped(v2mm_output.clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_to_use->tensor_shape()));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseVectorToTensorKernel::validate(&v2mm_output, (is_quantized) ? &output_reshaped : output_to_use, conv_w, conv_h));

        if(is_quantized)
        {
            const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
            const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
            const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

            float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
            int   output_multiplier;
            int   output_shift;
            ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift));
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayer::run()
{
    if(!_is_optimized)
    {
        prepare();

        if(_is_nhwc)
        {
            _permute_input.run();
        }

        NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);
        NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);
        NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);
        NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);
        if(_is_quantized)
        {
            NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
        }

        if(_is_nhwc)
        {
            _permute_output.run();
        }
    }
    else
    {
        NEScheduler::get().schedule(&_fill_border, Window::DimX);
        NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY);
    }

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::prepare()
{
    if(!_is_prepared && !_is_optimized)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

        if(_is_nhwc)
        {
            _permute_weights.run();
        }

        // Run reshape and mark original weights as unused
        _weights_reshaped.allocator()->allocate();
        NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);
        NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);
        _original_weights->mark_as_unused();

        _is_prepared = true;
    }
}
} // namespace arm_compute