/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "support/ToolchainSupport.h"

using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;

namespace arm_compute
{
NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_kernel(), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(),
      _activationlayer_function(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_optimized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

void NEDepthwiseConvolutionLayer3x3::configure_generic(ITensor                   *input,
                                                       const ITensor             *weights,
                                                       const ITensor             *biases,
                                                       ITensor                   *output,
                                                       const PadStrideInfo       &conv_info,
                                                       unsigned int               depth_multiplier,
                                                       const ActivationLayerInfo &act_info,
                                                       const Size2D              &dilation)
{
    ARM_COMPUTE_UNUSED(act_info);

    PixelValue zero_value(0.f);

    // Initialize the intermediate accumulator tensor in case of quantized input
    if(_is_quantized)
    {
        TensorShape accum_shape  = output->info()->tensor_shape();
        DataLayout  accum_layout = output->info()->data_layout();
        if(!_is_nchw)
        {
            permute(accum_shape, PermutationVector(1U, 2U, 0U));
            accum_layout = DataLayout::NCHW;
        }

        _memory_group.manage(&_accumulator);
        _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
        _accumulator.info()->set_data_layout(accum_layout);
        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
    }
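    // Note on the quantized path: in QASYMM8 the real value 0 is represented by the
    // quantization offset, so constant borders are filled with that offset rather
    // than with numeric 0 (e.g. an offset of 128 pads with 128). The accumulator is
    // S32 because 8-bit multiply-accumulates over a kernel window would overflow
    // narrower storage.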

    if(!_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);
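
        // How the permutation reads (shape values assumed): ACL stores dimensions
        // fastest-moving first, so NHWC is held as (C, W, H) and NCHW as (W, H, C).
        // permute() sets output dimension i to input dimension perm[i], hence
        // PermutationVector(1U, 2U, 0U) maps (C, W, H) -> (W, H, C); e.g. a
        // (64, 56, 56) NHWC shape becomes (56, 56, 64).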

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure depthwise
        _dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

        // Allocate tensors
        _permuted_input.allocator()->allocate();
    }
    else
    {
        // Configure depthwise convolution kernel
        _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
    }

    // Configure biases accumulation
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int   output_multiplier;
        int   output_shift;
        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
        _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);
        _accumulator.allocator()->allocate();
    }
    else if(_has_bias)
    {
        _output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);
    }
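    // Worked example of the requantization decomposition (scales assumed): with
    // iq_info.scale = 0.02f, wq_info.scale = 0.005f and oq_info.scale = 0.05f,
    // multiplier = (0.02 * 0.005) / 0.05 = 0.002.
    // calculate_quantized_multiplier_less_than_one() rewrites this as q * 2^-shift
    // with q in [0.5, 1): 0.002 = 0.512 * 2^-8, so output_shift = 8 and
    // output_multiplier = round(0.512 * 2^31) = 1099511628. The output stage kernel
    // then applies the fixed-point multiply and adds the output zero point.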

    // Permute output
    if(!_is_nchw)
    {
        // Configure the function to transform the convolved output to NHWC
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
        _permuted_output.allocator()->allocate();
    }
}

void NEDepthwiseConvolutionLayer3x3::configure_optimized(const ITensor             *input,
                                                         const ITensor             *weights,
                                                         const ITensor             *biases,
                                                         ITensor                   *output,
                                                         const PadStrideInfo       &conv_info,
                                                         unsigned int               depth_multiplier,
                                                         const ActivationLayerInfo &act_info)
{
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
    _is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }
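    // Fusion rule (as implemented above): ReLU and ReLU6 can be folded into the
    // assembly kernel, so for those act_info is forwarded through act_info_to_use
    // and no separate activation layer runs. Any other enabled activation (e.g.
    // logistic) keeps act_info_to_use disabled and is executed afterwards by
    // _activationlayer_function.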

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use);

        // Configure the function to transform the convolved output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use);
    }
}

void NEDepthwiseConvolutionLayer3x3::configure(ITensor                   *input,
                                               const ITensor             *weights,
                                               const ITensor             *biases,
                                               ITensor                   *output,
                                               const PadStrideInfo       &conv_info,
                                               unsigned int               depth_multiplier,
                                               const ActivationLayerInfo &act_info,
                                               const Size2D              &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer3x3::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                        output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias         = biases != nullptr;
    _is_optimized     = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),
                                                                                       weights->info(),
                                                                                       conv_info,
                                                                                       depth_multiplier, dilation);
    _is_nchw          = input->info()->data_layout() == DataLayout::NCHW;
    _permute          = _is_optimized == _is_nchw;
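    // _permute is true exactly when the data layout disagrees with the layout the
    // chosen path wants: the optimized (assembly) path works in NHWC while the
    // generic path works in NCHW. NCHW input on the optimized path and NHWC input
    // on the generic path both need permutes; the other two combinations run in place.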
    _is_prepared      = false;
    _is_activationlayer_enabled = act_info.enabled();

    // Configure appropriate pipeline
    if(_is_optimized)
    {
        configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info);
    }
    else
    {
        configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer3x3::validate(const ITensorInfo         *input,
                                                const ITensorInfo         *weights,
                                                const ITensorInfo         *biases,
                                                const ITensorInfo         *output,
                                                const PadStrideInfo       &conv_info,
                                                unsigned int               depth_multiplier,
                                                const ActivationLayerInfo &act_info,
                                                const Size2D              &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());
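    // The two checks above bound the effective (dilated) kernel extent by the
    // padded input extent: effective_k = k + (k - 1) * (d - 1). For example, a 3x3
    // kernel with dilation 2 spans 5 pixels per axis, so a padded input narrower
    // than 5 pixels would be rejected.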

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
    {
        const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
        TensorInfo accumulator  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier));

        if(is_quantized)
        {
            const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
            const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
            const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

            float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
            int   output_multiplier;
            int   output_shift;
            ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift));
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output, output_multiplier, output_shift, oq_info.offset));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayer3x3::run_generic()
{
    // Fill border
    NEScheduler::get().schedule(&_border_handler, Window::DimX);

    // Execute depthwise convolution
    NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

    // Add biases
    if(_has_bias || _is_quantized)
    {
        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
    }

    // Permute output
    if(!_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer3x3::run_optimized()
{
    // Run assembly function
    _dwc_optimized_func.run();

    // Permute output
    if(_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer3x3::run()
{
    prepare();

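    // MemoryGroupResourceScope acquires the memory managed through _memory_group
    // for the duration of this call and releases it on scope exit, so intermediate
    // tensors only hold backing memory while run() executes.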
    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    _is_optimized ? run_optimized() : run_generic();

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer3x3::prepare()
{
    if(!_is_prepared)
    {
        // Permute weights
        if(_permute)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare optimized function
        if(_is_optimized)
        {
            _dwc_optimized_func.prepare();
            if(!_permuted_weights.is_used())
            {
                _permuted_weights.allocator()->free();
            }
        }

        _is_prepared = true;
    }
}
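// prepare() runs once, on the first run(): weights are permuted (when the layouts
// disagree) into _permuted_weights and the originals are marked unused so their
// memory can be reclaimed; the assembly function additionally packs its own working
// copy of the weights, which is why _permuted_weights can be freed afterwards.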

NEDepthwiseConvolutionLayerOptimized::NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_kernel(), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(),
      _activationlayer_function(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_optimized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

void NEDepthwiseConvolutionLayerOptimized::configure_generic(ITensor                   *input,
                                                             const ITensor             *weights,
                                                             const ITensor             *biases,
                                                             ITensor                   *output,
                                                             const PadStrideInfo       &conv_info,
                                                             unsigned int               depth_multiplier,
                                                             const ActivationLayerInfo &act_info,
                                                             const Size2D              &dilation)
{
    ARM_COMPUTE_UNUSED(act_info);

    PixelValue zero_value(0.f);

    // Initialize the intermediate accumulator tensor in case of quantized input
    if(_is_quantized)
    {
        TensorShape accum_shape  = output->info()->tensor_shape();
        DataLayout  accum_layout = output->info()->data_layout();
        if(!_is_nchw)
        {
            permute(accum_shape, PermutationVector(1U, 2U, 0U));
            accum_layout = DataLayout::NCHW;
        }

        _memory_group.manage(&_accumulator);
        _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
        _accumulator.info()->set_data_layout(accum_layout);
        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
    }

    if(!_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure depthwise
        _dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

        // Allocate tensors
        _permuted_input.allocator()->allocate();
    }
    else
    {
        // Configure depthwise convolution kernel
        _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
    }

    // Configure biases accumulation
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int   output_multiplier;
        int   output_shift;
        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
        _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);
        _accumulator.allocator()->allocate();
    }
    else if(_has_bias)
    {
        _output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);
    }

    // Permute output
    if(!_is_nchw)
    {
        // Configure the function to transform the convolved output to NHWC
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
        _permuted_output.allocator()->allocate();
    }
}

void NEDepthwiseConvolutionLayerOptimized::configure_optimized(const ITensor             *input,
                                                               const ITensor             *weights,
                                                               const ITensor             *biases,
                                                               ITensor                   *output,
                                                               const PadStrideInfo       &conv_info,
                                                               unsigned int               depth_multiplier,
                                                               const ActivationLayerInfo &act_info,
                                                               const Size2D              &dilation)
{
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
    _is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);

        // Configure the function to transform the convolved output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
    }
}

void NEDepthwiseConvolutionLayerOptimized::configure(ITensor                   *input,
                                                     const ITensor             *weights,
                                                     const ITensor             *biases,
                                                     ITensor                   *output,
                                                     const PadStrideInfo       &conv_info,
                                                     unsigned int               depth_multiplier,
                                                     const ActivationLayerInfo &act_info,
                                                     const Size2D              &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimized::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                              output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias         = biases != nullptr;
    _is_optimized     = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),
                                                                                       weights->info(),
                                                                                       conv_info,
                                                                                       depth_multiplier,
                                                                                       dilation);
    _is_nchw          = input->info()->data_layout() == DataLayout::NCHW;
    _permute          = _is_optimized == _is_nchw;
    _is_prepared      = false;
    _is_activationlayer_enabled = act_info.enabled();

    // Configure appropriate pipeline
    if(_is_optimized)
    {
        configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }
    else
    {
        configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayerOptimized::validate(const ITensorInfo         *input,
                                                      const ITensorInfo         *weights,
                                                      const ITensorInfo         *biases,
                                                      const ITensorInfo         *output,
                                                      const PadStrideInfo       &conv_info,
                                                      unsigned int               depth_multiplier,
                                                      const ActivationLayerInfo &act_info,
                                                      const Size2D              &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
    {
        const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
        TensorInfo accumulator  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier, dilation));

        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayerOptimized::run_generic()
{
    // Fill border
    NEScheduler::get().schedule(&_border_handler, Window::DimX);

    // Execute depthwise convolution
    NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

    // Add biases
    if(_has_bias || _is_quantized)
    {
        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
    }

    // Permute output
    if(!_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayerOptimized::run_optimized()
{
    // Run assembly function
    _dwc_optimized_func.run();

    // Permute output
    if(_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayerOptimized::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    _is_optimized ? run_optimized() : run_generic();

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayerOptimized::prepare()
{
    if(!_is_prepared)
    {
        // Permute weights
        if(_permute)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare optimized function
        if(_is_optimized)
        {
            _dwc_optimized_func.prepare();
            if(!_permuted_weights.is_used())
            {
                _permuted_weights.allocator()->free();
            }
        }

        _is_prepared = true;
    }
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _permute_input(),
      _permute_weights(), _permute_output(), _activationlayer_function(), _input_reshaped(), _weights_reshaped(), _v2mm_output(), _output_reshaped(), _permuted_input(), _permuted_weights(),
      _permuted_output(), _is_prepared(false), _is_quantized(false), _is_nhwc(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                            unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

    _is_nhwc = input->info()->data_layout() == DataLayout::NHWC;

    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = output;

    if(_is_nhwc)
    {
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);
        input_to_use = &_permuted_input;

        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        weights_to_use = &_permuted_weights;
    }

    const size_t weights_w = weights_to_use->info()->dimension(0);
    const size_t weights_h = weights_to_use->info()->dimension(1);
    const size_t weights_z = weights_to_use->info()->dimension(2);

    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _is_prepared      = false;
    _original_weights = weights_to_use;

    // Should bias be appended?
    bool append_bias = (biases != nullptr) && !_is_quantized;

    // Calculate output shape
    TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier, dilation);
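
    // Worked example (values assumed): a 112x112 input with a 3x3 kernel, stride 2
    // and 1-pixel padding on each side gives (112 + 1 + 1 - 3) / 2 + 1 = 56 per
    // spatial axis, so output_shape is 56x56 with channels = input_channels *
    // depth_multiplier.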

    // Output auto initialization if not yet initialized
    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);

    if(_is_nhwc)
    {
        permute(output_shape, PermutationVector(1U, 2U, 0U));
        _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
        _permuted_output.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
        output_to_use = &_permuted_output;
    }

    // Output width and height
    const unsigned int conv_w = output_shape.x();
    const unsigned int conv_h = output_shape.y();

    // Set up intermediate tensors
    const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0);
    const size_t conv_size  = conv_w * conv_h;

    // Im2Col configuration
    TensorShape shape_im2col = input_to_use->info()->tensor_shape();
    shape_im2col.set(0, patch_size);
    shape_im2col.set(1, conv_size);
    shape_im2col.set(2, weights_z);
    _input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
    _im2col_kernel.configure(input_to_use, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier, dilation);
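
    // Shape intuition (sizes assumed): the im2col output holds one column per
    // kernel tap (plus one for the appended bias, when present) and one row per
    // output position, per channel. With a 3x3 kernel, bias appended and a 56x56
    // output: patch_size = 3 * 3 + 1 = 10, conv_size = 56 * 56 = 3136, giving a
    // (10, 3136, weights_z) reshaped input.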

    // Weights reshape configuration
    const TensorShape shape_weights_reshape(patch_size, weights_z);
    _weights_reshaped.allocator()->init(weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
    _weights_reshape_kernel.configure(weights_to_use, &_weights_reshaped, append_bias ? biases : nullptr);

    // GEMV configuration
    DataType    v2mm_dt        = (input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : input->info()->data_type();
    TensorShape shape_v2mm_out = input_to_use->info()->tensor_shape();
    shape_v2mm_out.set(0, conv_size * weights_z);
    shape_v2mm_out.set(1, 1);
    shape_v2mm_out.set(2, 1);
    _v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
    _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
    _output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));
    _vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output_to_use, conv_w, conv_h);
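
    // Generic pipeline summary: im2col lays each receptive field out as a row, the
    // matrix-vector multiply (v2mm) kernel dots every row of a channel's im2col
    // plane with that channel's reshaped weight vector, and vector-to-tensor folds
    // the flat conv_size * weights_z result back into a conv_w x conv_h x weights_z
    // tensor.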

    // Output staged configuration
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int   output_multiplier;
        int   output_shift;
        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
        _output_stage_kernel.configure(&_output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset);
        _output_reshaped.allocator()->allocate();
    }

    if(_is_nhwc)
    {
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));

        _permuted_input.allocator()->allocate();
        _permuted_weights.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }

    // Fill borders on inputs
    PixelValue zero_in(static_cast<int32_t>(0));
    PixelValue zero_w(static_cast<int32_t>(0));
    if(_is_quantized)
    {
        zero_in = PixelValue(static_cast<int32_t>(input->info()->quantization_info().uniform().offset));
        zero_w  = PixelValue(static_cast<int32_t>(weights->info()->quantization_info().uniform().offset));
    }
    BorderSize border_size = _v2mm_kernel.border_size();
    _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, zero_in);

    border_size.bottom = 0;
    _v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, zero_w);

    // Allocate intermediate tensors
    _input_reshaped.allocator()->allocate();
    _v2mm_output.allocator()->allocate();

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);

    const unsigned int width_idx   = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const unsigned int height_idx  = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);

    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) + (weights->dimension(width_idx) - 1) * (dilation.x() - 1) > input->dimension(width_idx) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(height_idx) + (weights->dimension(height_idx) - 1) * (dilation.y() - 1) > input->dimension(height_idx) + conv_info.pad_top() + conv_info.pad_bottom());
    ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(channel_idx) * depth_multiplier) != weights->dimension(channel_idx));
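    // Example of the channel constraint (sizes assumed): with 32 input channels and
    // depth_multiplier = 2 the weights tensor must carry 32 * 2 = 64 channels, one
    // filter per output channel.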

    // Clone output to use auto init
    auto output_clone = output->clone();

    const ITensorInfo *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;
    const ITensorInfo *output_to_use  = output_clone.get();

    TensorShape permuted_input_shape   = input->tensor_shape();
    TensorShape permuted_weights_shape = weights->tensor_shape();
    TensorInfo  permuted_input;
    TensorInfo  permuted_weights;

    if(input->data_layout() == DataLayout::NHWC)
    {
        permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));
        permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));

        permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NCHW));
        permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NCHW));

        input_to_use   = &permuted_input;
        weights_to_use = &permuted_weights;
    }

    const bool   is_quantized = is_data_type_quantized_asymmetric(input->data_type());
    const bool   append_bias  = (biases != nullptr) && !is_quantized;
    TensorShape  output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
    const size_t weights_w    = weights_to_use->dimension(0);
    const size_t weights_h    = weights_to_use->dimension(1);
    const size_t weights_z    = weights_to_use->dimension(2);
    const unsigned int conv_w = output_shape[width_idx];
    const unsigned int conv_h = output_shape[height_idx];
    const size_t patch_size   = weights_w * weights_h + (append_bias ? 1 : 0);
    const size_t conv_size    = conv_w * conv_h;

    // Output auto initialization if not yet initialized
    auto_init_if_empty(*output_clone, input->clone()->set_tensor_shape(output_shape));
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);

    TensorInfo permuted_output;
    if(input->data_layout() == DataLayout::NHWC)
    {
        permute(output_shape, PermutationVector(1U, 2U, 0U));
        permuted_output = TensorInfo(output_clone->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_data_layout(DataLayout::NCHW));
        output_to_use   = &permuted_output;
    }

    // Im2Col configuration
    TensorShape shape_im2col = input_to_use->tensor_shape();
    shape_im2col.set(0, patch_size);
    shape_im2col.set(1, conv_size);
    shape_im2col.set(2, weights_z);
    TensorInfo input_reshaped(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseIm2ColKernel::validate(input_to_use, &input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier, dilation));

    // Weights reshape configuration
    const TensorShape shape_weights_reshape(patch_size, weights_z);
    TensorInfo        weights_reshaped(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseWeightsReshapeKernel::validate(weights_to_use, &weights_reshaped, append_bias ? biases : nullptr));

    // GEMV configuration
    DataType    v2mm_dt        = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type();
    TensorShape shape_v2mm_out = input_to_use->tensor_shape();
    shape_v2mm_out.set(0, conv_size * weights_z);
    shape_v2mm_out.set(1, 1);
    shape_v2mm_out.set(2, 1);
    TensorInfo v2mm_output(input->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out).set_data_layout(DataLayout::NCHW));
    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixVectorMultiplyKernel::validate(&input_reshaped, &weights_reshaped, &v2mm_output));

    TensorInfo output_reshaped(v2mm_output.clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_to_use->tensor_shape()));
    ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseVectorToTensorKernel::validate(&v2mm_output, (is_quantized) ? &output_reshaped : output_to_use, conv_w, conv_h));

    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int   output_multiplier;
        int   output_shift;
        ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output_to_use, output_multiplier, output_shift, oq_info.offset));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayer::run()
{
    prepare();

    if(_is_nhwc)
    {
        _permute_input.run();
    }

    NEScheduler::get().schedule(&_im2col_kernel, Window::DimX);
    NEScheduler::get().schedule(&_v2mm_input_fill_border, Window::DimX);
    NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX);
    NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX);
    if(_is_quantized)
    {
        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
    }

    if(_is_nhwc)
    {
        _permute_output.run();
    }

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

        if(_is_nhwc)
        {
            _permute_weights.run();
        }

        // Run reshape and mark original weights as unused
        _weights_reshaped.allocator()->allocate();
        NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX);
        NEScheduler::get().schedule(&_v2mm_weights_fill_border, Window::DimX);
        _original_weights->mark_as_unused();

        _is_prepared = true;
    }
}
} // namespace arm_compute