/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"

#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;

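// Minimal usage sketch (illustrative only, not part of the library). Shapes, data type and padding
// below are placeholder assumptions; real values come from the surrounding network.
//
//   Tensor src, weights, bias, dst;
//   src.allocator()->init(TensorInfo(TensorShape(56U, 56U, 32U), 1, DataType::F32));     // W, H, C (NCHW)
//   weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 32U), 1, DataType::F32));   // 3x3 depthwise
//   bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
//   dst.allocator()->init(TensorInfo(TensorShape(56U, 56U, 32U), 1, DataType::F32));     // stride 1, pad 1
//
//   NEDepthwiseConvolutionLayer dwc;
//   dwc.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1), 1 /* depth_multiplier */,
//                 ActivationLayerInfo(), Size2D(1U, 1U));
//
//   src.allocator()->allocate(); weights.allocator()->allocate();
//   bias.allocator()->allocate(); dst.allocator()->allocate();
//   // ... fill src/weights/bias ...
//   dwc.run();
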
namespace arm_compute
{
namespace
{
Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                    unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    if(!is_data_type_quantized_per_channel(weights->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    const bool is_quantized = (!is_data_type_quantized_per_channel(weights->data_type())) && is_data_type_quantized_asymmetric(input->data_type());

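    // For uniform asymmetric quantization the generic path requantizes via
    // calculate_quantized_multiplier_less_than_one(), so the effective multiplier
    // (input_scale * weights_scale / output_scale) must not exceed 1.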
    if(is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = output->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        ARM_COMPUTE_UNUSED(multiplier);
        ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
    }
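    // If the assembly dispatch cannot handle this configuration, fall back to the NCHW 3x3 kernel;
    // for quantized inputs that path accumulates into S32 and requantizes in a separate output stage.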
    if(!NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation))
    {
        TensorInfo accumulator = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayer3x3Kernel::validate(input, weights, is_quantized ? &accumulator : output, conv_info, depth_multiplier, dilation));

        if(is_quantized)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerOutputStageKernel::validate(&accumulator, biases, output));
        }
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }
    return Status{};
}
} // namespace

NEDepthwiseConvolutionLayerOptimized::NEDepthwiseConvolutionLayerOptimized(std::shared_ptr<IMemoryManager> memory_manager)
    : _func(std::move(memory_manager))
{
}

void NEDepthwiseConvolutionLayerOptimized::configure(ITensor *input,
                                                     const ITensor *weights,
                                                     const ITensor *biases,
                                                     ITensor *output, const PadStrideInfo &conv_info,
                                                     unsigned int depth_multiplier,
                                                     const ActivationLayerInfo &act_info,
                                                     const Size2D &dilation)
{
    _func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

Status NEDepthwiseConvolutionLayerOptimized::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                                      unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

void NEDepthwiseConvolutionLayerOptimized::run()
{
    _func.run();
}

void NEDepthwiseConvolutionLayerOptimized::prepare()
{
    _func.prepare();
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(memory_manager), _dwc_kernel(), _dwc_optimized_func(memory_manager), _output_stage_kernel(), _border_handler(), _permute_input(), _permute_weights(), _permute_output(),
      _activationlayer_function(), _accumulator(), _permuted_input(), _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_optimized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure_generic(ITensor *input,
                                                                                                   const ITensor *weights,
                                                                                                   const ITensor *biases,
                                                                                                   ITensor *output,
                                                                                                   const PadStrideInfo &conv_info,
                                                                                                   unsigned int depth_multiplier,
                                                                                                   const ActivationLayerInfo &act_info,
                                                                                                   const Size2D &dilation)
{
    ARM_COMPUTE_UNUSED(act_info);

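    // Padding value used by the border handler; for quantized inputs it is replaced below with the
    // input quantization offset so that padded pixels correspond to a real zero.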
    PixelValue zero_value(0.f);

    // Initialize the intermediate accumulator tensor in case of quantized input
    if(_is_quantized)
    {
        TensorShape accum_shape  = output->info()->tensor_shape();
        DataLayout  accum_layout = output->info()->data_layout();
        if(!_is_nchw)
        {
            permute(accum_shape, PermutationVector(1U, 2U, 0U));
            accum_layout = DataLayout::NCHW;
        }

        _memory_group.manage(&_accumulator);
        _accumulator.allocator()->init(TensorInfo(accum_shape, 1, DataType::S32, output->info()->quantization_info()));
        _accumulator.info()->set_data_layout(accum_layout);
        zero_value = PixelValue(static_cast<uint32_t>(input->info()->quantization_info().uniform().offset));
    }

    if(!_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NHWC -> NCHW
        _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
        _permuted_input.info()->set_data_layout(DataLayout::NCHW);

        // Configure the function to transform the weights tensor from HWI -> IHW
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
        _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure depthwise
        _dwc_kernel.configure(&_permuted_input, &_permuted_weights, (_is_quantized) ? &_accumulator : &_permuted_output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(&_permuted_input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);

        // Allocate tensors
        _permuted_input.allocator()->allocate();
    }
    else
    {
        // Configure depthwise convolution kernel
        _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier, dilation);

        // Configure border handler
        _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value);
    }

    // Configure biases accumulation
    if(_is_quantized)
    {
        const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
        const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform();
        const UniformQuantizationInfo oq_info = (output->info()->total_size() == 0) ? iq_info : output->info()->quantization_info().uniform();

        float multiplier = (iq_info.scale * wq_info.scale) / oq_info.scale;
        int   output_multiplier;
        int   output_shift;
        quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
        _output_stage_kernel.configure(&_accumulator, biases, _is_nchw ? output : &_permuted_output, output_multiplier, output_shift, oq_info.offset);
        _accumulator.allocator()->allocate();
    }
    else if(_has_bias)
    {
        _output_stage_kernel.configure(_is_nchw ? output : &_permuted_output, biases);
    }

    // Permute output
    if(!_is_nchw)
    {
        // Configure the function to transform the convoluted output to NHWC
        _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
        _permuted_output.allocator()->allocate();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure_optimized(const ITensor *input,
                                                                                                     const ITensor *weights,
                                                                                                     const ITensor *biases,
                                                                                                     ITensor *output,
                                                                                                     const PadStrideInfo &conv_info,
                                                                                                     unsigned int depth_multiplier,
                                                                                                     const ActivationLayerInfo &act_info,
                                                                                                     const Size2D &dilation)
{
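    // ReLU and bounded ReLU can be fused directly into the assembly kernel; any other activation is
    // executed as a separate activation layer after the depthwise convolution.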
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(act_info);
    _is_activationlayer_enabled         = act_info.enabled() && !(is_relu || is_relu6);
    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = act_info;
    }

    if(_is_nchw)
    {
        _memory_group.manage(&_permuted_input);
        _memory_group.manage(&_permuted_output);

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);

        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permuted_output.info()->set_quantization_info(output->info()->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);

        // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));

        // Allocate tensors
        _permuted_input.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }
    else
    {
        _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor *input,
                                                                                          const ITensor *weights,
                                                                                          const ITensor *biases,
                                                                                          ITensor *output, const PadStrideInfo &conv_info,
                                                                                          unsigned int depth_multiplier,
                                                                                          const ActivationLayerInfo &act_info,
                                                                                          const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                                      output->info(), conv_info, depth_multiplier, act_info, dilation));

    _original_weights = weights;
    _is_quantized     = is_data_type_quantized_asymmetric(input->info()->data_type());
    _has_bias         = biases != nullptr;
    _is_optimized     = NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input->info(),
                                                                                       weights->info(),
                                                                                       conv_info,
                                                                                       depth_multiplier,
                                                                                       dilation);
    _is_nchw                    = input->info()->data_layout() == DataLayout::NCHW;
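    // The assembly dispatch works on NHWC data while the generic 3x3 kernel works on NCHW, so a
    // layout permutation is required whenever the selected path and the incoming layout disagree.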
    _permute                    = _is_optimized == _is_nchw;
    _is_prepared                = false;
    _is_activationlayer_enabled = act_info.enabled();

    // Configure appropriate pipeline
    if(_is_optimized)
    {
        configure_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }
    else
    {
        configure_generic(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo *input,
                                                                                            const ITensorInfo *weights,
                                                                                            const ITensorInfo *biases,
                                                                                            const ITensorInfo *output,
                                                                                            const PadStrideInfo &conv_info,
                                                                                            unsigned int depth_multiplier,
                                                                                            const ActivationLayerInfo &act_info,
                                                                                            const Size2D &dilation)
{
    return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run_generic()
{
    // Fill border
    NEScheduler::get().schedule(&_border_handler, Window::DimX);

    // Execute depthwise convolution
    NEScheduler::get().schedule(&_dwc_kernel, Window::DimX);

    // Add biases
    if(_has_bias || _is_quantized)
    {
        NEScheduler::get().schedule(&_output_stage_kernel, Window::DimX);
    }

    // Permute output
    if(!_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run_optimized()
{
    // Run assembly function
    _dwc_optimized_func.run();

    // Permute output
    if(_is_nchw)
    {
        _permute_output.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
{
    prepare();

    MemoryGroupResourceScope scope_mg(_memory_group);

    // Permute input
    if(_permute)
    {
        _permute_input.run();
    }

    _is_optimized ? run_optimized() : run_generic();

    // Run activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
{
    if(!_is_prepared)
    {
        // Permute weights
        if(_permute)
        {
            _permuted_weights.allocator()->allocate();
            _permute_weights.run();
            _original_weights->mark_as_unused();
        }

        // Prepare optimized function
        if(_is_optimized)
        {
            _dwc_optimized_func.prepare();
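            // Once the assembly function has prepared (packed) its weights, the permuted copy can be
            // released if nothing else references it.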
            if(!_permuted_weights.is_used())
            {
                _permuted_weights.allocator()->free();
            }
        }

        _is_prepared = true;
    }
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
    : _depthwise_conv_kernel(), _fill_border(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _permuted_input(), _permuted_weights(), _permuted_output(),
      _is_prepared(false), _is_nchw(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
{
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                                                                                unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
                                                                     output->info(), conv_info, depth_multiplier, act_info, dilation));

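    // Only the NCHW path has one-off preparation work (permuting the weights); NHWC input feeds the
    // native NHWC kernel directly.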
    _is_nchw     = input->info()->data_layout() == DataLayout::NCHW;
    _is_prepared = !_is_nchw;

    ITensor       *input_to_use   = input;
    const ITensor *weights_to_use = weights;
    ITensor       *output_to_use  = output;
    if(_is_nchw)
    {
        _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
        _permuted_input.info()->set_data_layout(DataLayout::NHWC);
        input_to_use = &_permuted_input;

        _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
        _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
        weights_to_use = &_permuted_weights;

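        // _permuted_output starts with an empty shape so that the native kernel auto-initializes it
        // during configuration.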
        _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
        output_to_use = &_permuted_output;
    }
    _original_weights = weights_to_use;

    _depthwise_conv_kernel.configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
    _fill_border.configure(input_to_use, _depthwise_conv_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint64_t>(0), input->info()->data_type(), input->info()->quantization_info()));

    if(_is_nchw)
    {
        _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
        _permuted_output.info()->set_data_layout(DataLayout::NHWC);

        _permuted_input.allocator()->allocate();
        _permuted_weights.allocator()->allocate();
        _permuted_output.allocator()->allocate();
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.configure(output, nullptr, act_info);
    }
}

Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                 const PadStrideInfo &conv_info,
                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    if(input->data_layout() == DataLayout::NCHW)
    {
        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
        permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));

        const TensorInfo permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_output  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));

        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));

        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, conv_info, depth_multiplier, dilation));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, dilation));
    }

    // Validate Activation Layer
    if(act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
    }

    return Status{};
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
{
    if(_is_nchw)
    {
        prepare();
        _permute_input.run();
    }

    NEScheduler::get().schedule(&_fill_border, Window::DimX);
    NEScheduler::get().schedule(&_depthwise_conv_kernel, Window::DimY);

    if(_is_nchw)
    {
        _permute_output.run();
    }

    if(_is_activationlayer_enabled)
    {
        _activationlayer_function.run();
    }
}

void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::prepare()
{
    if(!_is_prepared)
    {
        ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());

        _permute_weights.run();
        _original_weights->mark_as_unused();
        _is_prepared = true;
    }
}

NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(std::move(memory_manager)), _func_generic()
{
}

void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                                            const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info, dilation);
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                                             unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
    DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
    switch(depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            return NEDepthwiseConvolutionLayerOptimized::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            return NEDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

DepthwiseConvolutionFunction NEDepthwiseConvolutionLayer::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                            const PadStrideInfo &conv_info,
                                                                                            unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
{
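    // Prefer the optimized (assembly / 3x3) implementation whenever its validation passes; otherwise
    // fall back to the generic native kernel.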
    if(bool(NEDepthwiseConvolutionLayerOptimized::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation)))
    {
        return DepthwiseConvolutionFunction::OPTIMIZED;
    }
    else
    {
        return DepthwiseConvolutionFunction::GENERIC;
    }
}

void NEDepthwiseConvolutionLayer::run()
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.run();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.run();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}

void NEDepthwiseConvolutionLayer::prepare()
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.prepare();
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.prepare();
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}
} // namespace arm_compute