blob: 6c085645dbfab85ae8e5e3b879f98f2560c58546 [file] [log] [blame]
Michalis Spyrou7362f0d2017-10-18 17:58:22 +01001/*
Ramy Elgammala8db6122023-05-08 03:33:43 +01002 * Copyright (c) 2017-2021, 2023 Arm Limited.
Michalis Spyrou7362f0d2017-10-18 17:58:22 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000024#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010025
Giorgio Arenad93e2632019-10-15 11:09:33 +010026#include "arm_compute/core/utils/misc/InfoHelpers.h"
Georgios Pinitasd05dce42018-01-22 16:29:17 +000027#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Georgios Pinitasf72f9362018-01-12 16:29:45 +000028#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010029#include "arm_compute/runtime/NEON/NEScheduler.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010030
ramelg01cbbb0382021-09-17 17:36:57 +010031#include "src/common/utils/Log.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010032#include "src/cpu/operators/CpuDepthwiseConv2d.h"
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000033
Georgios Pinitasd05dce42018-01-22 16:29:17 +000034using namespace arm_compute::misc;
Georgios Pinitas4074c992018-01-30 18:13:46 +000035using namespace arm_compute::misc::shape_calculator;
Michalis Spyrou7362f0d2017-10-18 17:58:22 +010036
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000037namespace arm_compute
38{
Michalis Spyrouebcebf12020-10-21 00:04:14 +010039NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;
40
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010041struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::Impl
42{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010043 ITensor *src{nullptr}; // SRC_0
44 ITensor *dst{nullptr}; // DST_0
45 const ITensor *weights{nullptr}; // SRC_1
46 const ITensor *biases{nullptr}; // SRC_2
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010047 Tensor permuted_input{}; // INT_0
48 Tensor permuted_weights{}; // INT_1
49 Tensor permuted_output{}; // INT_2
50 Tensor workspace{}; // INT_3
51 Tensor packed_weights{}; // INT_4
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010052 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{nullptr};
53 bool is_prepared{false};
54 bool permute{false};
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010055};
56
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010057NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(
58 std::shared_ptr<IMemoryManager> memory_manager)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010059 : _memory_group(memory_manager), _impl(std::make_unique<Impl>())
Manuel Bottini05069f02019-09-26 17:18:26 +010060{
61}
62
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010063void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(
64 ITensor *input,
65 const ITensor *weights,
66 const ITensor *biases,
67 ITensor *output,
68 const PadStrideInfo &conv_info,
69 unsigned int depth_multiplier,
70 const ActivationLayerInfo &act_info,
71 const Size2D &dilation)
Manuel Bottini05069f02019-09-26 17:18:26 +010072{
Georgios Pinitas7d0adc62020-09-04 15:25:24 +010073 ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
Manuel Bottini05069f02019-09-26 17:18:26 +010074
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010075 bool is_nhwc = input->info()->data_layout() == DataLayout::NCHW;
76 _impl->src = input;
77 _impl->weights = weights;
78 _impl->biases = biases;
79 _impl->dst = output;
80 _impl->permute = is_nhwc;
81
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010082 _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010083 ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};
84 _impl->op->configure(_impl->src->info(), _impl->weights->info(),
85 _impl->biases == nullptr ? nullptr : _impl->biases->info(), _impl->dst->info(), info);
Manuel Bottini05069f02019-09-26 17:18:26 +010086
Georgios Pinitas7d0adc62020-09-04 15:25:24 +010087 // Configure pipeline
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010088 ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
89 const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);
90 const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);
91 bool is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);
92
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010093 if (!is_activationlayer_enabled)
Manuel Bottini05069f02019-09-26 17:18:26 +010094 {
95 act_info_to_use = act_info;
96 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010097 info = ConvolutionInfo{conv_info, depth_multiplier, act_info_to_use, dilation};
Manuel Bottini05069f02019-09-26 17:18:26 +010098
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010099 auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>();
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100100
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100101 if (is_nhwc)
Manuel Bottini05069f02019-09-26 17:18:26 +0100102 {
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100103 auto permute_input = std::make_unique<cpu::CpuPermute>();
104 auto permute_weights = std::make_unique<cpu::CpuPermute>();
105 auto permute_output = std::make_unique<cpu::CpuPermute>();
106
107 _memory_group.manage(&_impl->permuted_input);
108 _memory_group.manage(&_impl->permuted_weights);
109 _memory_group.manage(&_impl->permuted_output);
Manuel Bottini05069f02019-09-26 17:18:26 +0100110
111 // Configure the function to transform the input tensor from NCHW -> NHWC
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100112 permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
113 _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
Manuel Bottini05069f02019-09-26 17:18:26 +0100114
115 // Configure the function to transform the weights tensor from IHW -> HWI
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100116 permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
117 _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
Manuel Bottini05069f02019-09-26 17:18:26 +0100118
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100119 _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
120 _impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());
Manuel Bottini05069f02019-09-26 17:18:26 +0100121
122 // Configure optimized depthwise
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100123 dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(),
124 biases == nullptr ? nullptr : biases->info(), _impl->permuted_output.info(),
125 info);
Manuel Bottini05069f02019-09-26 17:18:26 +0100126
127 // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100128 _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
129 permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
Manuel Bottini05069f02019-09-26 17:18:26 +0100130
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100131 _impl->permuted_input.allocator()->allocate();
132 _impl->permuted_output.allocator()->allocate();
Manuel Bottini05069f02019-09-26 17:18:26 +0100133 }
134 else
135 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100136 dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(),
137 biases == nullptr ? nullptr : biases->info(), _impl->dst->info(), info);
Manuel Bottini05069f02019-09-26 17:18:26 +0100138 }
Manuel Bottini05069f02019-09-26 17:18:26 +0100139
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100140 // Allocate memory based on the internal memory requirements
141 experimental::MemoryRequirements mem_req = dwc_optimized_func->workspace();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100142 _impl->workspace.allocator()->init(TensorInfo(TensorShape{mem_req[0].size + mem_req[0].alignment}, 1, DataType::S8),
143 mem_req[0].alignment);
144 _impl->packed_weights.allocator()->init(
145 TensorInfo(TensorShape{mem_req[1].size + mem_req[1].alignment}, 1, DataType::S8), mem_req[1].alignment);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000146 _memory_group.manage(&_impl->workspace);
147 _memory_group.manage(&_impl->packed_weights);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100148 _impl->workspace.allocator()->allocate();
149 _impl->packed_weights.allocator()->allocate();
Manuel Bottini05069f02019-09-26 17:18:26 +0100150}
151
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100152Status
153NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo *input,
154 const ITensorInfo *weights,
155 const ITensorInfo *biases,
156 const ITensorInfo *output,
157 const PadStrideInfo &conv_info,
158 unsigned int depth_multiplier,
159 const ActivationLayerInfo &act_info,
160 const Size2D &dilation)
Manuel Bottini05069f02019-09-26 17:18:26 +0100161{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100162 ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100163 return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
Manuel Bottini05069f02019-09-26 17:18:26 +0100164}
165
Manuel Bottini05069f02019-09-26 17:18:26 +0100166void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
Georgios Pinitas30271c72019-06-24 14:56:34 +0100167{
168 prepare();
Georgios Pinitas30271c72019-06-24 14:56:34 +0100169 MemoryGroupResourceScope scope_mg(_memory_group);
170
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100171 ITensorPack pack;
172 pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
173 pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
174 pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
175 pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
176 pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
177 pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
178 pack.add_tensor(TensorType::ACL_INT_3, &_impl->workspace);
179 pack.add_tensor(TensorType::ACL_INT_4, &_impl->packed_weights);
180 pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
Georgios Pinitas30271c72019-06-24 14:56:34 +0100181
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100182 _impl->op->run(pack);
Georgios Pinitas30271c72019-06-24 14:56:34 +0100183}
184
Manuel Bottini05069f02019-09-26 17:18:26 +0100185void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
Georgios Pinitas30271c72019-06-24 14:56:34 +0100186{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100187 if (!_impl->is_prepared)
Georgios Pinitas30271c72019-06-24 14:56:34 +0100188 {
189 // Permute weights
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100190 if (_impl->permute)
Georgios Pinitas30271c72019-06-24 14:56:34 +0100191 {
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100192 _impl->permuted_weights.allocator()->allocate();
Georgios Pinitas30271c72019-06-24 14:56:34 +0100193 }
194
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100195 if (!_impl->permuted_weights.is_used())
Georgios Pinitas30271c72019-06-24 14:56:34 +0100196 {
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100197 _impl->permuted_weights.allocator()->free();
Georgios Pinitas30271c72019-06-24 14:56:34 +0100198 }
199
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100200 _impl->is_prepared = true;
Georgios Pinitas30271c72019-06-24 14:56:34 +0100201 }
202}
203
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100204struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl
205{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100206 Tensor permuted_input{};
207 Tensor permuted_weights{};
208 Tensor permuted_output{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100209 bool is_prepared{false};
210 bool is_nchw{false};
211 bool is_activationlayer_enabled{false};
212 const ITensor *weights{nullptr};
213 const ITensor *biases{nullptr};
214 const ITensor *src{nullptr};
215 ITensor *dst{nullptr};
216 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{nullptr};
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100217};
218
Manuel Bottini05069f02019-09-26 17:18:26 +0100219NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100220 : _impl(std::make_unique<Impl>())
Michalis Spyroub7b31532017-11-23 12:10:21 +0000221{
222}
223
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100224void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input,
225 const ITensor *weights,
226 const ITensor *biases,
227 ITensor *output,
228 const PadStrideInfo &conv_info,
229 unsigned int depth_multiplier,
230 const ActivationLayerInfo &act_info,
231 const Size2D &dilation)
Michalis Spyroub7b31532017-11-23 12:10:21 +0000232{
Michele Di Giorgioff271922019-07-17 15:59:32 +0100233 ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
Michalis Spyroub7b31532017-11-23 12:10:21 +0000234
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100235 const ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100236 _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100237 _impl->op->configure(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(),
238 info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100239
240 _impl->src = input;
241 _impl->dst = output;
242 _impl->weights = weights;
243 _impl->biases = biases;
244 _impl->is_nchw = input->info()->data_layout() == DataLayout::NCHW;
245 _impl->is_prepared = !_impl->is_nchw;
Giorgio Arena26b22162018-08-13 15:49:49 +0100246
Giorgio Arenad93e2632019-10-15 11:09:33 +0100247 ITensor *input_to_use = input;
248 const ITensor *weights_to_use = weights;
249 ITensor *output_to_use = output;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100250 if (_impl->is_nchw)
Giorgio Arena26b22162018-08-13 15:49:49 +0100251 {
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100252 auto permute_input = std::make_unique<cpu::CpuPermute>();
253 auto permute_weights = std::make_unique<cpu::CpuPermute>();
Giorgio Arena26b22162018-08-13 15:49:49 +0100254
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100255 permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
256 _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
257 input_to_use = &_impl->permuted_input;
Giorgio Arena44f55722019-07-12 14:49:49 +0100258
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100259 permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
260 _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
261 weights_to_use = &_impl->permuted_weights;
Giorgio Arena44f55722019-07-12 14:49:49 +0100262
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100263 _impl->permuted_output.allocator()->init(
264 output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100265 output_to_use = &_impl->permuted_output;
Giorgio Arena26b22162018-08-13 15:49:49 +0100266 }
267
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100268 auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100269 depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(),
270 biases == nullptr ? nullptr : biases->info(), output_to_use->info(), info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100271
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100272 if (_impl->is_nchw)
Georgios Pinitas60e98252018-10-22 16:17:20 +0100273 {
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100274 auto permute_output = std::make_unique<cpu::CpuPermute>();
275 permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
276 _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
277
278 _impl->permuted_input.allocator()->allocate();
279 _impl->permuted_weights.allocator()->allocate();
280 _impl->permuted_output.allocator()->allocate();
Georgios Pinitas60e98252018-10-22 16:17:20 +0100281 }
Michalis Spyroub7b31532017-11-23 12:10:21 +0000282}
283
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100284Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input,
285 const ITensorInfo *weights,
286 const ITensorInfo *biases,
287 const ITensorInfo *output,
Manuel Bottini05069f02019-09-26 17:18:26 +0100288 const PadStrideInfo &conv_info,
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100289 unsigned int depth_multiplier,
290 const ActivationLayerInfo &act_info,
291 const Size2D &dilation)
Abe Mbise7784c832018-05-31 16:48:41 +0100292{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100293 ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100294 return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
Abe Mbise7784c832018-05-31 16:48:41 +0100295}
296
Manuel Bottini05069f02019-09-26 17:18:26 +0100297void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
Michalis Spyroub7b31532017-11-23 12:10:21 +0000298{
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100299 ITensorPack pack;
300 pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
301 pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
302 pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
303 pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
304 pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
305 pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
306 pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
Giorgio Arenad93e2632019-10-15 11:09:33 +0100307
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100308 _impl->op->run(pack);
Georgios Pinitas72219332018-06-05 14:56:06 +0100309}
Manuel Bottini05069f02019-09-26 17:18:26 +0100310
311NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100312 : _memory_group(std::move(memory_manager)), _impl(std::make_unique<Impl>())
Manuel Bottini05069f02019-09-26 17:18:26 +0100313{
314}
315
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100316#ifndef DOXYGEN_SKIP_THIS
317struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl
318{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100319 DepthwiseConvolutionFunction depth_conv_func{DepthwiseConvolutionFunction::OPTIMIZED};
320 NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{nullptr};
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100321 NEDepthwiseConvolutionLayerGeneric func_generic{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100322 std::shared_ptr<cpu::CpuDepthwiseConv2d> op{nullptr};
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100323};
324#endif // DOXYGEN_SKIP_THIS
325
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100326void NEDepthwiseConvolutionLayer::configure(ITensor *input,
327 const ITensor *weights,
328 const ITensor *biases,
329 ITensor *output,
330 const PadStrideInfo &conv_info,
331 unsigned int depth_multiplier,
332 const ActivationLayerInfo &act_info,
333 const Size2D &dilation)
Manuel Bottini05069f02019-09-26 17:18:26 +0100334{
Jakub Sujaked9eb302023-05-11 13:15:46 +0100335 ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
Ramy Elgammala8db6122023-05-08 03:33:43 +0100336
ramelg01cbbb0382021-09-17 17:36:57 +0100337 ARM_COMPUTE_LOG_PARAMS(input, weights, output, conv_info, depth_multiplier, biases, act_info, dilation);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100338 ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(
339 input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), output->info(), conv_info,
340 depth_multiplier, act_info, dilation));
ramelg01cbbb0382021-09-17 17:36:57 +0100341
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100342 const ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100343 _impl->op = std::make_shared<cpu::CpuDepthwiseConv2d>();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100344 _impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(
345 input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), info);
346 switch (_impl->depth_conv_func)
Manuel Bottini05069f02019-09-26 17:18:26 +0100347 {
348 case DepthwiseConvolutionFunction::OPTIMIZED:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100349 _impl->func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info,
350 dilation);
Manuel Bottini05069f02019-09-26 17:18:26 +0100351 break;
352 case DepthwiseConvolutionFunction::GENERIC:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100353 _impl->func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info,
354 dilation);
Manuel Bottini05069f02019-09-26 17:18:26 +0100355 break;
356 default:
357 ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
358 }
359}
360
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100361Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input,
362 const ITensorInfo *weights,
363 const ITensorInfo *biases,
364 const ITensorInfo *output,
365 const PadStrideInfo &conv_info,
366 unsigned int depth_multiplier,
367 const ActivationLayerInfo &act_info,
368 const Size2D &dilation)
Manuel Bottini05069f02019-09-26 17:18:26 +0100369{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100370 ConvolutionInfo info{conv_info, depth_multiplier, act_info, dilation};
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100371 return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
Manuel Bottini05069f02019-09-26 17:18:26 +0100372}
373
374void NEDepthwiseConvolutionLayer::run()
375{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100376 switch (_impl->depth_conv_func)
Manuel Bottini05069f02019-09-26 17:18:26 +0100377 {
378 case DepthwiseConvolutionFunction::OPTIMIZED:
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100379 _impl->func_optimized.run();
Manuel Bottini05069f02019-09-26 17:18:26 +0100380 break;
381 case DepthwiseConvolutionFunction::GENERIC:
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100382 _impl->func_generic.run();
Manuel Bottini05069f02019-09-26 17:18:26 +0100383 break;
384 default:
385 ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
386 }
387}
388
389void NEDepthwiseConvolutionLayer::prepare()
390{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100391 switch (_impl->depth_conv_func)
Manuel Bottini05069f02019-09-26 17:18:26 +0100392 {
393 case DepthwiseConvolutionFunction::OPTIMIZED:
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100394 _impl->func_optimized.prepare();
Manuel Bottini05069f02019-09-26 17:18:26 +0100395 break;
396 case DepthwiseConvolutionFunction::GENERIC:
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100397 _impl->func_generic.prepare();
Manuel Bottini05069f02019-09-26 17:18:26 +0100398 break;
399 default:
400 ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
401 }
402}
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000403} // namespace arm_compute