/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/cpu/operators/CpuDepthwiseConv2d.h"
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010025
26#include "arm_compute/core/TensorInfo.h"
27#include "arm_compute/core/Validate.h"
28#include "arm_compute/core/utils/misc/InfoHelpers.h"
29#include "arm_compute/core/utils/misc/ShapeCalculator.h"
30#include "arm_compute/runtime/NEON/NEScheduler.h"
ramelg013ae3d882021-09-12 23:07:47 +010031#include "src/common/utils/Log.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010032#include "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010033
34namespace arm_compute
35{
36namespace cpu
37{
38namespace
39{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010040Status validate_arguments_optimized(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010041{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010042 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
43 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010044 if(!is_data_type_quantized_per_channel(weights->data_type()))
45 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010046 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010047 }
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010048 ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010049 ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1);
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010050 const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
51 const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
52 ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > src->dimension(idx_w) + info.pad_stride_info.pad_left() +
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010053 info.pad_stride_info.pad_right());
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010054 ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > src->dimension(idx_h) + info.pad_stride_info.pad_top() +
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010055 info.pad_stride_info.pad_bottom());
56
57 if(biases != nullptr)
58 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010059 const unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010060 ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
61 ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
62 }
63
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010064 ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, biases, dst, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010065
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000066 // Validate Activation Layer
67 if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010068 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010069 ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010070 }
71 return Status{};
72}
73} // namespace
74
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010075void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorInfo *src,
76 const ITensorInfo *weights,
77 const ITensorInfo *biases,
78 ITensorInfo *dst,
79 const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010080{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010081 ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010082 // Perform validation step
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010083 ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, (biases == nullptr) ? nullptr : biases,
84 dst, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010085
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010086 _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010087 _has_bias = biases != nullptr;
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010088 _is_nchw = src->data_layout() == DataLayout::NCHW;
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010089 _permute = _is_nchw;
90 _is_prepared = false;
91
92 // Configure pipeline
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000093 _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010094
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010095 _dwc_optimized_func = std::make_unique<CpuDepthwiseConv2dAssemblyDispatch>();
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010096 if(_is_nchw)
97 {
98 _permute_input = std::make_unique<cpu::CpuPermute>();
99 _permute_weights = std::make_unique<cpu::CpuPermute>();
100 _permute_output = std::make_unique<cpu::CpuPermute>();
101
102 auto input_perm = std::make_unique<TensorInfo>();
103 auto weights_perm = std::make_unique<TensorInfo>();
104 auto output_perm = std::make_unique<TensorInfo>();
105
106 // Configure the function to transform the input tensor from NCHW -> NHWC
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100107 _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100108 input_perm->set_data_layout(DataLayout::NHWC);
109
110 // Configure the function to transform the weights tensor from IHW -> HWI
111 _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
112 weights_perm->set_data_layout(DataLayout::NHWC);
113
114 output_perm->set_data_layout(DataLayout::NHWC);
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100115 output_perm->set_quantization_info(dst->quantization_info());
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100116
117 // Configure optimized depthwise
118 _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info);
119
120 // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
121 output_perm->set_data_layout(DataLayout::NHWC);
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100122 _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100123 }
124 else
125 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100126 _dwc_optimized_func->configure(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100127 }
128
129 // Configure activation
130 if(_is_activationlayer_enabled)
131 {
132 _activationlayer_function = std::make_unique<cpu::CpuActivation>();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100133 _activationlayer_function->configure(dst, nullptr, info.act_info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100134 }
135}
136
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100137Status CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate(const ITensorInfo *src,
138 const ITensorInfo *weights,
139 const ITensorInfo *biases,
140 const ITensorInfo *dst,
141 const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100142{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100143 return validate_arguments_optimized(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100144}
145
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100146void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100147{
148 ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
149 prepare(tensors);
150
151 auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
152 auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
153 auto workspace = tensors.get_tensor(TensorType::ACL_INT_3);
154 auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
155
156 // Permute input
157 if(_permute)
158 {
159 ITensorPack pack;
Michalis Spyroua7a74362021-04-23 10:32:48 +0100160 auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100161 auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
162 pack.add_tensor(TensorType::ACL_SRC, src);
163 pack.add_tensor(TensorType::ACL_DST, src_perm);
164 _permute_input->run(pack);
165 }
166
167 // Run assembly function
168 if(_is_nchw)
169 {
170 auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
171 auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
172 auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
173
174 ITensorPack pack;
175 pack.add_tensor(TensorType::ACL_SRC_0, src_perm);
176 pack.add_tensor(TensorType::ACL_SRC_1, weights_perm);
177 pack.add_tensor(TensorType::ACL_SRC_2, bias);
178 pack.add_tensor(TensorType::ACL_INT_0, workspace);
179 pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
180 pack.add_tensor(TensorType::ACL_DST, dst_perm);
181 _dwc_optimized_func->run(pack);
182 }
183 else
184 {
185 auto src = tensors.get_tensor(TensorType::ACL_SRC_0);
186 auto weights = tensors.get_tensor(TensorType::ACL_SRC_1);
187 auto dst = tensors.get_tensor(TensorType::ACL_DST);
188
189 ITensorPack pack;
190 pack.add_tensor(TensorType::ACL_SRC_0, src);
191 pack.add_tensor(TensorType::ACL_SRC_1, weights);
192 pack.add_tensor(TensorType::ACL_SRC_2, bias);
193 pack.add_tensor(TensorType::ACL_INT_0, workspace);
194 pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
195 pack.add_tensor(TensorType::ACL_DST, dst);
196 _dwc_optimized_func->run(pack);
197 }
198
199 // Permute output
200 if(_is_nchw)
201 {
202 ITensorPack pack;
203 auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
204 pack.add_tensor(TensorType::ACL_SRC, dst_perm);
205 pack.add_tensor(TensorType::ACL_DST, dst);
206 _permute_output->run(pack);
207 }
208
209 // Run activation
210 if(_is_activationlayer_enabled)
211 {
212 ITensorPack pack;
213 pack.add_tensor(TensorType::ACL_SRC, dst);
214 pack.add_tensor(TensorType::ACL_DST, dst);
215 _activationlayer_function->run(pack);
216 }
217}
218
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100219void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100220{
221 if(!_is_prepared)
222 {
223 auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
224 auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
225 auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
226
227 // Permute weights
228 if(_permute)
229 {
230 auto permuted_weights = tensors.get_tensor(TensorType::ACL_INT_1);
231
232 ITensorPack pack;
233 pack.add_tensor(TensorType::ACL_SRC, weights);
234 pack.add_tensor(TensorType::ACL_DST, permuted_weights);
235 _permute_weights->run(pack);
236
Michalis Spyroua7a74362021-04-23 10:32:48 +0100237 weights->mark_as_unused();
238
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100239 ITensorPack pack_opt;
240 pack_opt.add_const_tensor(TensorType::ACL_SRC_1, permuted_weights);
241 pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
242 pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
243
244 // Prepare optimized function
245 _dwc_optimized_func->prepare(pack_opt);
246 }
247 else
248 {
249 ITensorPack pack_opt;
250 pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
251 pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
252 pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
253
254 // Prepare optimized function
255 _dwc_optimized_func->prepare(pack_opt);
256 }
257
258 _is_prepared = true;
259 }
260}
261
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100262void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100263{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100264 ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
265 ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2d::validate(src, weights, (biases == nullptr) ? nullptr : biases,
266 dst, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100267
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100268 _is_nchw = src->data_layout() == DataLayout::NCHW;
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100269 _is_prepared = !_is_nchw;
270
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100271 ITensorInfo *input_to_use = src;
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100272 const ITensorInfo *weights_to_use = weights;
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100273 ITensorInfo *output_to_use = dst;
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100274
275 auto input_perm = std::make_unique<TensorInfo>();
276 auto weights_perm = std::make_unique<TensorInfo>();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100277 auto output_perm = std::make_unique<TensorInfo>(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100278
279 if(_is_nchw)
280 {
281 _permute_input = std::make_unique<cpu::CpuPermute>();
282 _permute_weights = std::make_unique<cpu::CpuPermute>();
283
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100284 _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100285 input_perm->set_data_layout(DataLayout::NHWC);
286 input_to_use = input_perm.get();
287
288 _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
289 weights_perm->set_data_layout(DataLayout::NHWC);
290 weights_to_use = weights_perm.get();
291
292 output_to_use = output_perm.get();
293 }
294
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100295 _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100296 _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);
297
298 if(_is_nchw)
299 {
300 _permute_output = std::make_unique<cpu::CpuPermute>();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100301 _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100302 output_perm->set_data_layout(DataLayout::NHWC);
303 }
304
305 //Configure Activation Layer
306 _is_activationlayer_enabled = info.act_info.enabled();
307 if(_is_activationlayer_enabled)
308 {
309 _activationlayer_function = std::make_unique<cpu::CpuActivation>();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100310 _activationlayer_function->configure(dst, nullptr, info.act_info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100311 }
312}
313
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100314Status CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
315 const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100316{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100317 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
318 if(src->data_layout() == DataLayout::NCHW)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100319 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100320 TensorShape permuted_input_shape = src->tensor_shape();
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100321 TensorShape permuted_weights_shape = weights->tensor_shape();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100322 TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100323 permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
324 permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
325 permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));
326
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100327 const TensorInfo permuted_input = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100328 const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100329 const TensorInfo permuted_output = TensorInfo(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100330
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100331 ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(src, &permuted_input, PermutationVector(2U, 0U, 1U)));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100332 ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100333 ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, dst, PermutationVector(1U, 2U, 0U)));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100334
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100335 ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100336 }
337 else
338 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100339 ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(src, weights, biases, dst, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100340 }
341
342 // Validate Activation Layer
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000343 if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100344 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100345 ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100346 }
347
348 return Status{};
349}
350
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100351void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::run(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100352{
353 auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
354 auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
355 auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2);
356 auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
357
358 if(_is_nchw)
359 {
360 prepare(tensors);
361 auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
362 auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
363 auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
364
365 ITensorPack pack;
366 pack.add_tensor(TensorType::ACL_SRC, src);
367 pack.add_tensor(TensorType::ACL_DST, src_perm);
368 _permute_input->run(pack);
369
370 ITensorPack pack_depth;
371 pack_depth.add_const_tensor(TensorType::ACL_SRC_0, src_perm);
372 pack_depth.add_const_tensor(TensorType::ACL_SRC_1, weights_perm);
373 pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
374 pack_depth.add_tensor(TensorType::ACL_DST, dst_perm);
375 NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
376 }
377 else
378 {
379 ITensorPack pack_depth;
380 pack_depth.add_tensor(TensorType::ACL_SRC_0, src);
381 pack_depth.add_tensor(TensorType::ACL_SRC_1, weights);
382 pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
383 pack_depth.add_tensor(TensorType::ACL_DST, dst);
384 NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
385 }
386
387 if(_is_nchw)
388 {
389 ITensorPack pack;
390 auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
391 pack.add_tensor(TensorType::ACL_SRC, dst_perm);
392 pack.add_tensor(TensorType::ACL_DST, dst);
393 _permute_output->run(pack);
394 }
395
396 if(_is_activationlayer_enabled)
397 {
398 ITensorPack pack;
399 pack.add_tensor(TensorType::ACL_SRC, dst);
400 pack.add_tensor(TensorType::ACL_DST, dst);
401 _activationlayer_function->run(pack);
402 }
403}
404
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100405void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::prepare(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100406{
407 if(!_is_prepared)
408 {
409 auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
410 auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
411
412 ARM_COMPUTE_ERROR_ON(!weights->is_used());
413
414 ITensorPack pack;
415 pack.add_tensor(TensorType::ACL_SRC, weights);
416 pack.add_tensor(TensorType::ACL_DST, weights_perm);
417
418 _permute_weights->run(pack);
419 weights->mark_as_unused();
420 _is_prepared = true;
421 }
422}
423
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100424void CpuDepthwiseConv2d::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100425{
ramelg013ae3d882021-09-12 23:07:47 +0100426 ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, info);
427
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100428 _depth_conv_func = get_depthwiseconvolution_function(src, weights, (biases != nullptr) ? biases : nullptr, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100429 switch(_depth_conv_func)
430 {
431 case DepthwiseConvolutionFunction::OPTIMIZED:
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100432 _func_optimized.configure(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100433 break;
434 case DepthwiseConvolutionFunction::GENERIC:
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100435 _func_generic.configure(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100436 break;
437 default:
438 ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
439 }
440}
441
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100442Status CpuDepthwiseConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100443{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100444 DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100445 switch(depth_conv_func)
446 {
447 case DepthwiseConvolutionFunction::OPTIMIZED:
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100448 return CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100449 break;
450 case DepthwiseConvolutionFunction::GENERIC:
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100451 return CpuDepthwiseConv2dGeneric::validate(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100452 break;
453 default:
454 ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
455 }
456}
457
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100458DepthwiseConvolutionFunction CpuDepthwiseConv2d::get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
459 const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100460{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100461 if(bool(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info)))
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100462 {
463 return DepthwiseConvolutionFunction::OPTIMIZED;
464 }
465 else
466 {
467 return DepthwiseConvolutionFunction::GENERIC;
468 }
469}
470
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100471void CpuDepthwiseConv2d::run(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100472{
473 switch(_depth_conv_func)
474 {
475 case DepthwiseConvolutionFunction::OPTIMIZED:
476 _func_optimized.run(tensors);
477 break;
478 case DepthwiseConvolutionFunction::GENERIC:
479 _func_generic.run(tensors);
480 break;
481 default:
482 ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
483 }
484}
485
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100486void CpuDepthwiseConv2d::prepare(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100487{
488 switch(_depth_conv_func)
489 {
490 case DepthwiseConvolutionFunction::OPTIMIZED:
491 _func_optimized.prepare(tensors);
492 break;
493 case DepthwiseConvolutionFunction::GENERIC:
494 _func_generic.prepare(tensors);
495 break;
496 default:
497 ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
498 }
499}
500} // namespace cpu
501} // namespace arm_compute