blob: f577e94def0663e9a95f76e849ee521a3964d54d [file] [log] [blame]
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +01001/*
2 * Copyright (c) 2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010024#include "src/runtime/cpu/operators/CpuDepthwiseConv2d.h"
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010025
26#include "arm_compute/core/TensorInfo.h"
27#include "arm_compute/core/Validate.h"
28#include "arm_compute/core/utils/misc/InfoHelpers.h"
29#include "arm_compute/core/utils/misc/ShapeCalculator.h"
30#include "arm_compute/runtime/NEON/NEScheduler.h"
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010031#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010032
33namespace arm_compute
34{
35namespace cpu
36{
37namespace
38{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010039Status validate_arguments_optimized(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010040{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010041 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
42 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010043 if(!is_data_type_quantized_per_channel(weights->data_type()))
44 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010045 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010046 }
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010047 ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010048 ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1);
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010049 const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
50 const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
51 ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > src->dimension(idx_w) + info.pad_stride_info.pad_left() +
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010052 info.pad_stride_info.pad_right());
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010053 ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > src->dimension(idx_h) + info.pad_stride_info.pad_top() +
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010054 info.pad_stride_info.pad_bottom());
55
56 if(biases != nullptr)
57 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010058 const unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010059 ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
60 ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
61 }
62
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010063 ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, biases, dst, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010064
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000065 // Validate Activation Layer
66 if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010067 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010068 ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010069 }
70 return Status{};
71}
72} // namespace
73
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010074CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::CpuDepthwiseConv2dOptimizedInternal()
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010075 : _dwc_optimized_func(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _has_bias(false), _is_quantized(false),
76 _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
77{
78}
79
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010080void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorInfo *src,
81 const ITensorInfo *weights,
82 const ITensorInfo *biases,
83 ITensorInfo *dst,
84 const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010085{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010086 ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010087 // Perform validation step
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010088 ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, (biases == nullptr) ? nullptr : biases,
89 dst, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010090
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010091 _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010092 _has_bias = biases != nullptr;
Manuel Bottinib4bb6a02021-05-24 16:01:32 +010093 _is_nchw = src->data_layout() == DataLayout::NCHW;
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010094 _permute = _is_nchw;
95 _is_prepared = false;
96
97 // Configure pipeline
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000098 _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +010099
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100100 _dwc_optimized_func = std::make_unique<CpuDepthwiseConv2dAssemblyDispatch>();
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100101 if(_is_nchw)
102 {
103 _permute_input = std::make_unique<cpu::CpuPermute>();
104 _permute_weights = std::make_unique<cpu::CpuPermute>();
105 _permute_output = std::make_unique<cpu::CpuPermute>();
106
107 auto input_perm = std::make_unique<TensorInfo>();
108 auto weights_perm = std::make_unique<TensorInfo>();
109 auto output_perm = std::make_unique<TensorInfo>();
110
111 // Configure the function to transform the input tensor from NCHW -> NHWC
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100112 _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100113 input_perm->set_data_layout(DataLayout::NHWC);
114
115 // Configure the function to transform the weights tensor from IHW -> HWI
116 _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
117 weights_perm->set_data_layout(DataLayout::NHWC);
118
119 output_perm->set_data_layout(DataLayout::NHWC);
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100120 output_perm->set_quantization_info(dst->quantization_info());
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100121
122 // Configure optimized depthwise
123 _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info);
124
125 // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
126 output_perm->set_data_layout(DataLayout::NHWC);
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100127 _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100128 }
129 else
130 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100131 _dwc_optimized_func->configure(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100132 }
133
134 // Configure activation
135 if(_is_activationlayer_enabled)
136 {
137 _activationlayer_function = std::make_unique<cpu::CpuActivation>();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100138 _activationlayer_function->configure(dst, nullptr, info.act_info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100139 }
140}
141
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100142Status CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate(const ITensorInfo *src,
143 const ITensorInfo *weights,
144 const ITensorInfo *biases,
145 const ITensorInfo *dst,
146 const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100147{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100148 return validate_arguments_optimized(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100149}
150
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100151void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100152{
153 ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
154 prepare(tensors);
155
156 auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
157 auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
158 auto workspace = tensors.get_tensor(TensorType::ACL_INT_3);
159 auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
160
161 // Permute input
162 if(_permute)
163 {
164 ITensorPack pack;
Michalis Spyroua7a74362021-04-23 10:32:48 +0100165 auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100166 auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
167 pack.add_tensor(TensorType::ACL_SRC, src);
168 pack.add_tensor(TensorType::ACL_DST, src_perm);
169 _permute_input->run(pack);
170 }
171
172 // Run assembly function
173 if(_is_nchw)
174 {
175 auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
176 auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
177 auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
178
179 ITensorPack pack;
180 pack.add_tensor(TensorType::ACL_SRC_0, src_perm);
181 pack.add_tensor(TensorType::ACL_SRC_1, weights_perm);
182 pack.add_tensor(TensorType::ACL_SRC_2, bias);
183 pack.add_tensor(TensorType::ACL_INT_0, workspace);
184 pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
185 pack.add_tensor(TensorType::ACL_DST, dst_perm);
186 _dwc_optimized_func->run(pack);
187 }
188 else
189 {
190 auto src = tensors.get_tensor(TensorType::ACL_SRC_0);
191 auto weights = tensors.get_tensor(TensorType::ACL_SRC_1);
192 auto dst = tensors.get_tensor(TensorType::ACL_DST);
193
194 ITensorPack pack;
195 pack.add_tensor(TensorType::ACL_SRC_0, src);
196 pack.add_tensor(TensorType::ACL_SRC_1, weights);
197 pack.add_tensor(TensorType::ACL_SRC_2, bias);
198 pack.add_tensor(TensorType::ACL_INT_0, workspace);
199 pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
200 pack.add_tensor(TensorType::ACL_DST, dst);
201 _dwc_optimized_func->run(pack);
202 }
203
204 // Permute output
205 if(_is_nchw)
206 {
207 ITensorPack pack;
208 auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
209 pack.add_tensor(TensorType::ACL_SRC, dst_perm);
210 pack.add_tensor(TensorType::ACL_DST, dst);
211 _permute_output->run(pack);
212 }
213
214 // Run activation
215 if(_is_activationlayer_enabled)
216 {
217 ITensorPack pack;
218 pack.add_tensor(TensorType::ACL_SRC, dst);
219 pack.add_tensor(TensorType::ACL_DST, dst);
220 _activationlayer_function->run(pack);
221 }
222}
223
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100224void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100225{
226 if(!_is_prepared)
227 {
228 auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
229 auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
230 auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
231
232 // Permute weights
233 if(_permute)
234 {
235 auto permuted_weights = tensors.get_tensor(TensorType::ACL_INT_1);
236
237 ITensorPack pack;
238 pack.add_tensor(TensorType::ACL_SRC, weights);
239 pack.add_tensor(TensorType::ACL_DST, permuted_weights);
240 _permute_weights->run(pack);
241
Michalis Spyroua7a74362021-04-23 10:32:48 +0100242 weights->mark_as_unused();
243
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100244 ITensorPack pack_opt;
245 pack_opt.add_const_tensor(TensorType::ACL_SRC_1, permuted_weights);
246 pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
247 pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
248
249 // Prepare optimized function
250 _dwc_optimized_func->prepare(pack_opt);
251 }
252 else
253 {
254 ITensorPack pack_opt;
255 pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
256 pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
257 pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
258
259 // Prepare optimized function
260 _dwc_optimized_func->prepare(pack_opt);
261 }
262
263 _is_prepared = true;
264 }
265}
266
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100267CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::CpuDepthwiseConv2dGeneric()
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100268 : _depthwise_conv_kernel(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _is_nchw(true), _is_prepared(false),
269 _is_activationlayer_enabled(false)
270{
271}
272
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100273void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100274{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100275 ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
276 ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2d::validate(src, weights, (biases == nullptr) ? nullptr : biases,
277 dst, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100278
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100279 _is_nchw = src->data_layout() == DataLayout::NCHW;
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100280 _is_prepared = !_is_nchw;
281
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100282 ITensorInfo *input_to_use = src;
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100283 const ITensorInfo *weights_to_use = weights;
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100284 ITensorInfo *output_to_use = dst;
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100285
286 auto input_perm = std::make_unique<TensorInfo>();
287 auto weights_perm = std::make_unique<TensorInfo>();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100288 auto output_perm = std::make_unique<TensorInfo>(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100289
290 if(_is_nchw)
291 {
292 _permute_input = std::make_unique<cpu::CpuPermute>();
293 _permute_weights = std::make_unique<cpu::CpuPermute>();
294
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100295 _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100296 input_perm->set_data_layout(DataLayout::NHWC);
297 input_to_use = input_perm.get();
298
299 _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
300 weights_perm->set_data_layout(DataLayout::NHWC);
301 weights_to_use = weights_perm.get();
302
303 output_to_use = output_perm.get();
304 }
305
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100306 _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100307 _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);
308
309 if(_is_nchw)
310 {
311 _permute_output = std::make_unique<cpu::CpuPermute>();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100312 _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100313 output_perm->set_data_layout(DataLayout::NHWC);
314 }
315
316 //Configure Activation Layer
317 _is_activationlayer_enabled = info.act_info.enabled();
318 if(_is_activationlayer_enabled)
319 {
320 _activationlayer_function = std::make_unique<cpu::CpuActivation>();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100321 _activationlayer_function->configure(dst, nullptr, info.act_info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100322 }
323}
324
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100325Status CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
326 const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100327{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100328 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
329 if(src->data_layout() == DataLayout::NCHW)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100330 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100331 TensorShape permuted_input_shape = src->tensor_shape();
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100332 TensorShape permuted_weights_shape = weights->tensor_shape();
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100333 TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100334 permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
335 permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
336 permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));
337
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100338 const TensorInfo permuted_input = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100339 const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100340 const TensorInfo permuted_output = TensorInfo(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100341
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100342 ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(src, &permuted_input, PermutationVector(2U, 0U, 1U)));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100343 ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100344 ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, dst, PermutationVector(1U, 2U, 0U)));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100345
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100346 ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100347 }
348 else
349 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100350 ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(src, weights, biases, dst, info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100351 }
352
353 // Validate Activation Layer
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000354 if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100355 {
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100356 ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100357 }
358
359 return Status{};
360}
361
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100362void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::run(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100363{
364 auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
365 auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
366 auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2);
367 auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
368
369 if(_is_nchw)
370 {
371 prepare(tensors);
372 auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
373 auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
374 auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
375
376 ITensorPack pack;
377 pack.add_tensor(TensorType::ACL_SRC, src);
378 pack.add_tensor(TensorType::ACL_DST, src_perm);
379 _permute_input->run(pack);
380
381 ITensorPack pack_depth;
382 pack_depth.add_const_tensor(TensorType::ACL_SRC_0, src_perm);
383 pack_depth.add_const_tensor(TensorType::ACL_SRC_1, weights_perm);
384 pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
385 pack_depth.add_tensor(TensorType::ACL_DST, dst_perm);
386 NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
387 }
388 else
389 {
390 ITensorPack pack_depth;
391 pack_depth.add_tensor(TensorType::ACL_SRC_0, src);
392 pack_depth.add_tensor(TensorType::ACL_SRC_1, weights);
393 pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
394 pack_depth.add_tensor(TensorType::ACL_DST, dst);
395 NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
396 }
397
398 if(_is_nchw)
399 {
400 ITensorPack pack;
401 auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
402 pack.add_tensor(TensorType::ACL_SRC, dst_perm);
403 pack.add_tensor(TensorType::ACL_DST, dst);
404 _permute_output->run(pack);
405 }
406
407 if(_is_activationlayer_enabled)
408 {
409 ITensorPack pack;
410 pack.add_tensor(TensorType::ACL_SRC, dst);
411 pack.add_tensor(TensorType::ACL_DST, dst);
412 _activationlayer_function->run(pack);
413 }
414}
415
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100416void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::prepare(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100417{
418 if(!_is_prepared)
419 {
420 auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
421 auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
422
423 ARM_COMPUTE_ERROR_ON(!weights->is_used());
424
425 ITensorPack pack;
426 pack.add_tensor(TensorType::ACL_SRC, weights);
427 pack.add_tensor(TensorType::ACL_DST, weights_perm);
428
429 _permute_weights->run(pack);
430 weights->mark_as_unused();
431 _is_prepared = true;
432 }
433}
434
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100435CpuDepthwiseConv2d::CpuDepthwiseConv2d()
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100436 : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(), _func_generic()
437{
438}
439
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100440void CpuDepthwiseConv2d::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100441{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100442 _depth_conv_func = get_depthwiseconvolution_function(src, weights, (biases != nullptr) ? biases : nullptr, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100443 switch(_depth_conv_func)
444 {
445 case DepthwiseConvolutionFunction::OPTIMIZED:
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100446 _func_optimized.configure(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100447 break;
448 case DepthwiseConvolutionFunction::GENERIC:
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100449 _func_generic.configure(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100450 break;
451 default:
452 ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
453 }
454}
455
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100456Status CpuDepthwiseConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100457{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100458 DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100459 switch(depth_conv_func)
460 {
461 case DepthwiseConvolutionFunction::OPTIMIZED:
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100462 return CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100463 break;
464 case DepthwiseConvolutionFunction::GENERIC:
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100465 return CpuDepthwiseConv2dGeneric::validate(src, weights, biases, dst, info);
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100466 break;
467 default:
468 ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
469 }
470}
471
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100472DepthwiseConvolutionFunction CpuDepthwiseConv2d::get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
473 const ConvolutionInfo &info)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100474{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100475 if(bool(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info)))
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100476 {
477 return DepthwiseConvolutionFunction::OPTIMIZED;
478 }
479 else
480 {
481 return DepthwiseConvolutionFunction::GENERIC;
482 }
483}
484
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100485void CpuDepthwiseConv2d::run(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100486{
487 switch(_depth_conv_func)
488 {
489 case DepthwiseConvolutionFunction::OPTIMIZED:
490 _func_optimized.run(tensors);
491 break;
492 case DepthwiseConvolutionFunction::GENERIC:
493 _func_generic.run(tensors);
494 break;
495 default:
496 ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
497 }
498}
499
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100500void CpuDepthwiseConv2d::prepare(ITensorPack &tensors)
Michalis Spyrou60c3b0e2021-04-08 12:02:58 +0100501{
502 switch(_depth_conv_func)
503 {
504 case DepthwiseConvolutionFunction::OPTIMIZED:
505 _func_optimized.prepare(tensors);
506 break;
507 case DepthwiseConvolutionFunction::GENERIC:
508 _func_generic.prepare(tensors);
509 break;
510 default:
511 ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
512 }
513}
514} // namespace cpu
515} // namespace arm_compute