blob: 73c1fda711dc9f3796f1e49328750216966b6f51 [file] [log] [blame]
/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/cpu/kernels/CpuAddKernel.h"
Sheri Zhang61243902021-01-12 18:25:16 +000025
26#include "arm_compute/core/ITensor.h"
27#include "arm_compute/core/TensorInfo.h"
28#include "arm_compute/core/Validate.h"
29#include "src/core/CPP/Validate.h"
30#include "src/core/common/Registrars.h"
Sheri Zhang61243902021-01-12 18:25:16 +000031#include "src/core/helpers/AutoConfiguration.h"
32#include "src/core/helpers/WindowHelpers.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010033#include "src/cpu/kernels/add/neon/list.h"
34#include "src/cpu/kernels/add/sve/list.h"
Sheri Zhang61243902021-01-12 18:25:16 +000035
36#include <array>
37
38namespace arm_compute
39{
40namespace cpu
41{
42namespace kernels
43{
44namespace
45{
46struct AddSelectorData
47{
Georgios Pinitasda816752021-07-02 09:22:14 +010048 DataType dt;
Michalis Spyrou20fca522021-06-07 14:23:57 +010049 const CPUInfo &ci;
Sheri Zhang61243902021-01-12 18:25:16 +000050};
51
52using AddSelectorPtr = std::add_pointer<bool(const AddSelectorData &data)>::type;
53using AddKernelPtr = std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, const ConvertPolicy &, const Window &)>::type;
54struct AddKernel
55{
56 const char *name;
57 const AddSelectorPtr is_selected;
58 AddKernelPtr ukernel;
59};
60
61static const AddKernel available_kernels[] =
62{
Michalis Spyrou20fca522021-06-07 14:23:57 +010063#if defined(ARM_COMPUTE_ENABLE_SVE2)
64 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +010065 "sve2_qu8_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +010066 [](const AddSelectorData & data)
67 {
Georgios Pinitas1024d9f2021-10-05 12:56:07 +010068 return (data.dt == DataType::QASYMM8) && data.ci.has_sve2();
Michalis Spyrou20fca522021-06-07 14:23:57 +010069 },
70 REGISTER_QASYMM8_SVE(arm_compute::cpu::add_qasymm8_sve)
71 },
72 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +010073 "sve2_qs8_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +010074 [](const AddSelectorData & data)
75 {
Georgios Pinitas1024d9f2021-10-05 12:56:07 +010076 return (data.dt == DataType::QASYMM8_SIGNED) && data.ci.has_sve2();
Michalis Spyrou20fca522021-06-07 14:23:57 +010077 },
78 REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::add_qasymm8_signed_sve)
79 },
80 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +010081 "sve2_qs16_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +010082 [](const AddSelectorData & data)
83 {
Georgios Pinitas1024d9f2021-10-05 12:56:07 +010084 return (data.dt == DataType::QSYMM16) && data.ci.has_sve2();
Michalis Spyrou20fca522021-06-07 14:23:57 +010085 },
86 REGISTER_QSYMM16_SVE(arm_compute::cpu::add_qsymm16_sve)
87 },
88#endif /* !defined(ARM_COMPUTE_ENABLE_SVE2) */
89#if defined(ARM_COMPUTE_ENABLE_SVE)
Sheri Zhang61243902021-01-12 18:25:16 +000090 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +010091 "sve_fp32_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +010092 [](const AddSelectorData & data)
93 {
Georgios Pinitasda816752021-07-02 09:22:14 +010094 return (data.dt == DataType::F32) && data.ci.has_sve();
Michalis Spyrou20fca522021-06-07 14:23:57 +010095 },
Sheri Zhang61243902021-01-12 18:25:16 +000096 REGISTER_FP32_SVE(arm_compute::cpu::add_same_sve<float>)
97 },
98 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +010099 "sve_fp16_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +0100100 [](const AddSelectorData & data)
101 {
Georgios Pinitasda816752021-07-02 09:22:14 +0100102 return (data.dt == DataType::F16) && data.ci.has_sve();
Michalis Spyrou20fca522021-06-07 14:23:57 +0100103 },
Sheri Zhang61243902021-01-12 18:25:16 +0000104 REGISTER_FP16_SVE(arm_compute::cpu::add_same_sve<float16_t>)
105 },
106 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100107 "sve_u8_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +0100108 [](const AddSelectorData & data)
109 {
Georgios Pinitasda816752021-07-02 09:22:14 +0100110 return (data.dt == DataType::U8) && data.ci.has_sve();
Michalis Spyrou20fca522021-06-07 14:23:57 +0100111 },
Sheri Zhang61243902021-01-12 18:25:16 +0000112 REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<uint8_t>)
113 },
114 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100115 "sve_s16_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +0100116 [](const AddSelectorData & data)
117 {
Georgios Pinitasda816752021-07-02 09:22:14 +0100118 return (data.dt == DataType::S16) && data.ci.has_sve();
Michalis Spyrou20fca522021-06-07 14:23:57 +0100119 },
Sheri Zhang61243902021-01-12 18:25:16 +0000120 REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int16_t>)
121 },
122 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100123 "sve_s32_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +0100124 [](const AddSelectorData & data)
125 {
Georgios Pinitasda816752021-07-02 09:22:14 +0100126 return (data.dt == DataType::S32) && data.ci.has_sve();
Michalis Spyrou20fca522021-06-07 14:23:57 +0100127 },
Sheri Zhang61243902021-01-12 18:25:16 +0000128 REGISTER_INTEGER_SVE(arm_compute::cpu::add_same_sve<int32_t>)
129 },
Michalis Spyrou20fca522021-06-07 14:23:57 +0100130#endif /* defined(ARM_COMPUTE_ENABLE_SVE) */
131#if defined(ARM_COMPUTE_ENABLE_NEON)
Sheri Zhang61243902021-01-12 18:25:16 +0000132 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100133 "neon_fp32_add",
Georgios Pinitasda816752021-07-02 09:22:14 +0100134 [](const AddSelectorData & data) { return (data.dt == DataType::F32); },
Sheri Zhang61243902021-01-12 18:25:16 +0000135 REGISTER_FP32_NEON(arm_compute::cpu::add_same_neon<float>)
136 },
137#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
138 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100139 "neon_fp16_add",
Michalis Spyrou20fca522021-06-07 14:23:57 +0100140 [](const AddSelectorData & data)
141 {
Georgios Pinitasda816752021-07-02 09:22:14 +0100142 return (data.dt == DataType::F16) && data.ci.has_fp16();
Michalis Spyrou20fca522021-06-07 14:23:57 +0100143 },
Sheri Zhang61243902021-01-12 18:25:16 +0000144 REGISTER_FP16_NEON(arm_compute::cpu::add_same_neon<float16_t>)
145 },
146#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
147 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100148 "neon_u8_add",
Georgios Pinitasda816752021-07-02 09:22:14 +0100149 [](const AddSelectorData & data) { return (data.dt == DataType::U8); },
Sheri Zhang61243902021-01-12 18:25:16 +0000150 REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<uint8_t>)
151 },
152 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100153 "neon_s16_add",
Georgios Pinitasda816752021-07-02 09:22:14 +0100154 [](const AddSelectorData & data) { return (data.dt == DataType::S16); },
Sheri Zhang61243902021-01-12 18:25:16 +0000155 REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int16_t>)
156 },
157 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100158 "neon_s32_add",
Georgios Pinitasda816752021-07-02 09:22:14 +0100159 [](const AddSelectorData & data) { return (data.dt == DataType::S32); },
Sheri Zhang61243902021-01-12 18:25:16 +0000160 REGISTER_INTEGER_NEON(arm_compute::cpu::add_same_neon<int32_t>)
161 },
Michalis Spyrou20fca522021-06-07 14:23:57 +0100162#endif /* defined(ARM_COMPUTE_ENABLE_NEON) */
163#if defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE)
Sheri Zhang61243902021-01-12 18:25:16 +0000164 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100165 "neon_qu8_add",
Georgios Pinitasda816752021-07-02 09:22:14 +0100166 [](const AddSelectorData & data) { return (data.dt == DataType::QASYMM8); },
Sheri Zhang61243902021-01-12 18:25:16 +0000167 REGISTER_QASYMM8_NEON(arm_compute::cpu::add_qasymm8_neon)
168 },
169 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100170 "neon_qs8_add",
Georgios Pinitasda816752021-07-02 09:22:14 +0100171 [](const AddSelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
Sheri Zhang61243902021-01-12 18:25:16 +0000172 REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::add_qasymm8_signed_neon)
173 },
174 {
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100175 "neon_qs16_add",
Georgios Pinitasda816752021-07-02 09:22:14 +0100176 [](const AddSelectorData & data) { return (data.dt == DataType::QSYMM16); },
Sheri Zhang61243902021-01-12 18:25:16 +0000177 REGISTER_QSYMM16_NEON(arm_compute::cpu::add_qsymm16_neon)
178 },
Michalis Spyrou20fca522021-06-07 14:23:57 +0100179#endif /* defined(ARM_COMPUTE_ENABLE_NEON) || defined(ARM_COMPUTE_ENABLE_SVE) */
Sheri Zhang61243902021-01-12 18:25:16 +0000180};
181
182/** Micro-kernel selector
183 *
184 * @param[in] data Selection data passed to help pick the appropriate micro-kernel
185 *
186 * @return A matching micro-kernel else nullptr
187 */
Georgios Pinitasda816752021-07-02 09:22:14 +0100188const AddKernel *get_implementation(const CPUInfo &cpuinfo, DataType dt)
Sheri Zhang61243902021-01-12 18:25:16 +0000189{
190 for(const auto &uk : available_kernels)
191 {
Georgios Pinitasda816752021-07-02 09:22:14 +0100192 if(uk.is_selected({ dt, cpuinfo }))
Sheri Zhang61243902021-01-12 18:25:16 +0000193 {
194 return &uk;
195 }
196 }
197 return nullptr;
198}
199
200Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, ConvertPolicy policy)
201{
202 ARM_COMPUTE_UNUSED(policy);
203
204 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src0);
205 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
206 DataType::S16, DataType::QSYMM16, DataType::F16,
207 DataType::S32, DataType::F32);
Georgios Pinitasda816752021-07-02 09:22:14 +0100208 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &src1);
Sheri Zhang61243902021-01-12 18:25:16 +0000209
210 const TensorShape out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape());
211
212 ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
213 ARM_COMPUTE_RETURN_ERROR_ON_MSG((src0.tensor_shape().x() != src1.tensor_shape().x()) && ((src0.data_type() != src1.data_type()) || (src0.data_type() != dst.data_type())
214 || (src1.data_type() != dst.data_type())),
215 "Broadcasting across width is supported on configurations where all tensors have the same data type");
216
217 // Validate in case of configured dst
218 if(dst.total_size() > 0)
219 {
Georgios Pinitasda816752021-07-02 09:22:14 +0100220 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src0, &dst);
Sheri Zhang61243902021-01-12 18:25:16 +0000221 ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst.tensor_shape(), 0),
222 "Wrong shape for dst");
223 }
224
Georgios Pinitasda816752021-07-02 09:22:14 +0100225 const auto *uk = get_implementation(CPUInfo::get(), src0.data_type());
Sheri Zhang61243902021-01-12 18:25:16 +0000226 ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
227
228 return Status{};
229}
230
231std::pair<Status, Window> validate_and_configure_window(const ITensorInfo &src0, const ITensorInfo &src1, ITensorInfo &dst)
232{
SiCongLic7b1e842021-02-22 14:28:33 +0000233 const TensorShape &out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape());
Sheri Zhang61243902021-01-12 18:25:16 +0000234
235 // Auto initialize dst if not initialized
Georgios Pinitasda816752021-07-02 09:22:14 +0100236 set_shape_if_empty(dst, out_shape);
237 set_data_type_if_unknown(dst, src0.data_type());
Sheri Zhang61243902021-01-12 18:25:16 +0000238
SiCongLic7b1e842021-02-22 14:28:33 +0000239 Window win = calculate_max_window(out_shape, Steps());
Sheri Zhang61243902021-01-12 18:25:16 +0000240
241 // CpuAddKernel doesn't need padding so update_window_and_padding() can be skipped
Sheri Zhang61243902021-01-12 18:25:16 +0000242 return std::make_pair(Status{}, win);
243}
244} // namespace
245
246void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ConvertPolicy policy)
247{
248 ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
249 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy));
250
Georgios Pinitasda816752021-07-02 09:22:14 +0100251 const auto uk = get_implementation(CPUInfo::get(), src0->data_type());
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100252 ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
253
254 _policy = policy;
255 _run_method = uk->ukernel;
256 _name = std::string("CpuAddKernel").append("/").append(uk->name);
Sheri Zhang61243902021-01-12 18:25:16 +0000257
258 // Configure kernel window
259 auto win_config = validate_and_configure_window(*src0, *src1, *dst);
260 ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
261 ICpuKernel::configure(win_config.second);
262}
263
264Status CpuAddKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy)
265{
266 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
267
268 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst, policy));
269 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(*src0->clone(), *src1->clone(), *dst->clone()).first);
270
271 return Status{};
272}
273
274void CpuAddKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
275{
276 ARM_COMPUTE_UNUSED(info);
277 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
278 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
279
280 ARM_COMPUTE_ERROR_ON(tensors.empty());
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100281 ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
Sheri Zhang61243902021-01-12 18:25:16 +0000282
283 const ITensor *src0 = tensors.get_const_tensor(TensorType::ACL_SRC_0);
284 const ITensor *src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1);
285 ITensor *dst = tensors.get_tensor(TensorType::ACL_DST);
286
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100287 _run_method(src0, src1, dst, _policy, window);
Sheri Zhang61243902021-01-12 18:25:16 +0000288}
289
290const char *CpuAddKernel::name() const
291{
Georgios Pinitas5fdde992021-06-25 05:42:57 +0100292 return _name.c_str();
Sheri Zhang61243902021-01-12 18:25:16 +0000293}
Dana Zlotnik4cdd6b82021-10-07 15:31:54 +0300294
295size_t CpuAddKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
296{
Dana Zlotnikd7154db2021-11-10 11:50:58 +0200297 ARM_COMPUTE_UNUSED(thread_count);
298 // Tuning results that gave optimized results in performance investigation
299 if (platform.get_cpu_model() == CPUModel::A73 )
300 {
301 return 10240;
302 }
303 else
304 {
305 return 9216;
306 }
Dana Zlotnik4cdd6b82021-10-07 15:31:54 +0300307}
308
Sheri Zhang61243902021-01-12 18:25:16 +0000309} // namespace kernels
310} // namespace cpu
311} // namespace arm_compute