blob: 04a7f15715efc356a4a7f56daf80979d919e8d74 [file] [log] [blame]
George Wort5a97b282018-12-21 16:21:04 +00001/*
Viet-Hoa Dofd472f02023-03-15 14:05:06 +00002 * Copyright (c) 2018-2023 Arm Limited.
George Wort5a97b282018-12-21 16:21:04 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/cpu/kernels/CpuElementwiseUnaryKernel.h"
George Wort5a97b282018-12-21 16:21:04 +000025
George Wort5a97b282018-12-21 16:21:04 +000026#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
George Wort5a97b282018-12-21 16:21:04 +000028#include "arm_compute/core/ITensor.h"
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000029#include "arm_compute/core/Utils.h"
George Wort5a97b282018-12-21 16:21:04 +000030#include "arm_compute/core/Validate.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010031#include "src/core/CPP/Validate.h"
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +000032#include "src/core/common/Registrars.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010033#include "src/core/helpers/AutoConfiguration.h"
34#include "src/core/helpers/WindowHelpers.h"
Dana Zlotnikd5c496d2021-11-28 14:46:12 +020035#include "src/cpu/kernels/elementwise_unary/list.h"
Michalis Spyroue6bcb5b2019-06-07 11:47:16 +010036#include "support/ToolchainSupport.h"
George Wort5a97b282018-12-21 16:21:04 +000037
George Wort5a97b282018-12-21 16:21:04 +000038namespace arm_compute
39{
Sang-Hoon Park7249f152021-01-22 11:55:03 +000040namespace cpu
41{
42namespace kernels
43{
George Wort5a97b282018-12-21 16:21:04 +000044namespace
45{
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000046#ifdef __aarch64__
47
48std::unique_ptr<uint8_t[]> q8_prepare_lut(ElementWiseUnary op, const ITensorInfo *src, const ITensorInfo *dst)
49{
50 ARM_COMPUTE_ERROR_ON(src->data_type() != dst->data_type());
51 ARM_COMPUTE_ERROR_ON(!is_data_type_quantized(src->data_type()));
52 ARM_COMPUTE_ERROR_ON(src->element_size() != 1);
53
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +010054 auto lut = std::unique_ptr<uint8_t[]>(new uint8_t[256]);
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000055 const auto is_signed = src->data_type() == DataType::QASYMM8_SIGNED;
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +010056 const auto src_qi = src->quantization_info().uniform();
57 const auto dst_qi = dst->quantization_info().uniform();
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000058
59 const auto dst_min_fp = (((is_signed) ? -128 : 0) - dst_qi.offset) * dst_qi.scale;
60 const auto dst_max_fp = (((is_signed) ? 127 : 255) - dst_qi.offset) * dst_qi.scale;
61
62 for(int i = 0; i < 256; ++i)
63 {
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +010064 const auto in = (is_signed) ? dequantize_qasymm8_signed(static_cast<int8_t>(i), src_qi) : dequantize_qasymm8(i, src_qi);
65 float result = 0;
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000066
67 switch(op)
68 {
69 case ElementWiseUnary::RSQRT:
70 result = 1 / sqrt(in);
71 break;
72
73 case ElementWiseUnary::EXP:
74 result = std::exp(in);
75 break;
76
77 case ElementWiseUnary::NEG:
78 result = -in;
79 break;
80
81 case ElementWiseUnary::LOG:
82 result = std::log(in);
83 break;
84
85 case ElementWiseUnary::ABS:
86 result = std::abs(in);
87 break;
88
89 case ElementWiseUnary::ROUND:
90 result = support::cpp11::nearbyint(in);
91 break;
92
93 case ElementWiseUnary::SIN:
94 result = std::sin(in);
95 break;
96
97 default:
98 ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
99 }
100
101 result = utility::clamp(result, dst_min_fp, dst_max_fp);
102
103 const auto out = (is_signed) ? static_cast<uint8_t>(quantize_qasymm8_signed(result, dst_qi)) : quantize_qasymm8(result, dst_qi);
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100104 lut[i] = out;
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000105 }
106
107 return lut;
108}
109
110#endif // __aarch64__
111
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000112static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> available_kernels =
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000113{
George Wort5a97b282018-12-21 16:21:04 +0000114 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100115 "sve_fp32_elementwise_unary",
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000116 [](const DataTypeISASelectorData & data)
117 {
Dana Zlotnik6a2df882022-01-17 09:54:26 +0200118 return (data.dt == DataType::F32 && data.isa.sve);
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000119 },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000120 REGISTER_FP32_SVE(sve_fp32_elementwise_unary),
121 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000122 },
123 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100124 "sve_fp16_elementwise_unary",
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000125 [](const DataTypeISASelectorData & data)
126 {
Dana Zlotnik6a2df882022-01-17 09:54:26 +0200127 return (data.dt == DataType::F16 && data.isa.sve && data.isa.fp16);
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000128 },
Dana Zlotnikd5c496d2021-11-28 14:46:12 +0200129 REGISTER_FP16_SVE(sve_fp16_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000130 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000131 },
132 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100133 "sve_s32_elementwise_unary",
Dana Zlotnik6a2df882022-01-17 09:54:26 +0200134 [](const DataTypeISASelectorData & data)
135 {
136 return (data.dt == DataType::S32 && data.isa.sve);
137 },
Dana Zlotnikd5c496d2021-11-28 14:46:12 +0200138 REGISTER_INTEGER_SVE(sve_s32_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000139 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000140 },
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000141 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100142 "neon_fp32_elementwise_unary",
Dana Zlotnik6a2df882022-01-17 09:54:26 +0200143 [](const DataTypeISASelectorData & data)
144 {
145 return data.dt == DataType::F32;
146 },
Dana Zlotnikd5c496d2021-11-28 14:46:12 +0200147 REGISTER_FP32_NEON(neon_fp32_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000148 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000149 },
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000150 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100151 "neon_fp16_elementwise_unary",
Dana Zlotnik6a2df882022-01-17 09:54:26 +0200152 [](const DataTypeISASelectorData & data)
153 {
154 return data.dt == DataType::F16 && data.isa.fp16;
155 },
Dana Zlotnik8d8208c2022-01-24 09:13:55 +0200156 REGISTER_FP16_NEON(neon_fp16_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000157 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000158 },
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000159 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100160 "neon_s32_elementwise_unary",
Dana Zlotnik6a2df882022-01-17 09:54:26 +0200161 [](const DataTypeISASelectorData & data)
162 {
163 return data.dt == DataType::S32;
164 },
Dana Zlotnikd5c496d2021-11-28 14:46:12 +0200165 REGISTER_INTEGER_NEON(neon_s32_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000166 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000167 },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000168#ifdef __aarch64__
169 {
SiCong Li3f70cd02023-05-17 13:46:13 +0100170 "sve2_q8_elementwise_unary",
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000171 [](const DataTypeISASelectorData & data)
172 {
SiCong Li3f70cd02023-05-17 13:46:13 +0100173 return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2;
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000174 },
SiCong Li3f70cd02023-05-17 13:46:13 +0100175 REGISTER_QASYMM8_SVE2(sve2_q8_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000176 &q8_prepare_lut,
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100177 },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000178 {
179 "neon_q8_elementwise_unary",
180 [](const DataTypeISASelectorData & data)
181 {
182 return data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED;
183 },
184 REGISTER_QASYMM8_NEON(neon_q8_elementwise_unary),
185 &q8_prepare_lut,
186 },
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100187#else // __aarch64__
188 {
189 "neon_qasymm8_signed_elementwise_unary",
190 [](const DataTypeISASelectorData & data)
191 {
192 return data.dt == DataType::QASYMM8_SIGNED;
193 },
Ramy Elgammalb84df202023-04-12 17:31:10 +0100194 REGISTER_QASYMM8_SIGNED_NEON(neon_qasymm8_signed_elementwise_unary),
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100195 nullptr,
196 },
197 {
198 "neon_qasymm8_elementwise_unary",
199 [](const DataTypeISASelectorData & data)
200 {
201 return data.dt == DataType::QASYMM8;
202 },
203 REGISTER_QASYMM8_NEON(neon_qasymm8_elementwise_unary),
204 nullptr,
205 },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000206#endif // __aarch64__
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000207};
208
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100209} // namespace
George Wort5a97b282018-12-21 16:21:04 +0000210
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000211void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst)
George Wort5a97b282018-12-21 16:21:04 +0000212{
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000213 ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst));
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000214 const auto uk = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ src.data_type(), CPUInfo::get().get_isa() });
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100215 ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
George Wort5a97b282018-12-21 16:21:04 +0000216
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100217 _op = op;
218 _run_method = uk->ukernel;
219 _name = std::string("CpuElementwiseUnaryKernel").append("/").append(uk->name);
George Wort5a97b282018-12-21 16:21:04 +0000220
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000221 // If input shape is dynamic, expect a configured window and dst at run-time.
222 if(src.is_dynamic())
223 {
224 return;
225 }
226
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000227 if(uk->prepare_func != nullptr)
228 {
229 _lut = uk->prepare_func(op, &src, &dst);
230 }
231
Sang-Hoon Parkd0b7b4b2021-03-09 10:47:30 +0000232 auto shape_and_window = compute_output_shape_and_window(src.tensor_shape());
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000233 auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type());
Yair Schwarzbaum46d44d22022-01-12 16:38:58 +0200234 ICpuKernel::configure(shape_and_window.second);
George Wort5a97b282018-12-21 16:21:04 +0000235}
236
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000237Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst)
George Wort5a97b282018-12-21 16:21:04 +0000238{
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000239 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src);
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000240
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000241 const auto *uk = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ src.data_type(), CPUInfo::get().get_isa() });
242
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000243 ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
244
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100245 switch(op)
246 {
247 case ElementWiseUnary::EXP:
248 case ElementWiseUnary::RSQRT:
249 case ElementWiseUnary::LOG:
250 case ElementWiseUnary::ROUND:
251 case ElementWiseUnary::SIN:
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000252 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100253 break;
254 case ElementWiseUnary::NEG:
255 case ElementWiseUnary::ABS:
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000256 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::S32, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100257 break;
258 default:
259 ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported");
260 }
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000261 // Validate in case of configured dst
262 if(dst.total_size() > 0)
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100263 {
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000264 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst);
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100265 }
266
George Wort5a97b282018-12-21 16:21:04 +0000267 return Status{};
268}
269
Sang-Hoon Park7249f152021-01-22 11:55:03 +0000270void CpuElementwiseUnaryKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
George Wort5a97b282018-12-21 16:21:04 +0000271{
272 ARM_COMPUTE_UNUSED(info);
Sang-Hoon Park7249f152021-01-22 11:55:03 +0000273
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100274 auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
275 auto dst = tensors.get_tensor(TensorType::ACL_DST);
276
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000277 _run_method(src, dst, window, _op, _lut.get());
George Wort5a97b282018-12-21 16:21:04 +0000278}
Georgios Pinitas2eb5d162021-07-02 09:01:49 +0100279
280const char *CpuElementwiseUnaryKernel::name() const
281{
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100282 return _name.c_str();
Georgios Pinitas2eb5d162021-07-02 09:01:49 +0100283}
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000284
285const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> &CpuElementwiseUnaryKernel::get_available_kernels()
286{
287 return available_kernels;
288}
289
Sang-Hoon Park7249f152021-01-22 11:55:03 +0000290} // namespace kernels
291} // namespace cpu
George Wort5a97b282018-12-21 16:21:04 +0000292} // namespace arm_compute