blob: 88545ee756c41bd4c45014c7e69b0ddf9606d09e [file] [log] [blame]
/*
 * Copyright (c) 2018-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/cpu/kernels/CpuElementwiseUnaryKernel.h"
George Wort5a97b282018-12-21 16:21:04 +000025
George Wort5a97b282018-12-21 16:21:04 +000026#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
George Wort5a97b282018-12-21 16:21:04 +000028#include "arm_compute/core/ITensor.h"
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000029#include "arm_compute/core/Utils.h"
George Wort5a97b282018-12-21 16:21:04 +000030#include "arm_compute/core/Validate.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010031
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +000032#include "src/core/common/Registrars.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010033#include "src/core/CPP/Validate.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010034#include "src/core/helpers/AutoConfiguration.h"
35#include "src/core/helpers/WindowHelpers.h"
Dana Zlotnikd5c496d2021-11-28 14:46:12 +020036#include "src/cpu/kernels/elementwise_unary/list.h"
Michalis Spyroue6bcb5b2019-06-07 11:47:16 +010037#include "support/ToolchainSupport.h"
George Wort5a97b282018-12-21 16:21:04 +000038
George Wort5a97b282018-12-21 16:21:04 +000039namespace arm_compute
40{
Sang-Hoon Park7249f152021-01-22 11:55:03 +000041namespace cpu
42{
43namespace kernels
44{
George Wort5a97b282018-12-21 16:21:04 +000045namespace
46{
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000047#ifdef __aarch64__
48
49std::unique_ptr<uint8_t[]> q8_prepare_lut(ElementWiseUnary op, const ITensorInfo *src, const ITensorInfo *dst)
50{
51 ARM_COMPUTE_ERROR_ON(src->data_type() != dst->data_type());
52 ARM_COMPUTE_ERROR_ON(!is_data_type_quantized(src->data_type()));
53 ARM_COMPUTE_ERROR_ON(src->element_size() != 1);
54
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +010055 auto lut = std::unique_ptr<uint8_t[]>(new uint8_t[256]);
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000056 const auto is_signed = src->data_type() == DataType::QASYMM8_SIGNED;
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +010057 const auto src_qi = src->quantization_info().uniform();
58 const auto dst_qi = dst->quantization_info().uniform();
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000059
60 const auto dst_min_fp = (((is_signed) ? -128 : 0) - dst_qi.offset) * dst_qi.scale;
61 const auto dst_max_fp = (((is_signed) ? 127 : 255) - dst_qi.offset) * dst_qi.scale;
62
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010063 for (int i = 0; i < 256; ++i)
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000064 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010065 const auto in =
66 (is_signed) ? dequantize_qasymm8_signed(static_cast<int8_t>(i), src_qi) : dequantize_qasymm8(i, src_qi);
67 float result = 0;
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000068
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010069 switch (op)
Viet-Hoa Dofd472f02023-03-15 14:05:06 +000070 {
71 case ElementWiseUnary::RSQRT:
72 result = 1 / sqrt(in);
73 break;
74
75 case ElementWiseUnary::EXP:
76 result = std::exp(in);
77 break;
78
79 case ElementWiseUnary::NEG:
80 result = -in;
81 break;
82
83 case ElementWiseUnary::LOG:
84 result = std::log(in);
85 break;
86
87 case ElementWiseUnary::ABS:
88 result = std::abs(in);
89 break;
90
91 case ElementWiseUnary::ROUND:
92 result = support::cpp11::nearbyint(in);
93 break;
94
95 case ElementWiseUnary::SIN:
96 result = std::sin(in);
97 break;
98
99 default:
100 ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
101 }
102
103 result = utility::clamp(result, dst_min_fp, dst_max_fp);
104
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100105 const auto out = (is_signed) ? static_cast<uint8_t>(quantize_qasymm8_signed(result, dst_qi))
106 : quantize_qasymm8(result, dst_qi);
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100107 lut[i] = out;
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000108 }
109
110 return lut;
111}
112
113#endif // __aarch64__
114
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100115static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> available_kernels = {
George Wort5a97b282018-12-21 16:21:04 +0000116 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100117 "sve_fp32_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100118 [](const DataTypeISASelectorData &data) { return (data.dt == DataType::F32 && data.isa.sve); },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000119 REGISTER_FP32_SVE(sve_fp32_elementwise_unary),
120 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000121 },
122 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100123 "sve_fp16_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100124 [](const DataTypeISASelectorData &data) { return (data.dt == DataType::F16 && data.isa.sve && data.isa.fp16); },
Dana Zlotnikd5c496d2021-11-28 14:46:12 +0200125 REGISTER_FP16_SVE(sve_fp16_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000126 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000127 },
128 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100129 "sve_s32_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100130 [](const DataTypeISASelectorData &data) { return (data.dt == DataType::S32 && data.isa.sve); },
Dana Zlotnikd5c496d2021-11-28 14:46:12 +0200131 REGISTER_INTEGER_SVE(sve_s32_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000132 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000133 },
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000134 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100135 "neon_fp32_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100136 [](const DataTypeISASelectorData &data) { return data.dt == DataType::F32; },
Dana Zlotnikd5c496d2021-11-28 14:46:12 +0200137 REGISTER_FP32_NEON(neon_fp32_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000138 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000139 },
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000140 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100141 "neon_fp16_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100142 [](const DataTypeISASelectorData &data) { return data.dt == DataType::F16 && data.isa.fp16; },
Dana Zlotnik8d8208c2022-01-24 09:13:55 +0200143 REGISTER_FP16_NEON(neon_fp16_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000144 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000145 },
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000146 {
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100147 "neon_s32_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100148 [](const DataTypeISASelectorData &data) { return data.dt == DataType::S32; },
Dana Zlotnikd5c496d2021-11-28 14:46:12 +0200149 REGISTER_INTEGER_NEON(neon_s32_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000150 nullptr,
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000151 },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000152#ifdef __aarch64__
153 {
SiCong Lic0463a22023-05-17 13:46:13 +0100154 "sve2_q8_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100155 [](const DataTypeISASelectorData &data)
156 { return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2; },
SiCong Lic0463a22023-05-17 13:46:13 +0100157 REGISTER_QASYMM8_SVE2(sve2_q8_elementwise_unary),
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000158 &q8_prepare_lut,
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100159 },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000160 {
161 "neon_q8_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100162 [](const DataTypeISASelectorData &data)
163 { return data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED; },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000164 REGISTER_QASYMM8_NEON(neon_q8_elementwise_unary),
165 &q8_prepare_lut,
166 },
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100167#else // __aarch64__
168 {
169 "neon_qasymm8_signed_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100170 [](const DataTypeISASelectorData &data) { return data.dt == DataType::QASYMM8_SIGNED; },
Ramy Elgammalb84df202023-04-12 17:31:10 +0100171 REGISTER_QASYMM8_SIGNED_NEON(neon_qasymm8_signed_elementwise_unary),
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100172 nullptr,
173 },
174 {
175 "neon_qasymm8_elementwise_unary",
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100176 [](const DataTypeISASelectorData &data) { return data.dt == DataType::QASYMM8; },
Ramy Elgammal8b7f42a2023-03-31 16:16:15 +0100177 REGISTER_QASYMM8_NEON(neon_qasymm8_elementwise_unary),
178 nullptr,
179 },
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000180#endif // __aarch64__
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000181};
182
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100183} // namespace
George Wort5a97b282018-12-21 16:21:04 +0000184
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000185void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst)
George Wort5a97b282018-12-21 16:21:04 +0000186{
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000187 ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst));
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100188 const auto uk = CpuElementwiseUnaryKernel::get_implementation(
189 DataTypeISASelectorData{src.data_type(), CPUInfo::get().get_isa()});
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100190 ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
George Wort5a97b282018-12-21 16:21:04 +0000191
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100192 _op = op;
193 _run_method = uk->ukernel;
194 _name = std::string("CpuElementwiseUnaryKernel").append("/").append(uk->name);
George Wort5a97b282018-12-21 16:21:04 +0000195
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000196 // If input shape is dynamic, expect a configured window and dst at run-time.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100197 if (src.is_dynamic())
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000198 {
199 return;
200 }
201
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100202 if (uk->prepare_func != nullptr)
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000203 {
204 _lut = uk->prepare_func(op, &src, &dst);
205 }
206
Sang-Hoon Parkd0b7b4b2021-03-09 10:47:30 +0000207 auto shape_and_window = compute_output_shape_and_window(src.tensor_shape());
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000208 auto_init_if_empty(dst, shape_and_window.first, 1, src.data_type());
Yair Schwarzbaum46d44d22022-01-12 16:38:58 +0200209 ICpuKernel::configure(shape_and_window.second);
George Wort5a97b282018-12-21 16:21:04 +0000210}
211
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000212Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst)
George Wort5a97b282018-12-21 16:21:04 +0000213{
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000214 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src);
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000215
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100216 const auto *uk = CpuElementwiseUnaryKernel::get_implementation(
217 DataTypeISASelectorData{src.data_type(), CPUInfo::get().get_isa()});
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000218
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000219 ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
220
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100221 switch (op)
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100222 {
223 case ElementWiseUnary::EXP:
224 case ElementWiseUnary::RSQRT:
225 case ElementWiseUnary::LOG:
226 case ElementWiseUnary::ROUND:
227 case ElementWiseUnary::SIN:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100228 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32,
229 DataType::QASYMM8, DataType::QASYMM8_SIGNED);
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100230 break;
231 case ElementWiseUnary::NEG:
232 case ElementWiseUnary::ABS:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100233 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src, 1, DataType::F16, DataType::F32, DataType::S32,
234 DataType::QASYMM8, DataType::QASYMM8_SIGNED);
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100235 break;
236 default:
237 ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported");
238 }
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000239 // Validate in case of configured dst
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100240 if (dst.total_size() > 0)
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100241 {
Sang-Hoon Park668ccdc2021-02-03 10:32:59 +0000242 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst);
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100243 }
244
George Wort5a97b282018-12-21 16:21:04 +0000245 return Status{};
246}
247
Sang-Hoon Park7249f152021-01-22 11:55:03 +0000248void CpuElementwiseUnaryKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
George Wort5a97b282018-12-21 16:21:04 +0000249{
250 ARM_COMPUTE_UNUSED(info);
Sang-Hoon Park7249f152021-01-22 11:55:03 +0000251
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100252 auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
253 auto dst = tensors.get_tensor(TensorType::ACL_DST);
254
Viet-Hoa Dofd472f02023-03-15 14:05:06 +0000255 _run_method(src, dst, window, _op, _lut.get());
George Wort5a97b282018-12-21 16:21:04 +0000256}
Georgios Pinitas2eb5d162021-07-02 09:01:49 +0100257
258const char *CpuElementwiseUnaryKernel::name() const
259{
Georgios Pinitas5ee0d952021-07-05 07:21:28 +0100260 return _name.c_str();
Georgios Pinitas2eb5d162021-07-02 09:01:49 +0100261}
Giorgio Arena5ae8d802021-11-18 18:02:13 +0000262
263const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> &CpuElementwiseUnaryKernel::get_available_kernels()
264{
265 return available_kernels;
266}
267
Sang-Hoon Park7249f152021-01-22 11:55:03 +0000268} // namespace kernels
269} // namespace cpu
George Wort5a97b282018-12-21 16:21:04 +0000270} // namespace arm_compute