blob: ed1cb6fca414ae526445dc4cc24ffaa798cfd961 [file] [log] [blame]
George Wort5a97b282018-12-21 16:21:04 +00001/*
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +00002 * Copyright (c) 2018-2021 Arm Limited.
George Wort5a97b282018-12-21 16:21:04 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
George Wort5a97b282018-12-21 16:21:04 +000025
George Wort5a97b282018-12-21 16:21:04 +000026#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
George Wort5a97b282018-12-21 16:21:04 +000028#include "arm_compute/core/ITensor.h"
George Wort5a97b282018-12-21 16:21:04 +000029#include "arm_compute/core/Validate.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010030#include "src/core/CPP/Validate.h"
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +000031#include "src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h"
32#include "src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h"
33#include "src/core/common/Registrars.h"
34#include "src/core/common/StdTypes.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010035#include "src/core/helpers/AutoConfiguration.h"
36#include "src/core/helpers/WindowHelpers.h"
Michalis Spyroue6bcb5b2019-06-07 11:47:16 +010037#include "support/ToolchainSupport.h"
George Wort5a97b282018-12-21 16:21:04 +000038
George Wort5a97b282018-12-21 16:21:04 +000039namespace arm_compute
40{
George Wort5a97b282018-12-21 16:21:04 +000041namespace
42{
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +000043using ElementwiseUnarySelector = std::add_pointer<bool(DataType)>::type;
George Wort5a97b282018-12-21 16:21:04 +000044
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +000045struct ElementwiseUnaryKernel
George Wort5a97b282018-12-21 16:21:04 +000046{
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +000047 const char *name;
48 const ElementwiseUnarySelector is_selected;
49 NEElementwiseUnaryKernel::ElementwiseUnaryUkernelPtr ukernel;
50};
51
52static const ElementwiseUnaryKernel available_kernels[] =
53{
54#if defined(__ARM_FEATURE_SVE)
George Wort5a97b282018-12-21 16:21:04 +000055 {
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +000056 "fp32_sve_elementwise_unary",
57 [](DataType dt) { return dt == DataType::F32; },
58 REGISTER_FP32_SVE(arm_compute::cpu::elementwise_sve_op<f32>),
59 },
60 {
61 "fp16_sve_elementwise_unary",
62 [](DataType dt) { return dt == DataType::F16; },
63 REGISTER_FP16_SVE(arm_compute::cpu::elementwise_sve_op<f16>),
64 },
65 {
66 "s32_sve_elementwise_unary",
67 [](DataType dt) { return dt == DataType::S32; },
68 REGISTER_INTEGER_SVE(arm_compute::cpu::elementwise_sve_op<s32>),
69 },
70#endif // defined(__ARM_FEATURE_SVE)
71 {
72 "fp32_neon_elementwise_unary",
73 [](DataType dt) { return dt == DataType::F32; },
74 REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<f32>),
75 },
76#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
77 {
78 "fp16_neon_elementwise_unary",
79 [](DataType dt) { return dt == DataType::F16; },
80 REGISTER_FP32_NEON(arm_compute::cpu::elementwise_op<f16>),
81 },
82#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
83 {
84 "s32_neon_elementwise_unary",
85 [](DataType dt) { return dt == DataType::S32; },
86 REGISTER_INTEGER_NEON(arm_compute::cpu::elementwise_op<s32>),
87 },
88};
89
90const ElementwiseUnaryKernel *get_implementation(DataType dt)
91{
92 for(const auto &uk : available_kernels)
93 {
94 if(uk.is_selected(dt))
95 {
96 return &uk;
97 }
George Wort5a97b282018-12-21 16:21:04 +000098 }
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +000099 return nullptr;
George Wort5a97b282018-12-21 16:21:04 +0000100}
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100101} // namespace
George Wort5a97b282018-12-21 16:21:04 +0000102
George Wort5a97b282018-12-21 16:21:04 +0000103NEElementwiseUnaryKernel::NEElementwiseUnaryKernel()
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100104 : _func(nullptr), _input(nullptr), _output(nullptr), _op()
George Wort5a97b282018-12-21 16:21:04 +0000105{
106}
107
108void NEElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensor *input, ITensor *output)
109{
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100110 ARM_COMPUTE_ERROR_THROW_ON(validate(op, input->info(), output->info()));
George Wort5a97b282018-12-21 16:21:04 +0000111 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
112
113 // Configure kernel window
114 const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input->info());
115 const TensorShape &out_shape = broadcast_pair.first;
116 const ValidRegion &valid_region = broadcast_pair.second;
117
118 // Auto initialize output if not initialized
119 auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type());
120
121 Window win = calculate_max_window(valid_region);
122
123 _input = input;
124 _output = output;
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100125 _op = op;
George Wort5a97b282018-12-21 16:21:04 +0000126
127 INEKernel::configure(win);
128
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000129 _func = get_implementation(input->info()->data_type())->ukernel;
George Wort5a97b282018-12-21 16:21:04 +0000130}
131
George Wort5a97b282018-12-21 16:21:04 +0000132Status NEElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output)
133{
George Wort5a97b282018-12-21 16:21:04 +0000134 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100135 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000136
137 const auto *uk = get_implementation(input->data_type());
138 ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
139
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100140 switch(op)
141 {
142 case ElementWiseUnary::EXP:
143 case ElementWiseUnary::RSQRT:
144 case ElementWiseUnary::LOG:
145 case ElementWiseUnary::ROUND:
146 case ElementWiseUnary::SIN:
147 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
148 break;
149 case ElementWiseUnary::NEG:
150 case ElementWiseUnary::ABS:
151 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::S32);
152 break;
153 default:
154 ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported");
155 }
156 // Validate in case of configured output
157 if(output->total_size() > 0)
158 {
159 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
160 }
161
George Wort5a97b282018-12-21 16:21:04 +0000162 return Status{};
163}
164
165void NEElementwiseUnaryKernel::run(const Window &window, const ThreadInfo &info)
166{
167 ARM_COMPUTE_UNUSED(info);
168 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
169 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
Michalis Spyrou18e20ff2020-05-06 17:03:59 +0100170 ARM_COMPUTE_ERROR_ON(_func == nullptr);
Sang-Hoon Parkaf1870b2020-12-08 18:50:56 +0000171 (*_func)(_input, _output, window, _op);
George Wort5a97b282018-12-21 16:21:04 +0000172}
173} // namespace arm_compute