blob: c338ef09c7564a0443cd62f0e9e9476cc9041d97 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Georgios Pinitas5a594532018-12-03 14:30:05 +00002 * Copyright (c) 2017-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
25
Anthony Barbiereaefd002018-07-20 17:49:35 +010026#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010027#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
Michel Iwaniec5dfeae62017-11-29 10:48:23 +000029#include "arm_compute/core/NEON/NEAsymm.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010030#include "arm_compute/core/NEON/NEFixedPoint.h"
31#include "arm_compute/core/NEON/NEMath.h"
giuros01c9573f32019-06-20 10:30:17 +010032#include "arm_compute/core/NEON/NESymm.h"
Georgios Pinitas5a594532018-12-03 14:30:05 +000033#include "arm_compute/core/NEON/wrapper/wrapper.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010034#include "arm_compute/core/TensorInfo.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/Window.h"
38
39#include <arm_neon.h>
40#include <array>
41#include <cmath>
42#include <map>
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010043#include <set>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010044
45using namespace arm_compute;
Michalis Spyrouafa5d812017-11-30 14:25:57 +000046namespace
47{
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010048Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000049{
Anthony Barbiereaefd002018-07-20 17:49:35 +010050 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
giuros01c9573f32019-06-20 10:30:17 +010051 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000052
giuros01c9573f32019-06-20 10:30:17 +010053 static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010054 {
55 ActivationLayerInfo::ActivationFunction::RELU,
56 ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
57 ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
58 ActivationLayerInfo::ActivationFunction::LOGISTIC,
59 ActivationLayerInfo::ActivationFunction::TANH
60 };
giuros01c9573f32019-06-20 10:30:17 +010061 static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
62 {
63 ActivationLayerInfo::ActivationFunction::LOGISTIC,
64 ActivationLayerInfo::ActivationFunction::TANH
65 };
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010066 const DataType data_type = input->data_type();
67 const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
68 const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
69
giuros01c9573f32019-06-20 10:30:17 +010070 ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0),
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010071 "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");
giuros01c9573f32019-06-20 10:30:17 +010072
73 ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
74 "For QSYMM16 only tanh and logistic are supported");
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010075 ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
76 ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
77
giuros01c9573f32019-06-20 10:30:17 +010078 ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
79 ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
80
Michalis Spyrouafa5d812017-11-30 14:25:57 +000081 // Checks performed when output is configured
82 if((output != nullptr) && (output->total_size() != 0))
83 {
84 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
85 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000086 }
87
88 return Status{};
89}
90
91std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
92{
Georgios Pinitas5a594532018-12-03 14:30:05 +000093 // Configure kernel window
94 Window win = calculate_max_window(*input, Steps());
Michalis Spyrouafa5d812017-11-30 14:25:57 +000095
Georgios Pinitas5a594532018-12-03 14:30:05 +000096 if(output != nullptr)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000097 {
Georgios Pinitas5a594532018-12-03 14:30:05 +000098 // Output auto inizialitation if not yet initialized
99 auto_init_if_empty(*output, *input->clone());
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000100
Georgios Pinitas5a594532018-12-03 14:30:05 +0000101 // NEActivationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
102 Coordinates coord;
103 coord.set_num_dimensions(output->num_dimensions());
104 output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000105 }
106
Georgios Pinitas5a594532018-12-03 14:30:05 +0000107 return std::make_pair(Status{}, win);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000108}
109} // namespace
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100110
// Default constructor: members are set by configure(); _func stays nullptr until then.
NEActivationLayerKernel::NEActivationLayerKernel()
    : _input(nullptr), _output(nullptr), _func(nullptr), _act_info()
{
}
115
// Configures the kernel: validates the arguments, selects the data-type/activation
// specific executor function and sets up the processing window.
// When output is nullptr the kernel operates in-place on input.
void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input);

    _input    = input;
    _act_info = activation_info;
    _output   = input; // default to in-place operation

    // Out-of-place calculation
    if(output != nullptr)
    {
        _output = output;
    }

    // Disabled activation, thus no operation needed
    // NOTE(review): _func is unconditionally re-assigned by the switch below, so this
    // nullptr assignment is immediately overwritten; it is harmless because run()
    // independently early-exits when the activation is disabled.
    if(!activation_info.enabled())
    {
        _func = nullptr;
    }

    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, activation_info));

    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::ELU, &NEActivationLayerKernel::activation<ActivationFunction::ELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, float> },
    };

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Activation functions : FP16 (only compiled in when the toolchain supports FP16 vector arithmetic)
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
        { ActivationFunction::ELU, &NEActivationLayerKernel::activation<ActivationFunction::ELU, float16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, float16_t> },
    };
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/

    // Activation functions : QASYMM8 (subset enforced by validate_arguments above)
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qasymm8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
    };

    // Activation functions : QSYMM16 (subset enforced by validate_arguments above)
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },
    };

    // Select the executor matching the input data type.
    // NOTE(review): std::map::operator[] default-inserts a nullptr for a key not in the
    // map; validate_arguments has already rejected unsupported quantized activations,
    // but an unsupported FP activation would be caught only by run()'s _func check.
    switch(input->info()->data_type())
    {
        case DataType::QASYMM8:
            _func = act_map_qasymm8[activation_info.activation()];
            break;
        case DataType::QSYMM16:
            _func = act_map_qsymm16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            _func = act_map_f16[activation_info.activation()];
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

    // Configure kernel window
    auto win_config = validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
    ICPPKernel::configure(win_config.second);
}
219
// Floating-point (F32/F16) activation executor.
// Processes a full vector (16 bytes) of elements per iteration along X using the
// NEON wrapper intrinsics, then handles the tail scalar-by-scalar with <cmath>.
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
NEActivationLayerKernel::activation(const Window &window)
{
    /** NEON vector tag type. */
    using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;

    const int                window_step_x  = 16 / sizeof(T); // elements per 128-bit vector
    const auto               window_start_x = static_cast<int>(window.x().start());
    const auto               window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act            = F; // compile-time constant; switches below fold away

    // Collapse higher dimensions where possible and walk X manually inside the loop body.
    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    // Tiny bias added before SQRT's reciprocal-sqrt so vinvsqrt never sees exactly zero.
    const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
    const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
    const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
    // Broadcast activation parameters a/b for the vector path; scalar copies for the tail.
    const auto va = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{});
    const auto vb = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{});
    const auto a  = static_cast<T>(_act_info.a());
    const auto b  = static_cast<T>(_act_info.b());

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            switch(act)
            {
                case ActivationFunction::ABS:
                    tmp = wrapper::vabs(vin);
                    break;
                case ActivationFunction::LINEAR:
                    // a*x + b via fused multiply-accumulate
                    tmp = wrapper::vmla(vb, va, vin);
                    break;
                case ActivationFunction::LOGISTIC:
                    // 1 / (1 + e^-x)
                    tmp = wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(vin))));
                    break;
                case ActivationFunction::RELU:
                    tmp = wrapper::vmax(const_0, vin);
                    break;
                case ActivationFunction::BOUNDED_RELU:
                    // min(a, max(0, x))
                    tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
                    break;
                case ActivationFunction::LU_BOUNDED_RELU:
                    // min(a, max(b, x))
                    tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
                    break;
                case ActivationFunction::LEAKY_RELU:
                    // x if x > 0 else a*x
                    tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
                    break;
                case ActivationFunction::SOFT_RELU:
                    // log(1 + e^x)
                    tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin)));
                    break;
                case ActivationFunction::ELU:
                    // x if x >= 0 else a*(e^x - 1)
                    tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin, wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
                    break;
                case ActivationFunction::SQRT:
                    // sqrt(x) computed as 1 / (1/sqrt(x + epsilon))
                    tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon));
                    break;
                case ActivationFunction::SQUARE:
                    tmp = wrapper::vmul(vin, vin);
                    break;
                case ActivationFunction::TANH:
                    // a * tanh(b * x)
                    tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
                    break;
                case ActivationFunction::IDENTITY:
                    tmp = vin;
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements (scalar tail, mirrors the vector cases above)
        for(; x < window_end_x; ++x)
        {
            const T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T       tmp;
            switch(act)
            {
                case ActivationFunction::ABS:
                    tmp = std::abs(in);
                    break;
                case ActivationFunction::LINEAR:
                    tmp = a * in + b;
                    break;
                case ActivationFunction::LOGISTIC:
                    tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
                    break;
                case ActivationFunction::RELU:
                    tmp = std::max<T>(static_cast<T>(0), in);
                    break;
                case ActivationFunction::BOUNDED_RELU:
                    tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
                    break;
                case ActivationFunction::LU_BOUNDED_RELU:
                    tmp = std::min<T>(a, std::max<T>(b, in));
                    break;
                case ActivationFunction::LEAKY_RELU:
                    tmp = (in > 0) ? in : a * in;
                    break;
                case ActivationFunction::SOFT_RELU:
                    tmp = std::log(static_cast<T>(1) + std::exp(in));
                    break;
                case ActivationFunction::ELU:
                    tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
                    break;
                case ActivationFunction::SQRT:
                    // NOTE(review): no epsilon here, unlike the vector path — exact 0 is fine for std::sqrt
                    tmp = std::sqrt(in);
                    break;
                case ActivationFunction::SQUARE:
                    tmp = in * in;
                    break;
                case ActivationFunction::TANH:
                    tmp = a * std::tanh(b * in);
                    break;
                case ActivationFunction::IDENTITY:
                    tmp = in;
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
359
// QASYMM8 activation executor.
// RELU variants are computed directly in the quantized domain followed by a
// requantization multiply-add; LOGISTIC/TANH dequantize to float, apply the
// function, and requantize to the output quantization.
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    const int                window_step_x  = 16 / sizeof(T); // 16 uint8 lanes per 128-bit vector
    const auto               window_start_x = static_cast<int>(window.x().start());
    const auto               window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act            = F;

    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    const UniformQuantizationInfo qi_in  = _input->info()->quantization_info().uniform();
    const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
    // Activation parameters a/b quantized into the INPUT space (used by the RELU family).
    const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
    const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
    const qasymm8_t    a  = quantize_qasymm8(_act_info.a(), qi_in);
    const qasymm8_t    b  = quantize_qasymm8(_act_info.b(), qi_in);
    // Quantized representation of real 0 (the RELU clamp point).
    const qasymm8_t    const_0  = quantize_qasymm8(0.f, qi_in);
    const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
    const auto         vconst_1 = vdupq_n_f32(1.f);
    // Float copies of a/b for the dequantized LOGISTIC/TANH paths.
    const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
    const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
    const float       a_f32  = _act_info.a();
    const float       b_f32  = _act_info.b();

    // Initialise scale/offset for re-quantization
    float       s  = qi_in.scale / qi_out.scale;
    float       o  = -qi_in.offset * s + qi_out.offset;
    float32x4_t vs = vdupq_n_f32(s);
    float32x4_t vo = vdupq_n_f32(o);

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            if(act == ActivationFunction::RELU)
            {
                // Perform activation
                tmp = vmaxq_u8(vconst_0, vin);
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::BOUNDED_RELU)
            {
                // Perform activation
                tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::LU_BOUNDED_RELU)
            {
                // Perform activation
                tmp = vminq_u8(va, vmaxq_u8(vb, vin));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::LOGISTIC)
            {
                // De-quantize
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation: 1 / (1 + e^-x) on each of the four float32x4 sub-vectors
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[2])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[3])))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize(tmp_dep, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                // De-quantize
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation: a * tanh(b * x) on each of the four float32x4 sub-vectors
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize(tmp_dep, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements (scalar tail; requantization clamped to [0, 255])
        for(; x < window_end_x; ++x)
        {
            T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T tmp;
            if(act == ActivationFunction::RELU)
            {
                tmp = std::max(const_0, in);
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::BOUNDED_RELU)
            {
                tmp = std::min(a, std::max(const_0, in));
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::LU_BOUNDED_RELU)
            {
                tmp = std::min(a, std::max(b, in));
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::LOGISTIC)
            {
                float tmp_f = dequantize_qasymm8(in, qi_in);
                tmp_f       = 1.f / (1.f + std::exp(-tmp_f));
                tmp         = quantize_qasymm8(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                float tmp_f = dequantize_qasymm8(in, qi_in);
                tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                tmp         = quantize_qasymm8(tmp_f, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
509
// QSYMM16 activation executor.
// Only LOGISTIC and TANH are implemented (see validate_arguments); both
// dequantize to float, apply the function, and requantize with the output scale.
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    const int                window_step_x  = 16 / sizeof(T); // 8 int16 lanes per 128-bit vector
    const auto               window_start_x = static_cast<int>(window.x().start());
    const auto               window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act            = F;

    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
    const auto                    vconst_1 = vdupq_n_f32(1.f);
    // Activation parameters a/b as floats for the dequantized computation.
    const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
    const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
    const float       a_f32  = _act_info.a();
    const float       b_f32  = _act_info.b();

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
        ARM_COMPUTE_UNUSED(tmp);

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            if(act == ActivationFunction::LOGISTIC)
            {
                // De-quantize (symmetric: scale only, no offset)
                const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
                // Perform activation: 1 / (1 + e^-x) on each of the two float32x4 sub-vectors
                const float32x4x2_t tmp_dep =
                {
                    {
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_int16(tmp_dep, qi_out.scale);
            }
            else if(act == ActivationFunction::TANH)
            {
                // De-quantize (symmetric: scale only, no offset)
                const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
                // Perform activation: a * tanh(b * x) on each of the two float32x4 sub-vectors
                const float32x4x2_t tmp_dep =
                {
                    {
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_int16(tmp_dep, qi_out.scale);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements (scalar tail, mirrors the vector cases above)
        for(; x < window_end_x; ++x)
        {
            T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T tmp;
            if(act == ActivationFunction::LOGISTIC)
            {
                float tmp_f = dequantize_qsymm16(in, qi_in.scale);
                tmp_f       = 1.f / (1.f + std::exp(-tmp_f));
                tmp         = quantize_qsymm16(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                float tmp_f = dequantize_qsymm16(in, qi_in.scale);
                tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                tmp         = quantize_qsymm16(tmp_f, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
608
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000609Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
610{
611 ARM_COMPUTE_UNUSED(act_info);
Georgios Pinitas4b3fba12019-06-04 17:31:46 +0100612 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000613 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);
614
615 return Status{};
616}
617
// Executes the kernel on the given window by dispatching to the executor
// selected in configure(). A disabled activation is a no-op.
void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info)
{
    // Early exit on disabled activation
    if(!_act_info.enabled())
    {
        return;
    }

    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    // Dispatch through the member-function pointer chosen in configure()
    (this->*_func)(window);
}