blob: 44f76f6e22fee5db2e236d9bd19cd7fdda0f0af5 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Georgios Pinitas5a594532018-12-03 14:30:05 +00002 * Copyright (c) 2017-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
25
Anthony Barbiereaefd002018-07-20 17:49:35 +010026#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010027#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
Michel Iwaniec5dfeae62017-11-29 10:48:23 +000029#include "arm_compute/core/NEON/NEAsymm.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010030#include "arm_compute/core/NEON/NEFixedPoint.h"
31#include "arm_compute/core/NEON/NEMath.h"
giuros01c9573f32019-06-20 10:30:17 +010032#include "arm_compute/core/NEON/NESymm.h"
Georgios Pinitas5a594532018-12-03 14:30:05 +000033#include "arm_compute/core/NEON/wrapper/wrapper.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010034#include "arm_compute/core/TensorInfo.h"
35#include "arm_compute/core/Utils.h"
36#include "arm_compute/core/Validate.h"
37#include "arm_compute/core/Window.h"
38
39#include <arm_neon.h>
40#include <array>
41#include <cmath>
42#include <map>
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010043#include <set>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010044
45using namespace arm_compute;
Michalis Spyrouafa5d812017-11-30 14:25:57 +000046namespace
47{
// Validates the kernel arguments without configuring any state.
//
// @param input           Source tensor info. Must not be nullptr.
// @param output          Destination tensor info. May be nullptr (in-place execution).
// @param activation_info Activation function and its parameters (a, b).
//
// @return An error Status describing the first failed check, or an empty Status on success.
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
    // Reject F16 inputs on targets built without FP16 vector support, then check the data type whitelist.
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);

    // Subset of activations that have a quantized asymmetric (QASYMM8/QASYMM8_SIGNED) implementation.
    static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
    {
        ActivationLayerInfo::ActivationFunction::RELU,
        ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
        ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
        ActivationLayerInfo::ActivationFunction::LOGISTIC,
        ActivationLayerInfo::ActivationFunction::TANH
    };
    // Subset of activations that have a quantized symmetric (QSYMM16) implementation.
    static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
    {
        ActivationLayerInfo::ActivationFunction::LOGISTIC,
        ActivationLayerInfo::ActivationFunction::TANH
    };
    const DataType data_type = input->data_type();
    // When output is not provided the computation is in-place, so the input's
    // quantization info is also the output quantization info.
    const QuantizationInfo &oq_info                       = (output != nullptr) ? output->quantization_info() : input->quantization_info();
    const ActivationLayerInfo::ActivationFunction f_act   = activation_info.activation();

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0),
                                    "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
                                    "For QSYMM16 only tanh and logistic are supported");
    // Quantized TANH/LOGISTIC kernels produce values in a fixed range, so the output
    // quantization info must match the expected (scale, offset) pair for each data type.
    ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::TANH)
                                && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
    ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
                                && (oq_info != QuantizationInfo(1.f / 256.f, 0)));

    ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 0)));
    ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, -128)));

    ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
    ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));

    // Checks performed when output is configured
    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    }

    return Status{};
}
95
96std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
97{
Georgios Pinitas5a594532018-12-03 14:30:05 +000098 // Configure kernel window
99 Window win = calculate_max_window(*input, Steps());
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000100
Georgios Pinitas5a594532018-12-03 14:30:05 +0000101 if(output != nullptr)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000102 {
Georgios Pinitas5a594532018-12-03 14:30:05 +0000103 // Output auto inizialitation if not yet initialized
104 auto_init_if_empty(*output, *input->clone());
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000105
Georgios Pinitas5a594532018-12-03 14:30:05 +0000106 // NEActivationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
107 Coordinates coord;
108 coord.set_num_dimensions(output->num_dimensions());
109 output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000110 }
111
Georgios Pinitas5a594532018-12-03 14:30:05 +0000112 return std::make_pair(Status{}, win);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000113}
114} // namespace
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100115
// Default constructor: all state is set by configure(); until then the kernel
// holds no tensors and no execution function.
NEActivationLayerKernel::NEActivationLayerKernel()
    : _input(nullptr), _output(nullptr), _func(nullptr), _act_info()
{
}
120
// Configures the kernel: validates the arguments, selects the templated
// execution function matching the input data type and requested activation,
// and sets up the kernel window.
//
// @param input           Source tensor. Also used as output when @p output is nullptr (in-place).
// @param output          Destination tensor. Can be nullptr for in-place computation.
// @param activation_info Activation function and parameters to apply.
void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input);

    _input    = input;
    _act_info = activation_info;
    _output   = input; // Default to in-place: output aliases the input.

    // Out-of-place calculation
    if(output != nullptr)
    {
        _output = output;
    }

    // Disabled activation, thus no operation needed
    // NOTE(review): this nullptr is unconditionally overwritten by the switch
    // below, which always assigns an executor from the data-type map. A disabled
    // activation therefore ends up with whatever activation_info.activation()
    // maps to rather than a guaranteed no-op - confirm this is intended.
    if(!activation_info.enabled())
    {
        _func = nullptr;
    }

    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, activation_info));

    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::ELU, &NEActivationLayerKernel::activation<ActivationFunction::ELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, float> },
    };

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Activation functions : FP16 (only compiled in when the target has FP16 vector arithmetic)
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
        { ActivationFunction::ELU, &NEActivationLayerKernel::activation<ActivationFunction::ELU, float16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, float16_t> },
    };
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/

    // Activation functions : QASYMM8_SIGNED
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8_signed =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qasymm8_signed_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qasymm8_signed_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_signed_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_signed_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_signed_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_signed_t> },
    };

    // Activation functions : QASYMM8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qasymm8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
    };

    // Activation functions : QSYMM16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },
    };

    // Select the executor for the input data type.
    // NOTE(review): std::map::operator[] default-inserts a nullptr executor for
    // activations absent from a map; validate_arguments() above is relied upon
    // to reject such (data type, activation) combinations beforehand.
    switch(input->info()->data_type())
    {
        case DataType::QASYMM8_SIGNED:
            _func = act_map_qasymm8_signed[activation_info.activation()];
            break;
        case DataType::QASYMM8:
            _func = act_map_qasymm8[activation_info.activation()];
            break;
        case DataType::QSYMM16:
            _func = act_map_qsymm16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            _func = act_map_f16[activation_info.activation()];
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

    // Configure kernel window
    auto win_config = validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
    ICPPKernel::configure(win_config.second);
}
238
// Element-wise activation for floating point types (F32, and F16 where supported).
//
// The window is collapsed over Z where possible, then each row is processed in
// two phases: a vectorized loop consuming 16 bytes (16 / sizeof(T) lanes) per
// iteration, followed by a scalar tail loop for the remaining elements. Both
// phases implement the same activation formulas, selected at compile time via
// the template parameter F (the runtime switch on a constant folds away).
//
// @param window Execution window passed by the scheduler.
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
NEActivationLayerKernel::activation(const Window &window)
{
    /** NEON vector tag type. */
    using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;

    const int                window_step_x  = 16 / sizeof(T);
    const auto               window_start_x = static_cast<int>(window.x().start());
    const auto               window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act            = F; // compile-time constant; branches below are resolved per instantiation

    // Manually iterate over X (step 1 window) so the leftover elements can be handled without padding.
    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    // Small bias used only in the SQRT path: sqrt is computed as 1 / invsqrt(x),
    // and the offset keeps vinvsqrt from being evaluated at exactly zero.
    const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
    const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
    const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
    // Broadcast activation parameters a/b for the vector loop; keep scalar copies for the tail loop.
    const auto va = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{});
    const auto vb = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{});
    const auto a  = static_cast<T>(_act_info.a());
    const auto b  = static_cast<T>(_act_info.b());

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            switch(act)
            {
                case ActivationFunction::ABS:
                    tmp = wrapper::vabs(vin);
                    break;
                case ActivationFunction::LINEAR: // a * x + b
                    tmp = wrapper::vmla(vb, va, vin);
                    break;
                case ActivationFunction::LOGISTIC: // 1 / (1 + exp(-x))
                    tmp = wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(vin))));
                    break;
                case ActivationFunction::RELU: // max(0, x)
                    tmp = wrapper::vmax(const_0, vin);
                    break;
                case ActivationFunction::BOUNDED_RELU: // min(a, max(0, x))
                    tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
                    break;
                case ActivationFunction::LU_BOUNDED_RELU: // min(a, max(b, x))
                    tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
                    break;
                case ActivationFunction::LEAKY_RELU: // x if x > 0 else a * x
                    tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
                    break;
                case ActivationFunction::SOFT_RELU: // log(1 + exp(x))
                    tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin)));
                    break;
                case ActivationFunction::ELU: // x if x >= 0 else a * (exp(x) - 1)
                    tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin, wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
                    break;
                case ActivationFunction::SQRT: // sqrt(x) computed as 1 / invsqrt(x + epsilon)
                    tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon));
                    break;
                case ActivationFunction::SQUARE:
                    tmp = wrapper::vmul(vin, vin);
                    break;
                case ActivationFunction::TANH: // a * tanh(b * x)
                    tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
                    break;
                case ActivationFunction::IDENTITY:
                    tmp = vin;
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements (scalar equivalents of the vector formulas above)
        for(; x < window_end_x; ++x)
        {
            const T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T       tmp;
            switch(act)
            {
                case ActivationFunction::ABS:
                    tmp = std::abs(in);
                    break;
                case ActivationFunction::LINEAR:
                    tmp = a * in + b;
                    break;
                case ActivationFunction::LOGISTIC:
                    tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
                    break;
                case ActivationFunction::RELU:
                    tmp = std::max<T>(static_cast<T>(0), in);
                    break;
                case ActivationFunction::BOUNDED_RELU:
                    tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
                    break;
                case ActivationFunction::LU_BOUNDED_RELU:
                    tmp = std::min<T>(a, std::max<T>(b, in));
                    break;
                case ActivationFunction::LEAKY_RELU:
                    tmp = (in > 0) ? in : a * in;
                    break;
                case ActivationFunction::SOFT_RELU:
                    tmp = std::log(static_cast<T>(1) + std::exp(in));
                    break;
                case ActivationFunction::ELU:
                    tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
                    break;
                case ActivationFunction::SQRT:
                    // No epsilon needed here: std::sqrt(0) is well-defined.
                    tmp = std::sqrt(in);
                    break;
                case ActivationFunction::SQUARE:
                    tmp = in * in;
                    break;
                case ActivationFunction::TANH:
                    tmp = a * std::tanh(b * in);
                    break;
                case ActivationFunction::IDENTITY:
                    tmp = in;
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
378
// Element-wise activation for QASYMM8 (unsigned 8-bit asymmetric quantized) tensors.
//
// RELU variants operate directly in the quantized domain (comparisons against
// the quantized a/b/0 values) and are then re-quantized to the output space via
// a fused multiply-add (vmlaq_qasymm8 with scale s and offset o). LOGISTIC and
// TANH de-quantize to float, evaluate the function, and re-quantize.
//
// NOTE(review): act_map_qasymm8 in configure() contains an IDENTITY entry, but
// this template has no IDENTITY branch and would hit the "Unsupported activation
// function" error if ever dispatched with F == IDENTITY - confirm that path is
// unreachable (validate_arguments() does not list IDENTITY for QASYMM8).
//
// @param window Execution window passed by the scheduler.
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    const int                window_step_x  = 16 / sizeof(T);
    const auto               window_start_x = static_cast<int>(window.x().start());
    const auto               window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act            = F; // compile-time constant; branches below fold away

    // Manually iterate over X (step 1 window) so leftover elements are handled without padding.
    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    // Quantize the activation parameters (a, b) and zero into the *input* quantized space,
    // both as scalars (tail loop) and broadcast vectors (main loop).
    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
    const qasymm8x16_t            va       = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
    const qasymm8x16_t            vb       = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
    const qasymm8_t               a        = quantize_qasymm8(_act_info.a(), qi_in);
    const qasymm8_t               b        = quantize_qasymm8(_act_info.b(), qi_in);
    const qasymm8_t               const_0  = quantize_qasymm8(0.f, qi_in);
    const qasymm8x16_t            vconst_0 = vdupq_n_u8(const_0);
    const auto                    vconst_1 = vdupq_n_f32(1.f);
    // Float copies of a/b for the de-quantized LOGISTIC/TANH paths.
    const float32x4_t             va_f32   = vdupq_n_f32(_act_info.a());
    const float32x4_t             vb_f32   = vdupq_n_f32(_act_info.b());
    const float                   a_f32    = _act_info.a();
    const float                   b_f32    = _act_info.b();

    // Initialise scale/offset for re-quantization: maps a value quantized with
    // qi_in to the equivalent value quantized with qi_out (q_out = q_in * s + o).
    float       s  = qi_in.scale / qi_out.scale;
    float       o  = -qi_in.offset * s + qi_out.offset;
    float32x4_t vs = vdupq_n_f32(s);
    float32x4_t vo = vdupq_n_f32(o);

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            if(act == ActivationFunction::RELU)
            {
                // Perform activation
                tmp = vmaxq_u8(vconst_0, vin);
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::BOUNDED_RELU)
            {
                // Perform activation
                tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::LU_BOUNDED_RELU)
            {
                // Perform activation
                tmp = vminq_u8(va, vmaxq_u8(vb, vin));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::LOGISTIC)
            {
                // De-quantize
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation: 1 / (1 + exp(-x)) on each of the four float32x4 sub-vectors
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[2])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[3])))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize(tmp_dep, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                // De-quantize
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation: a * tanh(b * x) on each of the four float32x4 sub-vectors
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize(tmp_dep, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements (scalar equivalents; requantization is done
        // in int32 and clamped to the valid [0, 255] QASYMM8 range)
        for(; x < window_end_x; ++x)
        {
            T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T tmp;
            if(act == ActivationFunction::RELU)
            {
                tmp = std::max(const_0, in);
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::BOUNDED_RELU)
            {
                tmp = std::min(a, std::max(const_0, in));
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::LU_BOUNDED_RELU)
            {
                tmp = std::min(a, std::max(b, in));
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::LOGISTIC)
            {
                float tmp_f = dequantize_qasymm8(in, qi_in);
                tmp_f       = 1.f / (1.f + std::exp(-tmp_f));
                tmp         = quantize_qasymm8(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                float tmp_f = dequantize_qasymm8(in, qi_in);
                tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                tmp         = quantize_qasymm8(tmp_f, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
528
giuros01c9573f32019-06-20 10:30:17 +0100529template <ActivationLayerInfo::ActivationFunction F, typename T>
Michalis Spyrou8d4d1b82019-11-28 11:31:23 +0000530typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
531{
532 const int window_step_x = 16 / sizeof(T);
533 const auto window_start_x = static_cast<int>(window.x().start());
534 const auto window_end_x = static_cast<int>(window.x().end());
535 const ActivationFunction act = F;
536
537 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
538 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
539
540 Iterator input(_input, win_collapsed);
541 Iterator output(_output, win_collapsed);
542
543 const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
544 const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
545 const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
546 const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
547 const qasymm8_signed_t a = quantize_qasymm8_signed(_act_info.a(), qi_in);
548 const qasymm8_signed_t b = quantize_qasymm8_signed(_act_info.b(), qi_in);
549 const qasymm8_signed_t const_0 = quantize_qasymm8_signed(0.f, qi_in);
550 const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
551 const auto vconst_1 = vdupq_n_f32(1.f);
552 const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
553 const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
554 const float a_f32 = _act_info.a();
555 const float b_f32 = _act_info.b();
556
557 // Initialise scale/offset for re-quantization
558 float s = qi_in.scale / qi_out.scale;
559 float o = -qi_in.offset * s + qi_out.offset;
560 float32x4_t vs = vdupq_n_f32(s);
561 float32x4_t vo = vdupq_n_f32(o);
562
563 execute_window_loop(win_collapsed, [&](const Coordinates &)
564 {
565 const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
566 const auto output_ptr = reinterpret_cast<T *>(output.ptr());
567
568 wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
569
570 // Compute S elements per iteration
571 int x = window_start_x;
572 for(; x <= (window_end_x - window_step_x); x += window_step_x)
573 {
574 const auto vin = wrapper::vloadq(input_ptr + x);
575 if(act == ActivationFunction::RELU)
576 {
577 // Perform activation
578 tmp = vmaxq_s8(vconst_0, vin);
579 // Re-quantize to new output space
580 tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
581 }
582 else if(act == ActivationFunction::BOUNDED_RELU)
583 {
584 // Perform activation
585 tmp = vminq_s8(va, vmaxq_s8(vconst_0, vin));
586 // Re-quantize to new output space
587 tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
588 }
589 else if(act == ActivationFunction::LU_BOUNDED_RELU)
590 {
591 // Perform activation
592 tmp = vminq_s8(va, vmaxq_s8(vb, vin));
593 // Re-quantize to new output space
594 tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
595 }
596 else if(act == ActivationFunction::LOGISTIC)
597 {
598 // De-quantize
599 const auto vin_deq = vdequantize(vin, qi_in);
600 // Perform activation
601 const float32x4x4_t tmp_dep =
602 {
603 {
604 wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
605 wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
606 wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[2])))),
607 wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[3])))),
608 }
609 };
610 // Re-quantize to new output space
611 tmp = vquantize_signed(tmp_dep, qi_out);
612 }
613 else if(act == ActivationFunction::TANH)
614 {
615 // De-quantize
616 const auto vin_deq = vdequantize(vin, qi_in);
617 // Perform activation
618 const float32x4x4_t tmp_dep =
619 {
620 {
621 wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
622 wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
623 wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
624 wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
625 }
626 };
627 // Re-quantize to new output space
628 tmp = vquantize_signed(tmp_dep, qi_out);
629 }
630 else
631 {
632 ARM_COMPUTE_ERROR("Unsupported activation function");
633 }
634 wrapper::vstore(output_ptr + x, tmp);
635 }
636
637 // Compute left-over elements
638 for(; x < window_end_x; ++x)
639 {
640 T in = *(reinterpret_cast<const T *>(input_ptr + x));
641 T tmp;
642 if(act == ActivationFunction::RELU)
643 {
644 tmp = std::max(const_0, in);
645 tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
646 }
647 else if(act == ActivationFunction::BOUNDED_RELU)
648 {
649 tmp = std::min(a, std::max(const_0, in));
650 tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
651 }
652 else if(act == ActivationFunction::LU_BOUNDED_RELU)
653 {
654 tmp = std::min(a, std::max(b, in));
655 tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
656 }
657 else if(act == ActivationFunction::LOGISTIC)
658 {
659 float tmp_f = dequantize_qasymm8_signed(in, qi_in);
660 tmp_f = 1.f / (1.f + std::exp(-tmp_f));
661 tmp = quantize_qasymm8_signed(tmp_f, qi_out);
662 }
663 else if(act == ActivationFunction::TANH)
664 {
665 float tmp_f = dequantize_qasymm8_signed(in, qi_in);
666 tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
667 tmp = quantize_qasymm8_signed(tmp_f, qi_out);
668 }
669 else
670 {
671 ARM_COMPUTE_ERROR("Unsupported activation function");
672 }
673 *(output_ptr + x) = tmp;
674 }
675 },
676 input, output);
677}
678
/** QSYMM16 (16-bit symmetric quantized) specialization of the activation kernel.
 *
 * Processes 16 / sizeof(int16_t) = 8 elements per iteration with NEON and
 * finishes the leftover tail element-by-element. Only LOGISTIC and TANH are
 * handled here; any other activation hits ARM_COMPUTE_ERROR. Both paths
 * de-quantize to float32, apply the function and quantize back with the
 * output scale (symmetric quantization carries no offset — only scales are
 * used below).
 *
 * @tparam F Activation function to apply.
 * @tparam T Element type; this overload is enabled only for qsymm16_t.
 *
 * @param[in] window Region on which to execute the kernel.
 */
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    const int window_step_x = 16 / sizeof(T);
    const auto window_start_x = static_cast<int>(window.x().start());
    const auto window_end_x = static_cast<int>(window.x().end());
    const ActivationFunction act = F;

    // Collapse and make the X dimension manual so the loops below can handle the tail themselves
    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    // Input/output quantization parameters plus float copies of the activation's a/b
    const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
    const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
    const auto vconst_1 = vdupq_n_f32(1.f);
    const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
    const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
    const float a_f32 = _act_info.a();
    const float b_f32 = _act_info.b();

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
        // NOTE(review): tmp is written on every supported path below; the UNUSED
        // marker presumably silences warnings in instantiations — confirm intent.
        ARM_COMPUTE_UNUSED(tmp);

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            if(act == ActivationFunction::LOGISTIC)
            {
                // De-quantize
                const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
                // Perform activation: 1 / (1 + exp(-x)) on each float32x4 half
                const float32x4x2_t tmp_dep =
                {
                    {
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_int16(tmp_dep, qi_out.scale);
            }
            else if(act == ActivationFunction::TANH)
            {
                // De-quantize
                const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
                // Perform activation: a * tanh(b * x)
                const float32x4x2_t tmp_dep =
                {
                    {
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_int16(tmp_dep, qi_out.scale);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements (scalar equivalents of the vector paths above)
        for(; x < window_end_x; ++x)
        {
            T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T tmp;
            if(act == ActivationFunction::LOGISTIC)
            {
                float tmp_f = dequantize_qsymm16(in, qi_in.scale);
                tmp_f = 1.f / (1.f + std::exp(-tmp_f));
                tmp = quantize_qsymm16(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                float tmp_f = dequantize_qsymm16(in, qi_in.scale);
                tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
                tmp = quantize_qsymm16(tmp_f, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
777
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000778Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
779{
780 ARM_COMPUTE_UNUSED(act_info);
Georgios Pinitas4b3fba12019-06-04 17:31:46 +0100781 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000782 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);
783
784 return Status{};
785}
786
/** Execute the kernel on the given window.
 *
 * Returns immediately when the configured activation is disabled; otherwise
 * dispatches to the data-type-specific activation implementation through the
 * member-function pointer _func (set during configuration, outside this chunk).
 *
 * @param[in] window Region on which to execute the kernel.
 * @param[in] info   Thread info (unused by this kernel).
 */
void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info)
{
    // Early exit on disabled activation
    if(!_act_info.enabled())
    {
        return;
    }

    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    // Dispatch to the selected templated activation implementation
    (this->*_func)(window);
}