/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"

#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/NEON/NESymm.h"
#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include <arm_neon.h>
#include <array>
#include <cmath>
#include <map>
#include <set>

using namespace arm_compute;
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);

    static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
    {
        ActivationLayerInfo::ActivationFunction::RELU,
        ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
        ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
        ActivationLayerInfo::ActivationFunction::LOGISTIC,
        ActivationLayerInfo::ActivationFunction::TANH
    };
    static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
    {
        ActivationLayerInfo::ActivationFunction::LOGISTIC,
        ActivationLayerInfo::ActivationFunction::TANH
    };
    const DataType                                data_type = input->data_type();
    const QuantizationInfo                       &oq_info   = (output != nullptr) ? output->quantization_info() : input->quantization_info();
    const ActivationLayerInfo::ActivationFunction f_act     = activation_info.activation();

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0),
                                    "For QASYMM8, only tanh, logistic, relu and lower/upper bounded relu are supported");

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
                                    "For QSYMM16, only tanh and logistic are supported");
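    // tanh maps onto [-1, 1] and logistic onto [0, 1], so a fixed output quantization is
    // required for the full output range to be representable (e.g. [-1, 1] on uint8 gives
    // scale 1/128 with zero-point 128; on symmetric int16 it gives scale 1/32768).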
    ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
    ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, 0)));

    ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
    ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));

    // Checks performed when output is configured
    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    }

    return Status{};
}

std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
{
    // Configure kernel window
    Window win = calculate_max_window(*input, Steps());

    if(output != nullptr)
    {
        // Output auto initialization if not yet initialized
        auto_init_if_empty(*output, *input->clone());

        // NEActivationLayerKernel doesn't need padding, so update_window_and_padding() can be skipped
        Coordinates coord;
        coord.set_num_dimensions(output->num_dimensions());
        output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
    }

    return std::make_pair(Status{}, win);
}
} // namespace

NEActivationLayerKernel::NEActivationLayerKernel()
    : _input(nullptr), _output(nullptr), _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
{
}

void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input);

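    // Run in place by default; an explicit output tensor below overrides this.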
    _input    = input;
    _act_info = activation_info;
    _output   = input;

    if(output != nullptr)
    {
        _output = output;
    }

    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, activation_info));

    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, float> },
    };

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Activation functions : FP16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, float16_t> },
    };
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

    // Activation functions : QASYMM8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qasymm8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
    };

    // Activation functions : QSYMM16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },
    };

    switch(input->info()->data_type())
    {
        case DataType::QASYMM8:
            _func = act_map_qasymm8[activation_info.activation()];
            break;
        case DataType::QSYMM16:
            _func = act_map_qsymm16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            _func = act_map_f16[activation_info.activation()];
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

    // Configure kernel window
    auto win_config = validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
    ICPPKernel::configure(win_config.second);
}

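// Floating-point path (F32, and F16 when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined):
// the main loop works on whole 128-bit NEON registers and a scalar tail handles the rest.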
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
NEActivationLayerKernel::activation(const Window &window)
{
    /** NEON vector tag type. */
    using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;

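    // One 128-bit register holds 16 / sizeof(T) elements: 4 x F32 or 8 x F16 per iteration.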
    const int  window_step_x  = 16 / sizeof(T);
    const auto window_start_x = static_cast<int>(window.x().start());
    const auto window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act = F;

    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
    const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
    const auto va      = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{});
    const auto vb      = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{});
    const auto a       = static_cast<T>(_act_info.a());
    const auto b       = static_cast<T>(_act_info.b());

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            switch(act)
            {
                case ActivationFunction::ABS:
                    tmp = wrapper::vabs(vin);
                    break;
                case ActivationFunction::LINEAR:
                    tmp = wrapper::vmla(vb, va, vin);
                    break;
                case ActivationFunction::LOGISTIC:
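                    // logistic(x) = 1 / (1 + exp(-x))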
                    tmp = wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(vin))));
                    break;
                case ActivationFunction::RELU:
                    tmp = wrapper::vmax(const_0, vin);
                    break;
                case ActivationFunction::BOUNDED_RELU:
                    tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
                    break;
                case ActivationFunction::LU_BOUNDED_RELU:
                    tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
                    break;
                case ActivationFunction::LEAKY_RELU:
                    tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
                    break;
                case ActivationFunction::SOFT_RELU:
                    tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin)));
                    break;
                case ActivationFunction::SQRT:
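                    // sqrt(x) evaluated as 1 / (1 / sqrt(x)) via the reciprocal and reciprocal-sqrt helpers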
                    tmp = wrapper::vinv(wrapper::vinvsqrt(vin));
                    break;
                case ActivationFunction::SQUARE:
                    tmp = wrapper::vmul(vin, vin);
                    break;
                case ActivationFunction::TANH:
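                    // Scaled tanh: a * tanh(b * x)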
                    tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
                    break;
                case ActivationFunction::IDENTITY:
                    tmp = vin;
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements
        for(; x < window_end_x; ++x)
        {
            const T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T       tmp;
            switch(act)
            {
                case ActivationFunction::ABS:
                    tmp = std::abs(in);
                    break;
                case ActivationFunction::LINEAR:
                    tmp = a * in + b;
                    break;
                case ActivationFunction::LOGISTIC:
                    tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
                    break;
                case ActivationFunction::RELU:
                    tmp = std::max<T>(static_cast<T>(0), in);
                    break;
                case ActivationFunction::BOUNDED_RELU:
                    tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
                    break;
                case ActivationFunction::LU_BOUNDED_RELU:
                    tmp = std::min<T>(a, std::max<T>(b, in));
                    break;
                case ActivationFunction::LEAKY_RELU:
                    tmp = (in > 0) ? in : a * in;
                    break;
                case ActivationFunction::SOFT_RELU:
                    tmp = std::log(static_cast<T>(1) + std::exp(in));
                    break;
                case ActivationFunction::SQRT:
                    tmp = std::sqrt(in);
                    break;
                case ActivationFunction::SQUARE:
                    tmp = in * in;
                    break;
                case ActivationFunction::TANH:
                    tmp = a * std::tanh(b * in);
                    break;
                case ActivationFunction::IDENTITY:
                    tmp = in;
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}

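// QASYMM8 path: the RELU variants operate directly on the quantized uint8 values (with a and b
// pre-quantized into the input space) followed by a fused requantization, while LOGISTIC and
// TANH dequantize to F32, apply the function and requantize with the output parameters.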
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    const int  window_step_x  = 16 / sizeof(T);
    const auto window_start_x = static_cast<int>(window.x().start());
    const auto window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act = F;

    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
    const qasymm8x16_t            va       = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
    const qasymm8x16_t            vb       = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
    const qasymm8_t               a        = quantize_qasymm8(_act_info.a(), qi_in);
    const qasymm8_t               b        = quantize_qasymm8(_act_info.b(), qi_in);
    const qasymm8_t               const_0  = quantize_qasymm8(0.f, qi_in);
    const qasymm8x16_t            vconst_0 = vdupq_n_u8(const_0);
    const auto                    vconst_1 = vdupq_n_f32(1.f);
    const float32x4_t             va_f32   = vdupq_n_f32(_act_info.a());
    const float32x4_t             vb_f32   = vdupq_n_f32(_act_info.b());
    const float                   a_f32    = _act_info.a();
    const float                   b_f32    = _act_info.b();

    // Initialise scale/offset for re-quantization
    float       s  = qi_in.scale / qi_out.scale;
    float       o  = -qi_in.offset * s + qi_out.offset;
    float32x4_t vs = vdupq_n_f32(s);
    float32x4_t vo = vdupq_n_f32(o);
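    // Derivation: dequantize(q) = (q - offset_in) * scale_in and quantize(x) = x / scale_out + offset_out,
    // so moving a quantized value between the two spaces is the affine map q_out = q_in * s + o.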

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            if(act == ActivationFunction::RELU)
            {
                // Perform activation
                tmp = vmaxq_u8(vconst_0, vin);
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::BOUNDED_RELU)
            {
                // Perform activation
                tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::LU_BOUNDED_RELU)
            {
                // Perform activation
                tmp = vminq_u8(va, vmaxq_u8(vb, vin));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
            }
            else if(act == ActivationFunction::LOGISTIC)
            {
                // De-quantize
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[2])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[3])))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize(tmp_dep, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                // De-quantize
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize(tmp_dep, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements
        for(; x < window_end_x; ++x)
        {
            T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T tmp;
            if(act == ActivationFunction::RELU)
            {
                tmp = std::max(const_0, in);
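                // Requantize and saturate to the uint8 range [0, 255]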
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::BOUNDED_RELU)
            {
                tmp = std::min(a, std::max(const_0, in));
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::LU_BOUNDED_RELU)
            {
                tmp = std::min(a, std::max(b, in));
                tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
            }
            else if(act == ActivationFunction::LOGISTIC)
            {
                float tmp_f = dequantize_qasymm8(in, qi_in);
                tmp_f       = 1.f / (1.f + std::exp(-tmp_f));
                tmp         = quantize_qasymm8(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                float tmp_f = dequantize_qasymm8(in, qi_in);
                tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                tmp         = quantize_qasymm8(tmp_f, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}

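// QSYMM16 path: only LOGISTIC and TANH are supported. Symmetric int16 quantization has no
// offset, so dequantization and requantization use the scale alone; values are processed in F32.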
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    const int  window_step_x  = 16 / sizeof(T);
    const auto window_start_x = static_cast<int>(window.x().start());
    const auto window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act = F;

    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(_input, win_collapsed);
    Iterator output(_output, win_collapsed);

    const UniformQuantizationInfo qi_in    = _input->info()->quantization_info().uniform();
    const UniformQuantizationInfo qi_out   = _output->info()->quantization_info().uniform();
    const auto                    vconst_1 = vdupq_n_f32(1.f);
    const float32x4_t             va_f32   = vdupq_n_f32(_act_info.a());
    const float32x4_t             vb_f32   = vdupq_n_f32(_act_info.b());
    const float                   a_f32    = _act_info.a();
    const float                   b_f32    = _act_info.b();

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
        ARM_COMPUTE_UNUSED(tmp);

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            if(act == ActivationFunction::LOGISTIC)
            {
                // De-quantize
                const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
                // Perform activation
                const float32x4x2_t tmp_dep =
                {
                    {
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_int16(tmp_dep, qi_out.scale);
            }
            else if(act == ActivationFunction::TANH)
            {
                // De-quantize
                const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
                // Perform activation
                const float32x4x2_t tmp_dep =
                {
                    {
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_int16(tmp_dep, qi_out.scale);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements
        for(; x < window_end_x; ++x)
        {
            T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T tmp;
            if(act == ActivationFunction::LOGISTIC)
            {
                float tmp_f = dequantize_qsymm16(in, qi_in.scale);
                tmp_f       = 1.f / (1.f + std::exp(-tmp_f));
                tmp         = quantize_qsymm16(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                float tmp_f = dequantize_qsymm16(in, qi_in.scale);
                tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                tmp         = quantize_qsymm16(tmp_f, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}

Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);

    return Status{};
}

void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    (this->*_func)(window);
}
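
// A minimal usage sketch (illustrative only, not part of this translation unit). Tensor
// creation/allocation and the NEScheduler runtime are assumed to follow the usual
// arm_compute patterns; in practice the NEActivationLayer function wraps this kernel.
//
//     Tensor src, dst; // assume F32 tensors with matching shapes, already allocated
//     NEActivationLayerKernel kernel;
//     kernel.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
//     NEScheduler::get().schedule(&kernel, Window::DimY);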