blob: 7023d5976391debbbc66b2fc9d4936a197b48cd3 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2017-2020 Arm Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
25
Anthony Barbiereaefd002018-07-20 17:49:35 +010026#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010027#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
Michel Iwaniec5dfeae62017-11-29 10:48:23 +000029#include "arm_compute/core/NEON/NEAsymm.h"
giuros01c9573f32019-06-20 10:30:17 +010030#include "arm_compute/core/NEON/NESymm.h"
Georgios Pinitas5a594532018-12-03 14:30:05 +000031#include "arm_compute/core/NEON/wrapper/wrapper.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010032#include "arm_compute/core/TensorInfo.h"
33#include "arm_compute/core/Utils.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010034#include "arm_compute/core/Window.h"
35
36#include <arm_neon.h>
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010037#include <set>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010038
Michele Di Giorgiof9b595a2020-07-03 13:34:52 +010039namespace arm_compute
40{
Michalis Spyrouafa5d812017-11-30 14:25:57 +000041namespace
42{
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010043Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000044{
Anthony Barbiereaefd002018-07-20 17:49:35 +010045 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
Michele Di Giorgiof9b595a2020-07-03 13:34:52 +010046 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000047
morgolockaa85cdf2020-02-28 15:38:28 +000048 const static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations =
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010049 {
50 ActivationLayerInfo::ActivationFunction::RELU,
51 ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
52 ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
53 ActivationLayerInfo::ActivationFunction::LOGISTIC,
morgolockaa85cdf2020-02-28 15:38:28 +000054 ActivationLayerInfo::ActivationFunction::TANH,
55 ActivationLayerInfo::ActivationFunction::HARD_SWISH
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010056 };
morgolockaa85cdf2020-02-28 15:38:28 +000057 const static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations =
giuros01c9573f32019-06-20 10:30:17 +010058 {
59 ActivationLayerInfo::ActivationFunction::LOGISTIC,
morgolockaa85cdf2020-02-28 15:38:28 +000060 ActivationLayerInfo::ActivationFunction::TANH,
61 ActivationLayerInfo::ActivationFunction::HARD_SWISH
giuros01c9573f32019-06-20 10:30:17 +010062 };
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010063 const DataType data_type = input->data_type();
64 const QuantizationInfo &oq_info = (output != nullptr) ? output->quantization_info() : input->quantization_info();
65 const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
66
giuros01c9573f32019-06-20 10:30:17 +010067 ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (qasymm8_supported_activations.count(f_act) == 0),
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010068 "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");
giuros01c9573f32019-06-20 10:30:17 +010069
70 ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && (qsymm16_supported_activations.count(f_act) == 0),
71 "For QSYMM16 only tanh and logistic are supported");
Michalis Spyrou8d4d1b82019-11-28 11:31:23 +000072 ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::TANH)
73 && (oq_info != QuantizationInfo(1.f / 128.f, 128)));
74 ARM_COMPUTE_RETURN_ERROR_ON((data_type == DataType::QASYMM8 || data_type == DataType::QASYMM16) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
75 && (oq_info != QuantizationInfo(1.f / 256.f, 0)));
76
77 ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 128.f, 0)));
78 ARM_COMPUTE_RETURN_ERROR_ON(data_type == DataType::QASYMM8_SIGNED && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 256.f, -128)));
Georgios Pinitas4b3fba12019-06-04 17:31:46 +010079
giuros01c9573f32019-06-20 10:30:17 +010080 ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
81 ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
82
Michalis Spyrouafa5d812017-11-30 14:25:57 +000083 // Checks performed when output is configured
84 if((output != nullptr) && (output->total_size() != 0))
85 {
86 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
87 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000088 }
89
90 return Status{};
91}
92
Georgios Pinitas1fd2c802020-06-16 17:44:46 +010093std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input, ITensorInfo *output)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000094{
Georgios Pinitas5a594532018-12-03 14:30:05 +000095 // Configure kernel window
96 Window win = calculate_max_window(*input, Steps());
Michalis Spyrouafa5d812017-11-30 14:25:57 +000097
Georgios Pinitas5a594532018-12-03 14:30:05 +000098 if(output != nullptr)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000099 {
Georgios Pinitas5a594532018-12-03 14:30:05 +0000100 // Output auto inizialitation if not yet initialized
101 auto_init_if_empty(*output, *input->clone());
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000102
Georgios Pinitas5a594532018-12-03 14:30:05 +0000103 // NEActivationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
104 Coordinates coord;
105 coord.set_num_dimensions(output->num_dimensions());
106 output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000107 }
108
Georgios Pinitas5a594532018-12-03 14:30:05 +0000109 return std::make_pair(Status{}, win);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000110}
111} // namespace
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100112
// Default constructor: no activation implementation is selected until
// configure() assigns _func; _act_info is default-constructed.
NEActivationLayerKernel::NEActivationLayerKernel()
    : _func(nullptr), _act_info()
{
}
117
/** Configure the kernel: validate arguments, select the templated activation
 * implementation matching the input data type and activation function, and
 * set up the execution window.
 *
 * @param[in]     input           Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/QSYMM16/F16/F32.
 * @param[in,out] output          Destination tensor info (auto-initialised if empty).
 * @param[in]     activation_info Activation function and parameters.
 */
void NEActivationLayerKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

    _act_info = activation_info;

    // Disabled activation, thus no operation needed
    // NOTE(review): the switch further down unconditionally overwrites _func
    // from the lookup maps, so this nullptr does not persist — confirm run()
    // guards on _act_info.enabled() (or _func) before dispatching.
    if(!activation_info.enabled())
    {
        _func = nullptr;
    }

    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output, activation_info));

    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::ELU, &NEActivationLayerKernel::activation<ActivationFunction::ELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, float> },
        { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, float> },

    };

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Activation functions : FP16 (only compiled in when FP16 vector arithmetic is available)
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
        { ActivationFunction::ELU, &NEActivationLayerKernel::activation<ActivationFunction::ELU, float16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, float16_t> },
        { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, float16_t> },

    };
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/

    // Activation functions : QASYMM8_SIGNED
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8_signed =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qasymm8_signed_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qasymm8_signed_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_signed_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_signed_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_signed_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_signed_t> },
        { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_signed_t> },

    };

    // Activation functions : QASYMM8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qasymm8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qasymm8_t> },
        { ActivationFunction::IDENTITY, &NEActivationLayerKernel::activation<ActivationFunction::IDENTITY, qasymm8_t> },
        { ActivationFunction::HARD_SWISH, &NEActivationLayerKernel::activation<ActivationFunction::HARD_SWISH, qasymm8_t> },

    };

    // Activation functions : QSYMM16
    // NOTE(review): no HARD_SWISH entry here, although validate_arguments()
    // accepts it for QSYMM16 — operator[] below would default-insert a nullptr
    // executor for that combination; verify against validate_arguments().
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 =
    {
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qsymm16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qsymm16_t> },

    };

    // Select the executor matching the input data type.
    switch(input->data_type())
    {
        case DataType::QASYMM8_SIGNED:
            _func = act_map_qasymm8_signed[activation_info.activation()];
            break;
        case DataType::QASYMM8:
            _func = act_map_qasymm8[activation_info.activation()];
            break;
        case DataType::QSYMM16:
            _func = act_map_qsymm16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            _func = act_map_f16[activation_info.activation()];
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

    // Configure kernel window
    auto win_config = validate_and_configure_window(input, output);
    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
    ICPPKernel::configure(win_config.second);
}
236
/** Activation implementation for floating-point types (float, and float16_t
 * when FP16 vector arithmetic is available).
 *
 * The window is collapsed and iterated manually over X: a vectorised NEON
 * loop processes 128 bits (16 / sizeof(T) elements) per step, then a scalar
 * loop handles the left-over tail elements.
 *
 * @param[in]  src    Source tensor.
 * @param[out] dst    Destination tensor.
 * @param[in]  window Execution window.
 */
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
NEActivationLayerKernel::activation(const ITensor *src, ITensor *dst, const Window &window)
{
    /** NEON vector tag type. */
    using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;

    const int  window_step_x  = 16 / sizeof(T); // elements per 128-bit vector
    const auto window_start_x = static_cast<int>(window.x().start());
    const auto window_end_x   = static_cast<int>(window.x().end());
    // F is a template parameter, so each instantiation has a fixed function.
    const ActivationFunction act = F;

    // Collapse Z+ dimensions and take over X stepping in the loops below.
    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(src, win_collapsed);
    Iterator output(dst, win_collapsed);

    // A small delta added to the input to prevent NAN values caused by zeros in inputs to SQRT
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    const auto delta = wrapper::vdup_n(static_cast<T>(1e-7), ExactTagType {});
#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
    const auto delta = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType {});
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
    const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType {});
    const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
    // Constants for HARD_SWISH: x * relu6(x + 3) / 6.
    const auto const_6     = wrapper::vdup_n(static_cast<T>(6.f), ExactTagType{});
    const auto const_3     = wrapper::vdup_n(static_cast<T>(3.f), ExactTagType{});
    const auto const_inv_6 = wrapper::vdup_n(static_cast<T>(0.166666667f), ExactTagType{});

    // Activation parameters a/b, broadcast (va/vb) and scalar (a/b) forms.
    const auto va = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{});
    const auto vb = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{});
    const auto a  = static_cast<T>(_act_info.a());
    const auto b  = static_cast<T>(_act_info.b());
    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            switch(act)
            {
                case ActivationFunction::ABS:
                    tmp = wrapper::vabs(vin);
                    break;
                case ActivationFunction::LINEAR:
                    // a * x + b via fused multiply-accumulate
                    tmp = wrapper::vmla(vb, va, vin);
                    break;
                case ActivationFunction::LOGISTIC:
                    // 1 / (1 + exp(-x))
                    tmp = wrapper::vinv(wrapper::vadd(const_1, wrapper::vexpq(wrapper::vneg(vin))));
                    break;
                case ActivationFunction::RELU:
                    tmp = wrapper::vmax(const_0, vin);
                    break;
                case ActivationFunction::BOUNDED_RELU:
                    tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
                    break;
                case ActivationFunction::LU_BOUNDED_RELU:
                    tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
                    break;
                case ActivationFunction::LEAKY_RELU:
                    // x if x > 0 else a * x, via bit-select on the comparison mask
                    tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
                    break;
                case ActivationFunction::SOFT_RELU:
                    // log(1 + exp(x))
                    tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin)));
                    break;
                case ActivationFunction::ELU:
                    tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin, wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
                    break;
                case ActivationFunction::SQRT:
                    // sqrt via reciprocal of inverse-sqrt; delta avoids NaN at zero input
                    tmp = wrapper::vinv(wrapper::vinvsqrt(wrapper::vadd(vin, delta)));
                    break;
                case ActivationFunction::SQUARE:
                    tmp = wrapper::vmul(vin, vin);
                    break;
                case ActivationFunction::TANH:
                    // a * tanh(b * x)
                    tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
                    break;
                case ActivationFunction::IDENTITY:
                    tmp = vin;
                    break;
                case ActivationFunction::HARD_SWISH:
                    // x * relu6(x + 3) / 6
                    tmp = wrapper::vmul(vin, wrapper::vmul(const_inv_6, wrapper::vmin(const_6, wrapper::vmax(const_0, wrapper::vadd(vin, const_3)))));
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements
        for(; x < window_end_x; ++x)
        {
            const T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T       tmp;
            switch(act)
            {
                case ActivationFunction::ABS:
                    tmp = std::abs(in);
                    break;
                case ActivationFunction::LINEAR:
                    tmp = a * in + b;
                    break;
                case ActivationFunction::LOGISTIC:
                    tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
                    break;
                case ActivationFunction::RELU:
                    tmp = std::max<T>(static_cast<T>(0), in);
                    break;
                case ActivationFunction::BOUNDED_RELU:
                    tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
                    break;
                case ActivationFunction::LU_BOUNDED_RELU:
                    tmp = std::min<T>(a, std::max<T>(b, in));
                    break;
                case ActivationFunction::LEAKY_RELU:
                    tmp = (in > 0) ? in : a * in;
                    break;
                case ActivationFunction::SOFT_RELU:
                    tmp = std::log(static_cast<T>(1) + std::exp(in));
                    break;
                case ActivationFunction::ELU:
                    tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
                    break;
                case ActivationFunction::SQRT:
                    // NOTE(review): unlike the vector path, no `delta` is added here
                    // before the sqrt — confirm whether the paths should match.
                    tmp = std::sqrt(in);
                    break;
                case ActivationFunction::SQUARE:
                    tmp = in * in;
                    break;
                case ActivationFunction::TANH:
                    tmp = a * std::tanh(b * in);
                    break;
                case ActivationFunction::IDENTITY:
                    tmp = in;
                    break;
                case ActivationFunction::HARD_SWISH:
                    // x * relu6(x + 3) / 6 (scalar form)
                    tmp = in * ((std::min(std::max((in + 3), 0.0f), 6.0f)) * 0.166666667f);
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
390
391template <ActivationLayerInfo::ActivationFunction F, typename T>
Georgios Pinitas1fd2c802020-06-16 17:44:46 +0100392typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivationLayerKernel::activation(const ITensor *src, ITensor *dst, const Window &window)
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000393{
Georgios Pinitas5a594532018-12-03 14:30:05 +0000394 const int window_step_x = 16 / sizeof(T);
395 const auto window_start_x = static_cast<int>(window.x().start());
396 const auto window_end_x = static_cast<int>(window.x().end());
397 const ActivationFunction act = F;
398
399 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
400 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
401
Georgios Pinitas1fd2c802020-06-16 17:44:46 +0100402 Iterator input(src, win_collapsed);
403 Iterator output(dst, win_collapsed);
Georgios Pinitas5a594532018-12-03 14:30:05 +0000404
Georgios Pinitas1fd2c802020-06-16 17:44:46 +0100405 const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
406 const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
morgolockaa85cdf2020-02-28 15:38:28 +0000407 const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
408 const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
409 const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
410 const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
411 const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
412 const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
413 const auto vconst_1 = vdupq_n_f32(1.f);
414 const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
415 const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
416 const float a_f32 = _act_info.a();
417 const float b_f32 = _act_info.b();
418 const auto const_6_f32 = vdupq_n_f32(6.f);
419 const auto const_0_f32 = vdupq_n_f32(0.f);
420 const auto const_3_f32 = vdupq_n_f32(3.f);
421 const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f);
Michele Di Giorgiodde3ad92018-01-23 16:55:24 +0000422
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000423 // Initialise scale/offset for re-quantization
424 float s = qi_in.scale / qi_out.scale;
425 float o = -qi_in.offset * s + qi_out.offset;
426 float32x4_t vs = vdupq_n_f32(s);
427 float32x4_t vo = vdupq_n_f32(o);
428
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100429 execute_window_loop(win_collapsed, [&](const Coordinates &)
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000430 {
Georgios Pinitas5a594532018-12-03 14:30:05 +0000431 const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
432 const auto output_ptr = reinterpret_cast<T *>(output.ptr());
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000433
Georgios Pinitas5a594532018-12-03 14:30:05 +0000434 wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000435
Georgios Pinitas5a594532018-12-03 14:30:05 +0000436 // Compute S elements per iteration
437 int x = window_start_x;
438 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000439 {
Georgios Pinitas5a594532018-12-03 14:30:05 +0000440 const auto vin = wrapper::vloadq(input_ptr + x);
441 if(act == ActivationFunction::RELU)
442 {
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000443 // Perform activation
Georgios Pinitas5a594532018-12-03 14:30:05 +0000444 tmp = vmaxq_u8(vconst_0, vin);
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000445 // Re-quantize to new output space
446 tmp = vmlaq_qasymm8(tmp, vs, vo);
Georgios Pinitas5a594532018-12-03 14:30:05 +0000447 }
448 else if(act == ActivationFunction::BOUNDED_RELU)
449 {
Michele Di Giorgiodde3ad92018-01-23 16:55:24 +0000450 // Perform activation
Georgios Pinitas5a594532018-12-03 14:30:05 +0000451 tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin));
Michele Di Giorgiodde3ad92018-01-23 16:55:24 +0000452 // Re-quantize to new output space
453 tmp = vmlaq_qasymm8(tmp, vs, vo);
Georgios Pinitas5a594532018-12-03 14:30:05 +0000454 }
455 else if(act == ActivationFunction::LU_BOUNDED_RELU)
456 {
457 // Perform activation
458 tmp = vminq_u8(va, vmaxq_u8(vb, vin));
459 // Re-quantize to new output space
460 tmp = vmlaq_qasymm8(tmp, vs, vo);
461 }
Isabella Gottardi03bb5502019-01-31 17:45:07 +0000462 else if(act == ActivationFunction::LOGISTIC)
463 {
Isabella Gottardif7a3bf22019-03-15 14:58:24 +0000464 // De-quantize
465 const auto vin_deq = vdequantize(vin, qi_in);
Isabella Gottardi03bb5502019-01-31 17:45:07 +0000466 // Perform activation
Isabella Gottardif7a3bf22019-03-15 14:58:24 +0000467 const float32x4x4_t tmp_dep =
468 {
469 {
470 wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
471 wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
472 wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[2])))),
473 wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[3])))),
474 }
475 };
476 // Re-quantize to new output space
477 tmp = vquantize(tmp_dep, qi_out);
Isabella Gottardi03bb5502019-01-31 17:45:07 +0000478 }
Georgios Pinitas4b3fba12019-06-04 17:31:46 +0100479 else if(act == ActivationFunction::TANH)
480 {
481 // De-quantize
482 const auto vin_deq = vdequantize(vin, qi_in);
483 // Perform activation
484 const float32x4x4_t tmp_dep =
485 {
486 {
487 wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
488 wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
489 wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
490 wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
491 }
492 };
493 // Re-quantize to new output space
494 tmp = vquantize(tmp_dep, qi_out);
495 }
morgolockaa85cdf2020-02-28 15:38:28 +0000496 else if(act == ActivationFunction::HARD_SWISH)
497 {
498 // De-quantize
499 const auto vin_deq = vdequantize(vin, qi_in);
500 // Perform activation
501 const float32x4x4_t tmp_dep =
502 {
503 {
504 wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
505 wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
506 wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
507 wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
508 }
509 };
510 // Re-quantize to new output space
511 tmp = vquantize(tmp_dep, qi_out);
512 }
Georgios Pinitas5a594532018-12-03 14:30:05 +0000513 else
514 {
515 ARM_COMPUTE_ERROR("Unsupported activation function");
516 }
517 wrapper::vstore(output_ptr + x, tmp);
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000518 }
519
Georgios Pinitas5a594532018-12-03 14:30:05 +0000520 // Compute left-over elements
521 for(; x < window_end_x; ++x)
522 {
523 T in = *(reinterpret_cast<const T *>(input_ptr + x));
524 T tmp;
525 if(act == ActivationFunction::RELU)
526 {
527 tmp = std::max(const_0, in);
Michele Di Giorgiob70770e2020-04-22 12:26:10 +0100528 tmp = utility::clamp<int32_t, qasymm8_t>(tmp * s + o);
Georgios Pinitas5a594532018-12-03 14:30:05 +0000529 }
530 else if(act == ActivationFunction::BOUNDED_RELU)
531 {
532 tmp = std::min(a, std::max(const_0, in));
Michele Di Giorgiob70770e2020-04-22 12:26:10 +0100533 tmp = utility::clamp<int32_t, qasymm8_t>(tmp * s + o);
Georgios Pinitas5a594532018-12-03 14:30:05 +0000534 }
535 else if(act == ActivationFunction::LU_BOUNDED_RELU)
536 {
537 tmp = std::min(a, std::max(b, in));
Michele Di Giorgiob70770e2020-04-22 12:26:10 +0100538 tmp = utility::clamp<int32_t, qasymm8_t>(tmp * s + o);
Georgios Pinitas5a594532018-12-03 14:30:05 +0000539 }
Isabella Gottardi03bb5502019-01-31 17:45:07 +0000540 else if(act == ActivationFunction::LOGISTIC)
541 {
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100542 float tmp_f = dequantize_qasymm8(in, qi_in);
Isabella Gottardi03bb5502019-01-31 17:45:07 +0000543 tmp_f = 1.f / (1.f + std::exp(-tmp_f));
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100544 tmp = quantize_qasymm8(tmp_f, qi_out);
Isabella Gottardi03bb5502019-01-31 17:45:07 +0000545 }
Georgios Pinitas4b3fba12019-06-04 17:31:46 +0100546 else if(act == ActivationFunction::TANH)
547 {
548 float tmp_f = dequantize_qasymm8(in, qi_in);
549 tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
550 tmp = quantize_qasymm8(tmp_f, qi_out);
551 }
morgolockaa85cdf2020-02-28 15:38:28 +0000552 else if(act == ActivationFunction::HARD_SWISH)
553 {
554 float tmp_f = dequantize_qasymm8(in, qi_in);
555 tmp_f = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
556 tmp = quantize_qasymm8(tmp_f, qi_out);
557 }
Georgios Pinitas5a594532018-12-03 14:30:05 +0000558 else
559 {
560 ARM_COMPUTE_ERROR("Unsupported activation function");
561 }
562 *(output_ptr + x) = tmp;
563 }
Michel Iwaniec5dfeae62017-11-29 10:48:23 +0000564 },
565 input, output);
566}
567
/** Run the activation function on a QASYMM8_SIGNED tensor over the given window.
 *
 * The window is collapsed where possible and traversed in two phases: a
 * vectorized loop consuming 16 int8 lanes (one 128-bit Neon vector) per
 * iteration, then a scalar loop for the left-over tail. RELU / BOUNDED_RELU /
 * LU_BOUNDED_RELU are evaluated directly in the quantized domain and then
 * re-scaled onto the output quantization grid; LOGISTIC / TANH / HARD_SWISH
 * de-quantize to float32, evaluate the function, and re-quantize.
 *
 * @param[in]  src    Source tensor; its uniform quantization info (scale, offset) is read.
 * @param[out] dst    Destination tensor; may use a different quantization than @p src.
 * @param[in]  window Region on which to execute the kernel.
 */
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type NEActivationLayerKernel::activation(const ITensor *src, ITensor *dst, const Window &window)
{
    // 16 one-byte lanes per 128-bit Neon vector.
    const int                window_step_x  = 16 / sizeof(T);
    const auto               window_start_x = static_cast<int>(window.x().start());
    const auto               window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act            = F;

    // Collapse higher dimensions and pin X to a single step; X is advanced manually below.
    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(src, win_collapsed);
    Iterator output(dst, win_collapsed);

    // Input and output may carry different (scale, offset) pairs.
    const UniformQuantizationInfo qi_in  = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
    // Activation bounds a/b quantized into the *input* space (vector and scalar forms).
    const qasymm8x16_signed_t va = vdupq_n_s8(quantize_qasymm8_signed(_act_info.a(), qi_in));
    const qasymm8x16_signed_t vb = vdupq_n_s8(quantize_qasymm8_signed(_act_info.b(), qi_in));
    const qasymm8_signed_t    a  = quantize_qasymm8_signed(_act_info.a(), qi_in);
    const qasymm8_signed_t    b  = quantize_qasymm8_signed(_act_info.b(), qi_in);
    // Quantized representation of real 0.0 (the ReLU threshold) in the input space.
    const qasymm8_signed_t    const_0  = quantize_qasymm8_signed(0.f, qi_in);
    const qasymm8x16_signed_t vconst_0 = vdupq_n_s8(const_0);
    const auto                vconst_1 = vdupq_n_f32(1.f);
    // Float copies of a/b for the de-quantized (LOGISTIC / TANH) paths.
    const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
    const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
    const float       a_f32  = _act_info.a();
    const float       b_f32  = _act_info.b();
    // Constants for hard-swish: x * clamp(x + 3, 0, 6) * (1/6).
    const auto const_6_f32     = vdupq_n_f32(6.f);
    const auto const_0_f32     = vdupq_n_f32(0.f);
    const auto const_3_f32     = vdupq_n_f32(3.f);
    const auto const_inv_6_f32 = vdupq_n_f32(0.166666667f); // ~1/6

    // Initialise scale/offset for re-quantization: y_q = x_q * s + o maps a value
    // quantized with qi_in onto the qi_out grid.
    float       s  = qi_in.scale / qi_out.scale;
    float       o  = -qi_in.offset * s + qi_out.offset;
    float32x4_t vs = vdupq_n_f32(s);
    float32x4_t vo = vdupq_n_f32(o);

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        // 128-bit vector of T (int8x16) holding the current result.
        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            if(act == ActivationFunction::RELU)
            {
                // Perform activation: max(0, x) in the quantized domain
                tmp = vmaxq_s8(vconst_0, vin);
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
            }
            else if(act == ActivationFunction::BOUNDED_RELU)
            {
                // Perform activation: min(a, max(0, x)) in the quantized domain
                tmp = vminq_s8(va, vmaxq_s8(vconst_0, vin));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
            }
            else if(act == ActivationFunction::LU_BOUNDED_RELU)
            {
                // Perform activation: min(a, max(b, x)) in the quantized domain
                tmp = vminq_s8(va, vmaxq_s8(vb, vin));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8_signed(tmp, vs, vo);
            }
            else if(act == ActivationFunction::LOGISTIC)
            {
                // De-quantize to four float32x4 vectors
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation: 1 / (1 + exp(-x))
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[2])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[3])))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_signed(tmp_dep, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                // De-quantize
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation: a * tanh(b * x)
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_signed(tmp_dep, qi_out);
            }
            else if(act == ActivationFunction::HARD_SWISH)
            {
                // De-quantize
                const auto vin_deq = vdequantize(vin, qi_in);
                // Perform activation: x * min(6, max(0, x + 3)) * (1/6)
                const float32x4x4_t tmp_dep =
                {
                    {
                        wrapper::vmul(vin_deq.val[0], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[0], const_3_f32))))),
                        wrapper::vmul(vin_deq.val[1], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[1], const_3_f32))))),
                        wrapper::vmul(vin_deq.val[2], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[2], const_3_f32))))),
                        wrapper::vmul(vin_deq.val[3], wrapper::vmul(const_inv_6_f32, wrapper::vmin(const_6_f32, wrapper::vmax(const_0_f32, wrapper::vadd(vin_deq.val[3], const_3_f32))))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_signed(tmp_dep, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements (scalar tail, same math as the vector loop)
        for(; x < window_end_x; ++x)
        {
            T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T tmp;
            if(act == ActivationFunction::RELU)
            {
                tmp = std::max(const_0, in);
                // Re-quantize: scale/offset map, then clamp into the int8 range.
                // NOTE(review): tmp * s + o is float and is truncated on conversion
                // to int32_t (the vector path uses vmlaq_qasymm8_signed) — confirm
                // tail/vector results are intended to match exactly.
                tmp = utility::clamp<int32_t, qasymm8_signed_t>(tmp * s + o);
            }
            else if(act == ActivationFunction::BOUNDED_RELU)
            {
                tmp = std::min(a, std::max(const_0, in));
                tmp = utility::clamp<int32_t, qasymm8_signed_t>(tmp * s + o);
            }
            else if(act == ActivationFunction::LU_BOUNDED_RELU)
            {
                tmp = std::min(a, std::max(b, in));
                tmp = utility::clamp<int32_t, qasymm8_signed_t>(tmp * s + o);
            }
            else if(act == ActivationFunction::LOGISTIC)
            {
                float tmp_f = dequantize_qasymm8_signed(in, qi_in);
                tmp_f       = 1.f / (1.f + std::exp(-tmp_f));
                tmp         = quantize_qasymm8_signed(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                float tmp_f = dequantize_qasymm8_signed(in, qi_in);
                tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                tmp         = quantize_qasymm8_signed(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::HARD_SWISH)
            {
                float tmp_f = dequantize_qasymm8_signed(in, qi_in);
                tmp_f       = tmp_f * ((std::min(std::max((tmp_f + 3), 0.0f), 6.0f)) * 0.166666667f);
                tmp         = quantize_qasymm8_signed(tmp_f, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
744
/** Run the activation function on a QSYMM16 tensor over the given window.
 *
 * Symmetric 16-bit quantization has no offset, so only the scale is used for
 * (de-)quantization. Processes 8 int16 lanes (one 128-bit Neon vector) per
 * iteration, then a scalar tail. Only LOGISTIC and TANH are supported; all
 * other activation functions abort with ARM_COMPUTE_ERROR.
 *
 * @param[in]  src    Source tensor; its uniform quantization scale is read.
 * @param[out] dst    Destination tensor; may use a different scale than @p src.
 * @param[in]  window Region on which to execute the kernel.
 */
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type NEActivationLayerKernel::activation(const ITensor *src, ITensor *dst, const Window &window)
{
    // 8 two-byte lanes per 128-bit Neon vector.
    const int                window_step_x  = 16 / sizeof(T);
    const auto               window_start_x = static_cast<int>(window.x().start());
    const auto               window_end_x   = static_cast<int>(window.x().end());
    const ActivationFunction act            = F;

    // Collapse higher dimensions and pin X to a single step; X is advanced manually below.
    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));

    Iterator input(src, win_collapsed);
    Iterator output(dst, win_collapsed);

    const UniformQuantizationInfo qi_in  = src->info()->quantization_info().uniform();
    const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
    const auto                    vconst_1 = vdupq_n_f32(1.f);
    // Float copies of the activation parameters a/b for the TANH path.
    const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
    const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
    const float       a_f32  = _act_info.a();
    const float       b_f32  = _act_info.b();

    execute_window_loop(win_collapsed, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const T *>(input.ptr());
        const auto output_ptr = reinterpret_cast<T *>(output.ptr());

        // 128-bit vector of T (int16x8) holding the current result.
        wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
        ARM_COMPUTE_UNUSED(tmp); // silences unused warnings; tmp is assigned in the supported branches

        // Compute S elements per iteration
        int x = window_start_x;
        for(; x <= (window_end_x - window_step_x); x += window_step_x)
        {
            const auto vin = wrapper::vloadq(input_ptr + x);
            if(act == ActivationFunction::LOGISTIC)
            {
                // De-quantize (symmetric: scale only, no offset) to two float32x4 vectors
                const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
                // Perform activation: 1 / (1 + exp(-x))
                const float32x4x2_t tmp_dep =
                {
                    {
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[0])))),
                        wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(vin_deq.val[1])))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_int16(tmp_dep, qi_out.scale);
            }
            else if(act == ActivationFunction::TANH)
            {
                // De-quantize
                const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
                // Perform activation: a * tanh(b * x)
                const float32x4x2_t tmp_dep =
                {
                    {
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
                        wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
                    }
                };
                // Re-quantize to new output space
                tmp = vquantize_int16(tmp_dep, qi_out.scale);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            wrapper::vstore(output_ptr + x, tmp);
        }

        // Compute left-over elements (scalar tail, same math as the vector loop)
        for(; x < window_end_x; ++x)
        {
            T in = *(reinterpret_cast<const T *>(input_ptr + x));
            T tmp;
            if(act == ActivationFunction::LOGISTIC)
            {
                float tmp_f = dequantize_qsymm16(in, qi_in.scale);
                tmp_f       = 1.f / (1.f + std::exp(-tmp_f));
                tmp         = quantize_qsymm16(tmp_f, qi_out);
            }
            else if(act == ActivationFunction::TANH)
            {
                float tmp_f = dequantize_qsymm16(in, qi_in.scale);
                tmp_f       = a_f32 * std::tanh(b_f32 * tmp_f);
                tmp         = quantize_qsymm16(tmp_f, qi_out);
            }
            else
            {
                ARM_COMPUTE_ERROR("Unsupported activation function");
            }
            *(output_ptr + x) = tmp;
        }
    },
    input, output);
}
843
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000844Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
845{
846 ARM_COMPUTE_UNUSED(act_info);
Georgios Pinitas4b3fba12019-06-04 17:31:46 +0100847 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000848 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);
849
850 return Status{};
851}
852
/** Execute the activation kernel on the given window.
 *
 * Dispatches to the type/function-specific specialization selected when the
 * kernel was configured, via the _func member function pointer.
 *
 * @param[in]  inputs  Input tensor map; must contain TensorType::ACL_SRC.
 * @param[out] outputs Output tensor map; must contain TensorType::ACL_DST.
 * @param[in]  window  Region on which to execute the kernel.
 * @param[in]  info    Thread execution info (unused by this kernel).
 */
void NEActivationLayerKernel::run_op(const InputTensorMap &inputs,
                                     const OutputTensorMap &outputs,
                                     const Window &window, const ThreadInfo &info)
{
    // Early exit on disabled activation
    // NOTE(review): returning without writing dst appears to assume in-place
    // execution (src == dst) when the activation is disabled — confirm with callers.
    if(!_act_info.enabled())
    {
        return;
    }

    ARM_COMPUTE_UNUSED(info);
    // Sanity checks: kernel configured, window within bounds, dispatch target set.
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    ARM_COMPUTE_ERROR_ON(inputs.empty() || outputs.empty());

    // Invoke the selected implementation on the SRC/DST tensors.
    (this->*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), window);
}
Michele Di Giorgiof9b595a2020-07-03 13:34:52 +0100872} // namespace arm_compute