/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"

#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/QAsymm8.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include <arm_neon.h>
#include <array>
#include <cmath>
#include <map>
using namespace arm_compute;

namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);

    // Checks performed when output is configured
    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
    }

    return Status{};
}

std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
{
    constexpr unsigned int num_elems_processed_per_iteration = 16;
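    // Note: 16 elements per iteration matches every vector path below: 4x float32x4_t for F32,
    // 2x float16x8_t for F16, 2x qint16x8_t for QS16 and a single 16-byte vector for QS8/QASYMM8.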
    Window                 win            = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
    bool                   window_changed = false;

    if(output != nullptr && (output->total_size() != 0))
    {
        AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);

        window_changed = update_window_and_padding(win,
                                                   AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration),
                                                   output_access);

        output_access.set_valid_region(win, input->valid_region());
    }
    else
    {
        // In-place computation
        window_changed = update_window_and_padding(win,
                                                   AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration));
    }

    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
    return std::make_pair(err, win);
}
} // namespace

NEActivationLayerKernel::NEActivationLayerKernel()
    : _input(nullptr), _output(nullptr), _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
{
}

void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input);

    _input    = input;
    _act_info = activation_info;
    _output   = input;

    if(output != nullptr)
    {
        // Output auto initialization if not yet initialized
        auto_init_if_empty(*output->info(), *input->info()->clone());
        _output = output;
    }

    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr));

    ARM_COMPUTE_ERROR_ON_MSG((input->info()->data_type() == DataType::QASYMM8) && (activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
                             && (activation_info.activation() != ActivationLayerInfo::ActivationFunction::RELU),
                             "For QASYMM8 only relu and lower/upper bounded relu are supported");
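    // Only RELU and LU_BOUNDED_RELU are implemented for QASYMM8 because both reduce to a
    // min/max clamp that can be applied directly on the quantized values (see the QASYMM8
    // specialisation of activation() below); the remaining functions would need a
    // dequantize/compute/requantize round trip.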

    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
    };

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Activation functions : FP16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
    };
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

    // Activation functions : QS8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint8_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint8_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint8_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qint8_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint8_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint8_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint8_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
    };
    // Activation functions : QS16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qint16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> },
    };
    // Activation functions : QASYMM8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 =
    {
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qasymm8_t> },
    };

    switch(input->info()->data_type())
    {
        case DataType::QASYMM8:
            _func = act_map_qasymm8[activation_info.activation()];
            break;
        case DataType::QS8:
            _func = act_map_qs8[activation_info.activation()];
            break;
        case DataType::QS16:
            _func = act_map_qs16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            _func = act_map_f16[activation_info.activation()];
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

    // Configure kernel window
    auto win_config = validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
    ICPPKernel::configure(win_config.second);
}
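
// A minimal usage sketch (illustrative only; `src` and `dst` are hypothetical, pre-initialised
// tensors of a supported data type such as F32):
//
//   NEActivationLayerKernel kernel;
//   kernel.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
//   NEScheduler::get().schedule(&kernel, Window::DimY);
//
// Passing nullptr as the output runs the activation in-place on `src`.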

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
    static const float16x8_t CONST_1 = vdupq_n_f16(1.f);

    const float16x8_t a = vdupq_n_f16(_act_info.a());
    const float16x8_t b = vdupq_n_f16(_act_info.b());

    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const float16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());

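        // vld2q/vst2q move 16 contiguous halves as two de-interleaved 8-lane vectors; the lane
        // order in registers differs from memory order, but since every activation below is
        // element-wise the store re-interleaves back to the original layout.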
        const float16x8x2_t in  = vld2q_f16(input_ptr);
        float16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f16(in.val[0]),
                        vabsq_f16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[0])),
                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f16(a, vmaxq_f16(b, in.val[0])),
                        vminq_f16(a, vmaxq_f16(b, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vaddq_f16(b, vmulq_f16(a, in.val[0])),
                        vaddq_f16(b, vmulq_f16(a, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[0])))),
                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[1])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f16(CONST_0, in.val[0]),
                        vmaxq_f16(CONST_0, in.val[1])
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_f16(vcgtq_f16(in.val[0], CONST_0), in.val[0], vmulq_f16(a, in.val[0])),
                        vbslq_f16(vcgtq_f16(in.val[1], CONST_0), in.val[1], vmulq_f16(a, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[0]))),
                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[1]))),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f16(vinvsqrtq_f16(in.val[0])),
                        vinvq_f16(vinvsqrtq_f16(in.val[1])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f16(in.val[0], in.val[0]),
                        vmulq_f16(in.val[1], in.val[1])
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[0]))),
                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[1]))),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Not implemented");
                break;
        }

        vst2q_f16(output_ptr, tmp);
    },
    input, output);
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
    static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
    const float32x4_t        a       = vdupq_n_f32(_act_info.a());
    const float32x4_t        b       = vdupq_n_f32(_act_info.b());

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const float *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float *>(output.ptr());

        const float32x4x4_t in =
        {
            {
                vld1q_f32(input_ptr),
                vld1q_f32(input_ptr + 4),
                vld1q_f32(input_ptr + 8),
                vld1q_f32(input_ptr + 12)
            }
        };
        float32x4x4_t tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f32(in.val[0]),
                        vabsq_f32(in.val[1]),
                        vabsq_f32(in.val[2]),
                        vabsq_f32(in.val[3]),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vmlaq_f32(b, a, in.val[0]),
                        vmlaq_f32(b, a, in.val[1]),
                        vmlaq_f32(b, a, in.val[2]),
                        vmlaq_f32(b, a, in.val[3]),
                    }
                };
                break;
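            // LOGISTIC: 1 / (1 + exp(-x)), built from the vexpq_f32/vinvq_f32 helpers in NEMath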
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[0])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[1])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[2])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[3])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f32(CONST_0, in.val[0]),
                        vmaxq_f32(CONST_0, in.val[1]),
                        vmaxq_f32(CONST_0, in.val[2]),
                        vmaxq_f32(CONST_0, in.val[3]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[0])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[1])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[2])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(b, in.val[0])),
                        vminq_f32(a, vmaxq_f32(b, in.val[1])),
                        vminq_f32(a, vmaxq_f32(b, in.val[2])),
                        vminq_f32(a, vmaxq_f32(b, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_f32(vcgtq_f32(in.val[0], CONST_0), in.val[0], vmulq_f32(a, in.val[0])),
                        vbslq_f32(vcgtq_f32(in.val[1], CONST_0), in.val[1], vmulq_f32(a, in.val[1])),
                        vbslq_f32(vcgtq_f32(in.val[2], CONST_0), in.val[2], vmulq_f32(a, in.val[2])),
                        vbslq_f32(vcgtq_f32(in.val[3], CONST_0), in.val[3], vmulq_f32(a, in.val[3])),
                    }
                };
                break;
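            // SOFT_RELU: log(1 + exp(x)), a smooth approximation of RELU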
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[0]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[1]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[2]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[3]))),
                    }
                };
                break;
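            // SQRT: computed as 1 / (1 / sqrt(x)) from the reciprocal of the reciprocal
            // square-root estimate, avoiding a full-precision square root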
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f32(vinvsqrtq_f32(in.val[0])),
                        vinvq_f32(vinvsqrtq_f32(in.val[1])),
                        vinvq_f32(vinvsqrtq_f32(in.val[2])),
                        vinvq_f32(vinvsqrtq_f32(in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f32(in.val[0], in.val[0]),
                        vmulq_f32(in.val[1], in.val[1]),
                        vmulq_f32(in.val[2], in.val[2]),
                        vmulq_f32(in.val[3], in.val[3]),
                    }
                };
                break;
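            // TANH: a * tanh(b * x), the scaled form described by ActivationLayerInfo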
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[0]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[1]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[2]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[3]))),
                    }
                };
                break;
            default:
                break;
        }

        vst1q_f32(output_ptr, tmp.val[0]);
        vst1q_f32(output_ptr + 4, tmp.val[1]);
        vst1q_f32(output_ptr + 8, tmp.val[2]);
        vst1q_f32(output_ptr + 12, tmp.val[3]);
    },
    input, output);
}

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator  input(_input, window);
    Iterator  output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

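    // QS8 is a signed 8-bit fixed-point format with fixed_point_position fractional bits;
    // sqcvt_qs8_f32 saturating-converts the float activation parameters into that format so
    // that all arithmetic below stays in the integer domain.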
    static const qint8x16_t CONST_0 = vdupq_n_qs8(0);
    const qint8x16_t        CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position));
    const qint8x16_t        a       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.a(), fixed_point_position));
    const qint8x16_t        b       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int8_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());

        const qint8x16_t in  = vld1q_qs8(input_ptr);
        qint8x16_t       tmp = {};

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp = vqabsq_qs8(in);
                break;
            case ActivationFunction::LINEAR:
                tmp = vqmlaq_qs8(b, a, in, fixed_point_position);
                break;
            case ActivationFunction::LOGISTIC:
                tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::RELU:
                tmp = vmaxq_qs8(CONST_0, in);
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(CONST_0, in));
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(b, in));
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp = vbslq_s8(vcgtq_s8(in, CONST_0), in, vmulq_qs8(a, in, fixed_point_position));
                break;
            case ActivationFunction::SOFT_RELU:
                tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::SQRT:
                tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
                break;
            case ActivationFunction::SQUARE:
                tmp = vqmulq_qs8(in, in, fixed_point_position);
                break;
            case ActivationFunction::TANH:
                tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position);
                break;
            default:
                break;
        }

        vst1q_qs8(output_ptr, tmp);
    },
    input, output);
}

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator               input(_input, window);
    Iterator               output(_output, window);
    const QuantizationInfo qi_in  = _input->info()->quantization_info();
    const QuantizationInfo qi_out = _output->info()->quantization_info();
    const qasymm8x16_t     a      = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset));
    const qasymm8x16_t     b      = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset));
    const qasymm8x16_t     CONST_0 = vdupq_n_u8(sqcvt_qasymm8_f32(0.f, qi_in.scale, qi_in.offset)); // not static: depends on the input's quantization info

    // Initialise scale/offset for re-quantization
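    // Derivation: with x_f = qi_in.scale * (x_q - qi_in.offset) and
    // y_q = y_f / qi_out.scale + qi_out.offset, an identity re-quantization gives
    //   y_q = x_q * (qi_in.scale / qi_out.scale) + (qi_out.offset - qi_in.offset * qi_in.scale / qi_out.scale)
    //       = x_q * s + o
    // which vmlaq_qasymm8 applies lane-wise below.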
    float       s  = qi_in.scale / qi_out.scale;
    float       o  = -qi_in.offset * s + qi_out.offset;
    float32x4_t vs = vdupq_n_f32(s);
    float32x4_t vo = vdupq_n_f32(o);

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const qasymm8_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<qasymm8_t *>(output.ptr());

        const qasymm8x16_t in  = vld1q_u8(input_ptr);
        qasymm8x16_t       tmp = {};

        switch(F)
        {
            case ActivationFunction::LU_BOUNDED_RELU:
                // Perform activation
                tmp = vminq_u8(a, vmaxq_u8(b, in));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
                break;
            case ActivationFunction::RELU:
                // Perform activation
                tmp = vmaxq_u8(CONST_0, in);
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst1q_u8(output_ptr, tmp);
    },
    input, output);
}

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator  input(_input, window);
    Iterator  output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

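    // Mirrors the QS8 path with 8-lane 16-bit vectors; vld2q_s16/vst2q_s16 move
    // 2x8 = 16 elements per iteration, matching num_elems_processed_per_iteration.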
    static const qint16x8_t CONST_0 = vdupq_n_qs16(0);
    const qint16x8_t        CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position));
    const qint16x8_t        a       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position));
    const qint16x8_t        b       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

        const qint16x8x2_t in  = vld2q_s16(input_ptr);
        qint16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vqabsq_qs16(in.val[0]),
                        vqabsq_qs16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vqmlaq_qs16(b, a, in.val[0], fixed_point_position),
                        vqmlaq_qs16(b, a, in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position),
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_qs16(CONST_0, in.val[0]),
                        vmaxq_qs16(CONST_0, in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])),
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_qs16(a, vmaxq_qs16(b, in.val[0])),
                        vminq_qs16(a, vmaxq_qs16(b, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_s16(vcgtq_s16(in.val[0], CONST_0), in.val[0], vmulq_qs16(a, in.val[0], fixed_point_position)),
                        vbslq_s16(vcgtq_s16(in.val[1], CONST_0), in.val[1], vmulq_qs16(a, in.val[1], fixed_point_position)),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position),
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position),
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vqmulq_qs16(in.val[0], in.val[0], fixed_point_position),
                        vqmulq_qs16(in.val[1], in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position),
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst2q_qs16(output_ptr, tmp);
    },
    input, output);
}

Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_UNUSED(act_info);
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);

    return Status{};
}

void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    (this->*_func)(window);
}