/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"

#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include <arm_neon.h>
#include <array>
#include <cmath>
#include <map>

using namespace arm_compute;

NEActivationLayerKernel::NEActivationLayerKernel()
    : _input(nullptr), _output(nullptr), _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
{
}

void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);

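    // Default to in-place execution; _output is redirected below when a distinct output tensor is provided.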
    _input    = input;
    _act_info = activation_info;
    _output   = input;

    if(output != nullptr)
    {
        // Auto-initialize the output if it is not yet initialized
        auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());

        ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);

        _output = output;
    }

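    // One dispatch table per data type; configure() caches the selected entry in _func, which run() invokes.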
    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
    };

#ifdef ARM_COMPUTE_ENABLE_FP16
    // Activation functions : FP16
    // Note: LEAKY_RELU is registered here because the FP16 specialization below implements it;
    // without this entry the lookup would leave _func null and run() would abort.
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
    };
#endif /* ARM_COMPUTE_ENABLE_FP16 */

    // Activation functions : QS8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint8_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint8_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint8_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint8_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint8_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint8_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
    };
    // Activation functions : QS16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> },
    };

    switch(input->info()->data_type())
    {
        case DataType::QS8:
            _func = act_map_qs8[activation_info.activation()];
            break;
        case DataType::QS16:
            _func = act_map_qs16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
#ifdef ARM_COMPUTE_ENABLE_FP16
        case DataType::F16:
            _func = act_map_f16[activation_info.activation()];
            break;
#endif /* ARM_COMPUTE_ENABLE_FP16 */
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

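    // Every specialization below consumes 16 elements per iteration: one qint8x16_t,
    // two qint16x8_t/float16x8_t, or four float32x4_t registers.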
    constexpr unsigned int num_elems_processed_per_iteration = 16;

    // Configure kernel window
    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));

    if(output != nullptr)
    {
        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);

        update_window_and_padding(win,
                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
                                  output_access);

        output_access.set_valid_region(win, input->info()->valid_region());
    }
    else
    {
        // In-place computation
        update_window_and_padding(win,
                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
    }

    ICPPKernel::configure(win);
}

#ifdef ARM_COMPUTE_ENABLE_FP16
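// FP16 path: processes 16 half-precision values per iteration as a pair of float16x8_t vectors (vld2q_f16/vst2q_f16).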
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
    static const float16x8_t CONST_1 = vdupq_n_f16(1.f);

    const float16x8_t a = vdupq_n_f16(_act_info.a());
    const float16x8_t b = vdupq_n_f16(_act_info.b());

    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const float16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());

        const float16x8x2_t in  = vld2q_f16(input_ptr);
        float16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f16(in.val[0]),
                        vabsq_f16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[0])),
                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vaddq_f16(b, vmulq_f16(a, in.val[0])),
                        vaddq_f16(b, vmulq_f16(a, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[0])))),
                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[1])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f16(CONST_0, in.val[0]),
                        vmaxq_f16(CONST_0, in.val[1])
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_f16(vcgtq_f16(in.val[0], CONST_0), in.val[0], vmulq_f16(a, in.val[0])),
                        vbslq_f16(vcgtq_f16(in.val[1], CONST_0), in.val[1], vmulq_f16(a, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[0]))),
                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[1]))),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f16(vinvsqrtq_f16(in.val[0])),
                        vinvq_f16(vinvsqrtq_f16(in.val[1])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f16(in.val[0], in.val[0]),
                        vmulq_f16(in.val[1], in.val[1])
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[0]))),
                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[1]))),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Not implemented");
                break;
        }

        vst2q_f16(output_ptr, tmp);
    },
    input, output);
}
#endif /* ARM_COMPUTE_ENABLE_FP16 */

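// FP32 path: processes 16 single-precision values per iteration as four float32x4_t vectors (vld4q_f32/vst4q_f32).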
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
    static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
    const float32x4_t        a       = vdupq_n_f32(_act_info.a());
    const float32x4_t        b       = vdupq_n_f32(_act_info.b());

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const float *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float *>(output.ptr());

        const float32x4x4_t in  = vld4q_f32(input_ptr);
        float32x4x4_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f32(in.val[0]),
                        vabsq_f32(in.val[1]),
                        vabsq_f32(in.val[2]),
                        vabsq_f32(in.val[3]),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vmlaq_f32(b, a, in.val[0]),
                        vmlaq_f32(b, a, in.val[1]),
                        vmlaq_f32(b, a, in.val[2]),
                        vmlaq_f32(b, a, in.val[3]),
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[0])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[1])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[2])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[3])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f32(CONST_0, in.val[0]),
                        vmaxq_f32(CONST_0, in.val[1]),
                        vmaxq_f32(CONST_0, in.val[2]),
                        vmaxq_f32(CONST_0, in.val[3]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[0])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[1])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[2])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_f32(vcgtq_f32(in.val[0], CONST_0), in.val[0], vmulq_f32(a, in.val[0])),
                        vbslq_f32(vcgtq_f32(in.val[1], CONST_0), in.val[1], vmulq_f32(a, in.val[1])),
                        vbslq_f32(vcgtq_f32(in.val[2], CONST_0), in.val[2], vmulq_f32(a, in.val[2])),
                        vbslq_f32(vcgtq_f32(in.val[3], CONST_0), in.val[3], vmulq_f32(a, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[0]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[1]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[2]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[3]))),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f32(vinvsqrtq_f32(in.val[0])),
                        vinvq_f32(vinvsqrtq_f32(in.val[1])),
                        vinvq_f32(vinvsqrtq_f32(in.val[2])),
                        vinvq_f32(vinvsqrtq_f32(in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f32(in.val[0], in.val[0]),
                        vmulq_f32(in.val[1], in.val[1]),
                        vmulq_f32(in.val[2], in.val[2]),
                        vmulq_f32(in.val[3], in.val[3]),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[0]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[1]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[2]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[3]))),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst4q_f32(output_ptr, tmp);
    },
    input, output);
}

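// QS8 path: processes 16 fixed-point values per iteration in one qint8x16_t register;
// the saturating fixed-point intrinsics take the Q-format position explicitly.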
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qint8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

    static const qint8x16_t CONST_0 = vdupq_n_qs8(0);
    const qint8x16_t        CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position));
    const qint8x16_t        a       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.a(), fixed_point_position));
    const qint8x16_t        b       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int8_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());

        const qint8x16_t in  = vld1q_qs8(input_ptr);
        qint8x16_t       tmp = {};

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp = vqabsq_qs8(in);
                break;
            case ActivationFunction::LINEAR:
                tmp = vqmlaq_qs8(b, a, in, fixed_point_position);
                break;
            case ActivationFunction::LOGISTIC:
                tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::RELU:
                tmp = vmaxq_qs8(CONST_0, in);
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(CONST_0, in));
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp = vbslq_s8(vcgtq_s8(in, CONST_0), in, vmulq_qs8(a, in, fixed_point_position));
                break;
            case ActivationFunction::SOFT_RELU:
                tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::SQRT:
                tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
                break;
            case ActivationFunction::SQUARE:
                tmp = vqmulq_qs8(in, in, fixed_point_position);
                break;
            case ActivationFunction::TANH:
                tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position);
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst1q_qs8(output_ptr, tmp);
    },
    input, output);
}

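// QS16 path: processes 16 fixed-point values per iteration as a pair of qint16x8_t vectors (vld2q_s16/vst2q_qs16).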
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

    static const qint16x8_t CONST_0 = vdupq_n_qs16(0);
    const qint16x8_t        CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position));
    const qint16x8_t        a       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position));
    const qint16x8_t        b       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

        const qint16x8x2_t in  = vld2q_s16(input_ptr);
        qint16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vqabsq_qs16(in.val[0]),
                        vqabsq_qs16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vqmlaq_qs16(b, a, in.val[0], fixed_point_position),
                        vqmlaq_qs16(b, a, in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position),
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_qs16(CONST_0, in.val[0]),
                        vmaxq_qs16(CONST_0, in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])),
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_s16(vcgtq_s16(in.val[0], CONST_0), in.val[0], vmulq_qs16(a, in.val[0], fixed_point_position)),
                        vbslq_s16(vcgtq_s16(in.val[1], CONST_0), in.val[1], vmulq_qs16(a, in.val[1], fixed_point_position)),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position),
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position),
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vqmulq_qs16(in.val[0], in.val[0], fixed_point_position),
                        vqmulq_qs16(in.val[1], in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position),
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst2q_qs16(output_ptr, tmp);
    },
    input, output);
}

void NEActivationLayerKernel::run(const Window &window)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    (this->*_func)(window);
}
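
// A minimal usage sketch (illustrative only, not part of the kernel): it assumes `src` and `dst`
// are already-allocated arm_compute Tensors and that the kernel is dispatched via the NEON scheduler.
//
//   NEActivationLayerKernel act_kernel;
//   act_kernel.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
//   NEScheduler::get().schedule(&act_kernel, Window::DimY);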