/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"

#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include <arm_neon.h>
#include <array>
#include <cmath>
#include <map>

using namespace arm_compute;

NEActivationLayerKernel::NEActivationLayerKernel()
    : _input(nullptr), _output(nullptr), _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
{
}

void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);

    _input    = input;
    _act_info = activation_info;
    _output   = input;

    if(output != nullptr)
    {
        // Output auto initialization if not yet initialized
        auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());

        ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);

        _output = output;
    }

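    // Each map below pairs an activation with the matching templated specialization
    // for one data type; configure() caches the selected entry in _func so that
    // run() dispatches with a single indirect call per execution.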
    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
    };

#ifdef ARM_COMPUTE_ENABLE_FP16
    // Activation functions : FP16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
    };
#endif /* ARM_COMPUTE_ENABLE_FP16 */

    // Activation functions : QS8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint8_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint8_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint8_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qint8_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint8_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint8_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint8_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
    };
    // Activation functions : QS16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qint16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> },
    };

    switch(input->info()->data_type())
    {
        case DataType::QS8:
            _func = act_map_qs8[activation_info.activation()];
            break;
        case DataType::QS16:
            _func = act_map_qs16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
#ifdef ARM_COMPUTE_ENABLE_FP16
        case DataType::F16:
            _func = act_map_f16[activation_info.activation()];
            break;
#endif /* ARM_COMPUTE_ENABLE_FP16 */
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

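    // All four specializations consume 16 elements per iteration: QS8 as one
    // qint8x16, QS16 and FP16 as x2 structures of 8 lanes, FP32 as an x4
    // structure of 4 lanes.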
    constexpr unsigned int num_elems_processed_per_iteration = 16;

    // Configure kernel window
    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));

    if(output != nullptr)
    {
        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);

        update_window_and_padding(win,
                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
                                  output_access);

        output_access.set_valid_region(win, input->info()->valid_region());
    }
    else
    {
        // In-place computation
        update_window_and_padding(win,
                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
    }

    ICPPKernel::configure(win);
}

#ifdef ARM_COMPUTE_ENABLE_FP16
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
    static const float16x8_t CONST_1 = vdupq_n_f16(1.f);

    const float16x8_t a = vdupq_n_f16(_act_info.a());
    const float16x8_t b = vdupq_n_f16(_act_info.b());

    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const float16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());

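        // vld2q/vst2q de-interleave on load and re-interleave on store, so for
        // this elementwise op the element order is preserved end-to-end while
        // 16 fp16 values are processed per iteration in two 8-lane registers.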
        const float16x8x2_t in  = vld2q_f16(input_ptr);
        float16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f16(in.val[0]),
                        vabsq_f16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[0])),
                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f16(a, vmaxq_f16(b, in.val[0])),
                        vminq_f16(a, vmaxq_f16(b, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vaddq_f16(b, vmulq_f16(a, in.val[0])),
                        vaddq_f16(b, vmulq_f16(a, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[0])))),
                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[1])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f16(CONST_0, in.val[0]),
                        vmaxq_f16(CONST_0, in.val[1]),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_f16(vcgtq_f16(in.val[0], CONST_0), in.val[0], vmulq_f16(a, in.val[0])),
                        vbslq_f16(vcgtq_f16(in.val[1], CONST_0), in.val[1], vmulq_f16(a, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[0]))),
                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[1]))),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f16(vinvsqrtq_f16(in.val[0])),
                        vinvq_f16(vinvsqrtq_f16(in.val[1])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f16(in.val[0], in.val[0]),
                        vmulq_f16(in.val[1], in.val[1]),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[0]))),
                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[1]))),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Not implemented");
                break;
        }

        vst2q_f16(output_ptr, tmp);
    },
    input, output);
}
#endif /* ARM_COMPUTE_ENABLE_FP16 */

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
    static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
    const float32x4_t        a       = vdupq_n_f32(_act_info.a());
    const float32x4_t        b       = vdupq_n_f32(_act_info.b());

    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const float *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float *>(output.ptr());

        const float32x4x4_t in  = vld4q_f32(input_ptr);
        float32x4x4_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f32(in.val[0]),
                        vabsq_f32(in.val[1]),
                        vabsq_f32(in.val[2]),
                        vabsq_f32(in.val[3]),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vmlaq_f32(b, a, in.val[0]),
                        vmlaq_f32(b, a, in.val[1]),
                        vmlaq_f32(b, a, in.val[2]),
                        vmlaq_f32(b, a, in.val[3]),
                    }
                };
                break;
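            // Logistic: 1 / (1 + exp(-x)), built from the NEMath vector exp
            // and reciprocal helpers.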
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[0])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[1])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[2])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[3])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f32(CONST_0, in.val[0]),
                        vmaxq_f32(CONST_0, in.val[1]),
                        vmaxq_f32(CONST_0, in.val[2]),
                        vmaxq_f32(CONST_0, in.val[3]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[0])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[1])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[2])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(b, in.val[0])),
                        vminq_f32(a, vmaxq_f32(b, in.val[1])),
                        vminq_f32(a, vmaxq_f32(b, in.val[2])),
                        vminq_f32(a, vmaxq_f32(b, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_f32(vcgtq_f32(in.val[0], CONST_0), in.val[0], vmulq_f32(a, in.val[0])),
                        vbslq_f32(vcgtq_f32(in.val[1], CONST_0), in.val[1], vmulq_f32(a, in.val[1])),
                        vbslq_f32(vcgtq_f32(in.val[2], CONST_0), in.val[2], vmulq_f32(a, in.val[2])),
                        vbslq_f32(vcgtq_f32(in.val[3], CONST_0), in.val[3], vmulq_f32(a, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[0]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[1]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[2]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[3]))),
                    }
                };
                break;
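            // sqrt(x) is formed as the reciprocal of the inverse square root;
            // vinvsqrtq/vinvq are NEMath's estimate-plus-refinement helpers.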
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f32(vinvsqrtq_f32(in.val[0])),
                        vinvq_f32(vinvsqrtq_f32(in.val[1])),
                        vinvq_f32(vinvsqrtq_f32(in.val[2])),
                        vinvq_f32(vinvsqrtq_f32(in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f32(in.val[0], in.val[0]),
                        vmulq_f32(in.val[1], in.val[1]),
                        vmulq_f32(in.val[2], in.val[2]),
                        vmulq_f32(in.val[3], in.val[3]),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[0]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[1]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[2]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[3]))),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst4q_f32(output_ptr, tmp);
    },
    input, output);
}

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);
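    // Number of fractional bits of the QS8 format; every saturating
    // fixed-point helper below needs it to rescale products and transcendentals.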
    const int fixed_point_position = _input->info()->fixed_point_position();

    static const qint8x16_t CONST_0 = vdupq_n_qs8(0);
    const qint8x16_t        CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position));
    const qint8x16_t        a       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.a(), fixed_point_position));
    const qint8x16_t        b       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const int8_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());

        const qint8x16_t in  = vld1q_qs8(input_ptr);
        qint8x16_t       tmp = {};

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp = vqabsq_qs8(in);
                break;
            case ActivationFunction::LINEAR:
                tmp = vqmlaq_qs8(b, a, in, fixed_point_position);
                break;
            case ActivationFunction::LOGISTIC:
                tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::RELU:
                tmp = vmaxq_qs8(CONST_0, in);
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(CONST_0, in));
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(b, in));
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp = vbslq_s8(vcgtq_s8(in, CONST_0), in, vmulq_qs8(a, in, fixed_point_position));
                break;
            case ActivationFunction::SOFT_RELU:
                tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::SQRT:
                tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
                break;
            case ActivationFunction::SQUARE:
                tmp = vqmulq_qs8(in, in, fixed_point_position);
                break;
            case ActivationFunction::TANH:
                tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position);
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst1q_qs8(output_ptr, tmp);
    },
    input, output);
}

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

    static const qint16x8_t CONST_0 = vdupq_n_qs16(0);
    const qint16x8_t        CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position));
    const qint16x8_t        a       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position));
    const qint16x8_t        b       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const int16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

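        // 16 qint16 values are processed per iteration as two 8-lane halves;
        // the vld2q/vst2q pair de-interleaves and re-interleaves, so element
        // order is preserved for this elementwise op.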
        const qint16x8x2_t in  = vld2q_s16(input_ptr);
        qint16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vqabsq_qs16(in.val[0]),
                        vqabsq_qs16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vqmlaq_qs16(b, a, in.val[0], fixed_point_position),
                        vqmlaq_qs16(b, a, in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position),
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_qs16(CONST_0, in.val[0]),
                        vmaxq_qs16(CONST_0, in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])),
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_qs16(a, vmaxq_qs16(b, in.val[0])),
                        vminq_qs16(a, vmaxq_qs16(b, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_s16(vcgtq_s16(in.val[0], CONST_0), in.val[0], vmulq_qs16(a, in.val[0], fixed_point_position)),
                        vbslq_s16(vcgtq_s16(in.val[1], CONST_0), in.val[1], vmulq_qs16(a, in.val[1], fixed_point_position)),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position),
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position),
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vqmulq_qs16(in.val[0], in.val[0], fixed_point_position),
                        vqmulq_qs16(in.val[1], in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position),
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst2q_qs16(output_ptr, tmp);
    },
    input, output);
}

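// Dispatches to the specialization cached by configure(); the scheduler
// typically calls this once per thread, each with a disjoint sub-window.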
void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    (this->*_func)(window);
}