/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"

#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include <arm_neon.h>
#include <array>
#include <cmath>
#include <map>

using namespace arm_compute;

NEActivationLayerKernel::NEActivationLayerKernel()
    : _input(nullptr), _output(nullptr), _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
{
}

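// Note: this kernel is normally reached through the NEActivationLayer function rather than
// used directly. A minimal, hypothetical usage sketch (assumes already-allocated F32 tensors
// and the default NEON scheduler; not part of this file):
//
//   Tensor src, dst;                                    // shapes and allocation set up elsewhere
//   NEActivationLayerKernel kernel;
//   kernel.configure(&src, &dst,
//                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
//   NEScheduler::get().schedule(&kernel, Window::DimY); // splits the window across threads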
void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);

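    // Default to in-place execution; _output is redirected below if a separate output tensor is provided.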
    _input    = input;
    _act_info = activation_info;
    _output   = input;

    if(output != nullptr)
    {
        // Output auto initialization if not yet initialized
        auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());

        ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);

        _output = output;
    }

    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
    };
    // Activation functions : QS8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint8_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint8_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint8_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint8_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint8_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
    };
    // Activation functions : QS16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> },
    };

    switch(input->info()->data_type())
    {
        case DataType::QS8:
            _func = act_map_qs8[activation_info.activation()];
            break;
        case DataType::QS16:
            _func = act_map_qs16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

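    // Every data-type path consumes 16 elements per window step: four float32x4 vectors for F32,
    // one qint8x16 vector for QS8, or two qint16x8 vectors for QS16.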
    constexpr unsigned int num_elems_processed_per_iteration = 16;

    // Configure kernel window
    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));

    if(output != nullptr)
    {
        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);

        update_window_and_padding(win,
                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
                                  output_access);

        output_access.set_valid_region(win, input->info()->valid_region());
    }
    else
    {
        // In-place computation
        update_window_and_padding(win,
                                  AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
    }

    ICPPKernel::configure(win);
}

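// FP32 path: each iteration loads 16 floats, applies the selected activation with NEON intrinsics
// (using the vexpq_f32/vlogq_f32/vtanhq_f32 approximation helpers from NEMath.h), and stores 16 floats.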
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
    static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
    const float32x4_t        a       = vdupq_n_f32(_act_info.a());
    const float32x4_t        b       = vdupq_n_f32(_act_info.b());

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const float *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float *>(output.ptr());

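        // vld4q_f32/vst4q_f32 de-interleave and re-interleave the same 16 contiguous floats; since the
        // activation is element-wise, the net effect is a plain 16-element load and store.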
        const float32x4x4_t in  = vld4q_f32(input_ptr);
        float32x4x4_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f32(in.val[0]),
                        vabsq_f32(in.val[1]),
                        vabsq_f32(in.val[2]),
                        vabsq_f32(in.val[3]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[0])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[1])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[2])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vmlaq_f32(b, a, in.val[0]),
                        vmlaq_f32(b, a, in.val[1]),
                        vmlaq_f32(b, a, in.val[2]),
                        vmlaq_f32(b, a, in.val[3]),
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[0])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[1])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[2])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[3])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f32(CONST_0, in.val[0]),
                        vmaxq_f32(CONST_0, in.val[1]),
                        vmaxq_f32(CONST_0, in.val[2]),
                        vmaxq_f32(CONST_0, in.val[3]),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[0]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[1]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[2]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[3]))),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f32(vinvsqrtq_f32(in.val[0])),
                        vinvq_f32(vinvsqrtq_f32(in.val[1])),
                        vinvq_f32(vinvsqrtq_f32(in.val[2])),
                        vinvq_f32(vinvsqrtq_f32(in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f32(in.val[0], in.val[0]),
                        vmulq_f32(in.val[1], in.val[1]),
                        vmulq_f32(in.val[2], in.val[2]),
                        vmulq_f32(in.val[3], in.val[3]),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[0]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[1]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[2]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[3]))),
                    }
                };
                break;
            default:
                break;
        }

        vst4q_f32(output_ptr, tmp);
    },
    input, output);
}

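// QS8 fixed-point path: 16 Q-format int8 values per iteration, using the saturating fixed-point
// helpers from NEFixedPoint.h; a, b and the constant 1 are converted to the tensor's
// fixed-point position up front.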
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

    static const qint8x16_t CONST_0 = vdupq_n_qs8(0);
    const qint8x16_t        CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position));
    const qint8x16_t        a       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.a(), fixed_point_position));
    const qint8x16_t        b       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int8_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());

        const qint8x16_t in  = vld1q_qs8(input_ptr);
        qint8x16_t       tmp = {};

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp = vqabsq_qs8(in);
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(CONST_0, in));
                break;
            case ActivationFunction::LINEAR:
                tmp = vqmlaq_qs8(b, a, in, fixed_point_position);
                break;
            case ActivationFunction::LOGISTIC:
                tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::RELU:
                tmp = vmaxq_qs8(CONST_0, in);
                break;
            case ActivationFunction::SOFT_RELU:
                tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::SQRT:
                tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
                break;
            case ActivationFunction::SQUARE:
                tmp = vqmulq_qs8(in, in, fixed_point_position);
                break;
            case ActivationFunction::TANH:
                tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position);
                break;
            default:
                break;
        }

        vst1q_qs8(output_ptr, tmp);
    },
    input, output);
}

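// QS16 fixed-point path: same element-wise logic as QS8, but on 16 Q-format int16 values per
// iteration held in two qint16x8 vectors.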
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, int16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

    static const qint16x8_t CONST_0 = vdupq_n_qs16(0);
    const qint16x8_t        CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position));
    const qint16x8_t        a       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position));
    const qint16x8_t        b       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

        const qint16x8x2_t in  = vld2q_s16(input_ptr);
        qint16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vqabsq_qs16(in.val[0]),
                        vqabsq_qs16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])),
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vqmlaq_qs16(b, a, in.val[0], fixed_point_position),
                        vqmlaq_qs16(b, a, in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position),
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_qs16(CONST_0, in.val[0]),
                        vmaxq_qs16(CONST_0, in.val[1]),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position),
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position),
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vqmulq_qs16(in.val[0], in.val[0], fixed_point_position),
                        vqmulq_qs16(in.val[1], in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position),
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position),
                    }
                };
                break;
            default:
                break;
        }

        vst2q_qs16(output_ptr, tmp);
    },
    input, output);
}

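// run() is invoked by the scheduler, typically once per thread with a sub-window of the kernel's
// configured window; it dispatches to the template specialization selected in configure().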
void NEActivationLayerKernel::run(const Window &window)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    (this->*_func)(window);
}