/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"

#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/QAsymm8.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include <arm_neon.h>
#include <array>
#include <cmath>
#include <map>

using namespace arm_compute;
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);

    // Checks performed when output is configured
    if((output != nullptr) && (output->total_size() != 0))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
    }

    return Status{};
}

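// Computes the maximal execution window for a 16-element step and grows the
// input/output padding requirements to match. If update_window_and_padding()
// reports that the window had to change, the available padding is insufficient
// and an error Status is returned instead. Every data-type path below consumes
// exactly these 16 elements per iteration (e.g. four float32x4_t registers or
// two qint16x8_t registers).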
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
{
    constexpr unsigned int num_elems_processed_per_iteration = 16;
    Window                 win            = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
    bool                   window_changed = false;

    if(output != nullptr && (output->total_size() != 0))
    {
        AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);

        window_changed = update_window_and_padding(win,
                                                   AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration),
                                                   output_access);

        output_access.set_valid_region(win, input->valid_region());
    }
    else
    {
        // In-place computation
        window_changed = update_window_and_padding(win,
                                                   AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration));
    }

    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
    return std::make_pair(err, win);
}
} // namespace

NEActivationLayerKernel::NEActivationLayerKernel()
    : _input(nullptr), _output(nullptr), _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
{
}

void NEActivationLayerKernel::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input);

    _input    = input;
    _act_info = activation_info;
    _output   = input;

    if(output != nullptr)
    {
        // Output auto-initialization if not yet initialized
        auto_init_if_empty(*output->info(), *input->info()->clone());
        _output = output;
    }

    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr));

    ARM_COMPUTE_ERROR_ON_MSG((input->info()->data_type() == DataType::QASYMM8) && (activation_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
                             "For QASYMM8 only lower/upper bounded relu is supported");

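    // Each data type owns a static dispatch table mapping the requested
    // ActivationFunction to the matching templated member function; configure()
    // resolves the pointer once, so run() reduces to a single indirect call.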
    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
    };

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    // Activation functions : FP16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
    };
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

    // Activation functions : QS8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint8_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint8_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint8_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qint8_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint8_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint8_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint8_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
    };
    // Activation functions : QS16
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs16 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint16_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint16_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint16_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint16_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint16_t> },
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qint16_t> },
        { ActivationFunction::LEAKY_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LEAKY_RELU, qint16_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint16_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint16_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint16_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint16_t> },
    };
    // Activation functions : QASYMM8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 =
    {
        { ActivationFunction::LU_BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t> },
    };

    switch(input->info()->data_type())
    {
        case DataType::QASYMM8:
            _func = act_map_qasymm8[activation_info.activation()];
            break;
        case DataType::QS8:
            _func = act_map_qs8[activation_info.activation()];
            break;
        case DataType::QS16:
            _func = act_map_qs16[activation_info.activation()];
            break;
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            _func = act_map_f16[activation_info.activation()];
            break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

    // Configure kernel window
    auto win_config = validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
    ICPPKernel::configure(win_config.second);
}

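// The FP16 path is only compiled when the toolchain advertises half-precision
// vector arithmetic (e.g. an -march setting with the fp16 extension); without
// it, F16 inputs hit the "Unsupported data type." error in configure().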
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float16x8_t CONST_0 = vdupq_n_f16(0.f);
    static const float16x8_t CONST_1 = vdupq_n_f16(1.f);

    const float16x8_t a = vdupq_n_f16(_act_info.a());
    const float16x8_t b = vdupq_n_f16(_act_info.b());

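    // vld2q_f16/vst2q_f16 move the 16 half-precision values of one window step
    // as two de-interleaved 8-lane registers; since every activation is applied
    // element-wise, the interleaving pattern has no effect on the result.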
    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto input_ptr  = reinterpret_cast<const float16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());

        const float16x8x2_t in  = vld2q_f16(input_ptr);
        float16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f16(in.val[0]),
                        vabsq_f16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[0])),
                        vminq_f16(a, vmaxq_f16(CONST_0, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f16(a, vmaxq_f16(b, in.val[0])),
                        vminq_f16(a, vmaxq_f16(b, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vaddq_f16(b, vmulq_f16(a, in.val[0])),
                        vaddq_f16(b, vmulq_f16(a, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[0])))),
                        vinvq_f16(vaddq_f16(CONST_1, vexpq_f16(vnegq_f16(in.val[1])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f16(CONST_0, in.val[0]),
                        vmaxq_f16(CONST_0, in.val[1])
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_f16(vcgtq_f16(in.val[0], CONST_0), in.val[0], vmulq_f16(a, in.val[0])),
                        vbslq_f16(vcgtq_f16(in.val[1], CONST_0), in.val[1], vmulq_f16(a, in.val[1]))
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[0]))),
                        vlogq_f16(vaddq_f16(CONST_1, vexpq_f16(in.val[1]))),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f16(vinvsqrtq_f16(in.val[0])),
                        vinvq_f16(vinvsqrtq_f16(in.val[1])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f16(in.val[0], in.val[0]),
                        vmulq_f16(in.val[1], in.val[1])
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[0]))),
                        vmulq_f16(a, vtanhq_f16(vmulq_f16(b, in.val[1]))),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Not implemented");
                break;
        }

        vst2q_f16(output_ptr, tmp);
    },
    input, output);
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
    static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
    const float32x4_t        a       = vdupq_n_f32(_act_info.a());
    const float32x4_t        b       = vdupq_n_f32(_act_info.b());

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const float *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float *>(output.ptr());

        const float32x4x4_t in  = vld4q_f32(input_ptr);
        float32x4x4_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f32(in.val[0]),
                        vabsq_f32(in.val[1]),
                        vabsq_f32(in.val[2]),
                        vabsq_f32(in.val[3]),
                    }
                };
                break;
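            // LINEAR is a * x + b; vmlaq_f32(b, a, x) fuses the multiply-add,
            // computing b + a * x lane by lane.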
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vmlaq_f32(b, a, in.val[0]),
                        vmlaq_f32(b, a, in.val[1]),
                        vmlaq_f32(b, a, in.val[2]),
                        vmlaq_f32(b, a, in.val[3]),
                    }
                };
                break;
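            // LOGISTIC is 1 / (1 + e^-x), assembled from the NEMath vector
            // approximations of exp and reciprocal.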
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[0])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[1])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[2])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[3])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f32(CONST_0, in.val[0]),
                        vmaxq_f32(CONST_0, in.val[1]),
                        vmaxq_f32(CONST_0, in.val[2]),
                        vmaxq_f32(CONST_0, in.val[3]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[0])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[1])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[2])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(b, in.val[0])),
                        vminq_f32(a, vmaxq_f32(b, in.val[1])),
                        vminq_f32(a, vmaxq_f32(b, in.val[2])),
                        vminq_f32(a, vmaxq_f32(b, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_f32(vcgtq_f32(in.val[0], CONST_0), in.val[0], vmulq_f32(a, in.val[0])),
                        vbslq_f32(vcgtq_f32(in.val[1], CONST_0), in.val[1], vmulq_f32(a, in.val[1])),
                        vbslq_f32(vcgtq_f32(in.val[2], CONST_0), in.val[2], vmulq_f32(a, in.val[2])),
                        vbslq_f32(vcgtq_f32(in.val[3], CONST_0), in.val[3], vmulq_f32(a, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[0]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[1]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[2]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[3]))),
                    }
                };
                break;
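            // SQRT is computed as 1 / rsqrt(x): plain NEON offers only
            // reciprocal(-square-root) estimate instructions, which the NEMath
            // helpers presumably refine with Newton-Raphson steps.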
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f32(vinvsqrtq_f32(in.val[0])),
                        vinvq_f32(vinvsqrtq_f32(in.val[1])),
                        vinvq_f32(vinvsqrtq_f32(in.val[2])),
                        vinvq_f32(vinvsqrtq_f32(in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f32(in.val[0], in.val[0]),
                        vmulq_f32(in.val[1], in.val[1]),
                        vmulq_f32(in.val[2], in.val[2]),
                        vmulq_f32(in.val[3], in.val[3]),
                    }
                };
                break;
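            // TANH is the scaled form a * tanh(b * x); a == b == 1 yields the
            // plain hyperbolic tangent.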
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[0]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[1]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[2]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[3]))),
                    }
                };
                break;
            default:
                break;
        }

        vst4q_f32(output_ptr, tmp);
    },
    input, output);
}

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator  input(_input, window);
    Iterator  output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

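    // fixed_point_position is the number of fractional bits of the QS8 format,
    // i.e. values carry an implicit scale of 2^-fixed_point_position; the
    // vq*_qs8 helpers from NEFixedPoint are saturating fixed-point operations.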
    static const qint8x16_t CONST_0 = vdupq_n_qs8(0);
    const qint8x16_t        CONST_1 = vdupq_n_qs8(sqcvt_qs8_f32(1.f, fixed_point_position));
    const qint8x16_t        a       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.a(), fixed_point_position));
    const qint8x16_t        b       = vdupq_n_qs8(sqcvt_qs8_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int8_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());

        const qint8x16_t in  = vld1q_qs8(input_ptr);
        qint8x16_t       tmp = {};

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp = vqabsq_qs8(in);
                break;
            case ActivationFunction::LINEAR:
                tmp = vqmlaq_qs8(b, a, in, fixed_point_position);
                break;
            case ActivationFunction::LOGISTIC:
                tmp = vqrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::RELU:
                tmp = vmaxq_qs8(CONST_0, in);
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(CONST_0, in));
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(b, in));
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp = vbslq_s8(vcgtq_s8(in, CONST_0), in, vmulq_qs8(a, in, fixed_point_position));
                break;
            case ActivationFunction::SOFT_RELU:
                tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::SQRT:
                tmp = vqrecipq_qs8(vqinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
                break;
            case ActivationFunction::SQUARE:
                tmp = vqmulq_qs8(in, in, fixed_point_position);
                break;
            case ActivationFunction::TANH:
                tmp = vqmulq_qs8(a, vqtanhq_qs8(vqmulq_qs8(b, in, fixed_point_position), fixed_point_position), fixed_point_position);
                break;
            default:
                break;
        }

        vst1q_qs8(output_ptr, tmp);
    },
    input, output);
}

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);
    const QuantizationInfo qi_in  = _input->info()->quantization_info();
    const QuantizationInfo qi_out = _output->info()->quantization_info();
    const qasymm8x16_t     a      = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset));
    const qasymm8x16_t     b      = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset));
    // Initialise scale/offset for re-quantization
    float       s  = qi_in.scale / qi_out.scale;
    float       o  = -qi_in.offset * s + qi_out.offset;
    float32x4_t vs = vdupq_n_f32(s);
    float32x4_t vo = vdupq_n_f32(o);
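    // Derivation: a quantized value q represents (q - offset) * scale.
    // Requiring the same real value in both spaces,
    // (q_out - offset_out) * scale_out = (q_in - offset_in) * scale_in,
    // gives q_out = q_in * s + o with s and o as above, which is exactly the
    // multiply-accumulate that vmlaq_qasymm8 applies below.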

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const qasymm8_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<qasymm8_t *>(output.ptr());

        const qasymm8x16_t in  = vld1q_u8(input_ptr);
        qasymm8x16_t       tmp = {};

        switch(F)
        {
            case ActivationFunction::LU_BOUNDED_RELU:
                // Perform activation
                tmp = vminq_u8(a, vmaxq_u8(b, in));
                // Re-quantize to new output space
                tmp = vmlaq_qasymm8(tmp, vs, vo);
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst1q_u8(output_ptr, tmp);
    },
    input, output);
}

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, qint16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator  input(_input, window);
    Iterator  output(_output, window);
    const int fixed_point_position = _input->info()->fixed_point_position();

    static const qint16x8_t CONST_0 = vdupq_n_qs16(0);
    const qint16x8_t        CONST_1 = vdupq_n_qs16(sqcvt_qs16_f32(1.f, fixed_point_position));
    const qint16x8_t        a       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.a(), fixed_point_position));
    const qint16x8_t        b       = vdupq_n_qs16(sqcvt_qs16_f32(_act_info.b(), fixed_point_position));

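    // Mirrors the QS8 path with 16-bit saturating fixed-point helpers; the
    // 16-element window step is covered by two 8-lane registers per iteration.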
    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int16_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

        const qint16x8x2_t in  = vld2q_s16(input_ptr);
        qint16x8x2_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vqabsq_qs16(in.val[0]),
                        vqabsq_qs16(in.val[1]),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vqmlaq_qs16(b, a, in.val[0], fixed_point_position),
                        vqmlaq_qs16(b, a, in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[0]), fixed_point_position)), fixed_point_position),
                        vqrecipq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(vnegq_s16(in.val[1]), fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_qs16(CONST_0, in.val[0]),
                        vmaxq_qs16(CONST_0, in.val[1]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[0])),
                        vminq_qs16(a, vmaxq_qs16(CONST_0, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LU_BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_qs16(a, vmaxq_qs16(b, in.val[0])),
                        vminq_qs16(a, vmaxq_qs16(b, in.val[1])),
                    }
                };
                break;
            case ActivationFunction::LEAKY_RELU:
                tmp =
                {
                    {
                        vbslq_s16(vcgtq_s16(in.val[0], CONST_0), in.val[0], vmulq_qs16(a, in.val[0], fixed_point_position)),
                        vbslq_s16(vcgtq_s16(in.val[1], CONST_0), in.val[1], vmulq_qs16(a, in.val[1], fixed_point_position)),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[0], fixed_point_position)), fixed_point_position),
                        vlogq_qs16(vqaddq_qs16(CONST_1, vqexpq_qs16(in.val[1], fixed_point_position)), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[0], fixed_point_position), fixed_point_position),
                        vqrecipq_qs16(vqinvsqrtq_qs16(in.val[1], fixed_point_position), fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vqmulq_qs16(in.val[0], in.val[0], fixed_point_position),
                        vqmulq_qs16(in.val[1], in.val[1], fixed_point_position),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[0], fixed_point_position), fixed_point_position), fixed_point_position),
                        vqmulq_qs16(a, vqtanhq_qs16(vqmulq_qs16(b, in.val[1], fixed_point_position), fixed_point_position), fixed_point_position),
                    }
                };
                break;
            default:
                ARM_COMPUTE_ERROR("Function not implemented");
                break;
        }

        vst2q_qs16(output_ptr, tmp);
    },
    input, output);
}

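// Static validation entry point: re-runs the argument and window checks from
// configure() on cloned TensorInfo objects, so the caller's descriptors are
// never modified.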
Status NEActivationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_UNUSED(act_info);
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);

    return Status{};
}

void NEActivationLayerKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

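    // Dispatch to the type/function specialisation selected in configure()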
    (this->*_func)(window);
}