/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"

#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/NEMath.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"

#include <arm_neon.h>
#include <array>
#include <cmath>
#include <map>

using namespace arm_compute;

NEActivationLayerKernel::NEActivationLayerKernel()
    : _func(nullptr), _act_info(ActivationFunction::LOGISTIC)
{
}

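// configure() validates the tensors, auto-initializes the output when it is
// still empty, and selects the templated execution function matching the
// requested activation and the input's data type.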
void NEActivationLayerKernel::configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
{
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QS8);
    ARM_COMPUTE_ERROR_ON_NULLPTR(output);

    // Auto-initialize the output if it has not been initialized yet
    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());

    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);

    // Activation functions : FP32
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, float> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, float> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, float> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, float> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, float> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, float> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, float> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
    };

    // Activation functions : QS8
    static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
    {
        { ActivationFunction::ABS, &NEActivationLayerKernel::activation<ActivationFunction::ABS, qint8_t> },
        { ActivationFunction::LINEAR, &NEActivationLayerKernel::activation<ActivationFunction::LINEAR, qint8_t> },
        { ActivationFunction::LOGISTIC, &NEActivationLayerKernel::activation<ActivationFunction::LOGISTIC, qint8_t> },
        { ActivationFunction::RELU, &NEActivationLayerKernel::activation<ActivationFunction::RELU, qint8_t> },
        { ActivationFunction::BOUNDED_RELU, &NEActivationLayerKernel::activation<ActivationFunction::BOUNDED_RELU, qint8_t> },
        { ActivationFunction::SOFT_RELU, &NEActivationLayerKernel::activation<ActivationFunction::SOFT_RELU, qint8_t> },
        { ActivationFunction::SQRT, &NEActivationLayerKernel::activation<ActivationFunction::SQRT, qint8_t> },
        { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, qint8_t> },
        { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, qint8_t> },
    };

    _input    = input;
    _output   = output;
    _act_info = activation_info;
    switch(input->info()->data_type())
    {
        case DataType::F32:
            _func = act_map_f32[activation_info.activation()];
            break;
        case DataType::QS8:
            _func = act_map_qs8[activation_info.activation()];
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported data type.");
    }

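    // Both code paths consume 16 elements per iteration: four float32x4_t
    // registers for F32, or a single qint8x16_t register for QS8.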
    constexpr unsigned int num_elems_processed_per_iteration = 16;

    INESimpleKernel::configure(_input, _output, num_elems_processed_per_iteration);
}
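
// A minimal usage sketch (illustrative only; the `src` and `dst` tensors and
// their allocation are assumptions that follow the usual library flow, not
// part of this file):
//
//   NEActivationLayerKernel kernel;
//   kernel.configure(&src, &dst,
//                    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
//   kernel.run(kernel.window());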

template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);

    static const float32x4_t CONST_1 = vdupq_n_f32(1.f);
    static const float32x4_t CONST_0 = vdupq_n_f32(0.f);
    const float32x4_t        a       = vdupq_n_f32(_act_info.a());
    const float32x4_t        b       = vdupq_n_f32(_act_info.b());

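    // Each iteration handles 16 floats: vld4q_f32 fills four float32x4_t
    // registers (it de-interleaves with a stride of 4, which is harmless here
    // because the same operation is applied to every lane and vst4q_f32
    // re-interleaves on the way out).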
    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const float *>(input.ptr());
        const auto output_ptr = reinterpret_cast<float *>(output.ptr());

        const float32x4x4_t in  = vld4q_f32(input_ptr);
        float32x4x4_t       tmp = { {} };

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp =
                {
                    {
                        vabsq_f32(in.val[0]),
                        vabsq_f32(in.val[1]),
                        vabsq_f32(in.val[2]),
                        vabsq_f32(in.val[3]),
                    }
                };
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp =
                {
                    {
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[0])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[1])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[2])),
                        vminq_f32(a, vmaxq_f32(CONST_0, in.val[3])),
                    }
                };
                break;
            case ActivationFunction::LINEAR:
                tmp =
                {
                    {
                        vmlaq_f32(b, a, in.val[0]),
                        vmlaq_f32(b, a, in.val[1]),
                        vmlaq_f32(b, a, in.val[2]),
                        vmlaq_f32(b, a, in.val[3]),
                    }
                };
                break;
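            // LOGISTIC: 1 / (1 + exp(-x)), built from the vexpq_f32 and
            // vinvq_f32 helpers in NEMath.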
            case ActivationFunction::LOGISTIC:
                tmp =
                {
                    {
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[0])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[1])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[2])))),
                        vinvq_f32(vaddq_f32(CONST_1, vexpq_f32(vnegq_f32(in.val[3])))),
                    }
                };
                break;
            case ActivationFunction::RELU:
                tmp =
                {
                    {
                        vmaxq_f32(CONST_0, in.val[0]),
                        vmaxq_f32(CONST_0, in.val[1]),
                        vmaxq_f32(CONST_0, in.val[2]),
                        vmaxq_f32(CONST_0, in.val[3]),
                    }
                };
                break;
            case ActivationFunction::SOFT_RELU:
                tmp =
                {
                    {
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[0]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[1]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[2]))),
                        vlogq_f32(vaddq_f32(CONST_1, vexpq_f32(in.val[3]))),
                    }
                };
                break;
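            // SQRT: computed as 1 / (1 / sqrt(x)) using the vinvsqrtq_f32
            // and vinvq_f32 helpers from NEMath.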
            case ActivationFunction::SQRT:
                tmp =
                {
                    {
                        vinvq_f32(vinvsqrtq_f32(in.val[0])),
                        vinvq_f32(vinvsqrtq_f32(in.val[1])),
                        vinvq_f32(vinvsqrtq_f32(in.val[2])),
                        vinvq_f32(vinvsqrtq_f32(in.val[3])),
                    }
                };
                break;
            case ActivationFunction::SQUARE:
                tmp =
                {
                    {
                        vmulq_f32(in.val[0], in.val[0]),
                        vmulq_f32(in.val[1], in.val[1]),
                        vmulq_f32(in.val[2], in.val[2]),
                        vmulq_f32(in.val[3], in.val[3]),
                    }
                };
                break;
            case ActivationFunction::TANH:
                tmp =
                {
                    {
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[0]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[1]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[2]))),
                        vmulq_f32(a, vtanhq_f32(vmulq_f32(b, in.val[3]))),
                    }
                };
                break;
            default:
                break;
        }

        vst4q_f32(output_ptr, tmp);
    },
    input, output);
}

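// The QS8 path mirrors the F32 one on 8-bit fixed-point data: the constants
// 1.0, a and b are converted with scvt_qs8_f32 using the tensor's fixed-point
// position, and the saturating vq* intrinsics from NEFixedPoint are used to
// avoid overflow in the arithmetic.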
template <ActivationLayerInfo::ActivationFunction F, typename T>
typename std::enable_if<std::is_same<T, int8_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
{
    Iterator input(_input, window);
    Iterator output(_output, window);
    int      fixed_point_position = _input->info()->fixed_point_position();

    static const qint8x16_t CONST_0 = vdupq_n_qs8(0);
    const qint8x16_t        CONST_1 = vdupq_n_qs8(scvt_qs8_f32(1.f, fixed_point_position));
    const qint8x16_t        a       = vdupq_n_qs8(scvt_qs8_f32(_act_info.a(), fixed_point_position));
    const qint8x16_t        b       = vdupq_n_qs8(scvt_qs8_f32(_act_info.b(), fixed_point_position));

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const auto input_ptr  = reinterpret_cast<const int8_t *>(input.ptr());
        const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());

        const qint8x16_t in  = vld1q_qs8(input_ptr);
        qint8x16_t       tmp = {};

        switch(F)
        {
            case ActivationFunction::ABS:
                tmp = vqabsq_qs8(in);
                break;
            case ActivationFunction::BOUNDED_RELU:
                tmp = vminq_qs8(a, vmaxq_qs8(CONST_0, in));
                break;
            case ActivationFunction::LINEAR:
                tmp = vqmlaq_qs8(b, a, in, fixed_point_position);
                break;
            case ActivationFunction::LOGISTIC:
                tmp = vrecipq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(vnegq_s8(in), fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::RELU:
                tmp = vmaxq_qs8(CONST_0, in);
                break;
            case ActivationFunction::SOFT_RELU:
                tmp = vlogq_qs8(vqaddq_qs8(CONST_1, vqexpq_qs8(in, fixed_point_position)), fixed_point_position);
                break;
            case ActivationFunction::SQRT:
                tmp = vrecipq_qs8(vinvsqrtq_qs8(in, fixed_point_position), fixed_point_position);
                break;
            case ActivationFunction::SQUARE:
                tmp = vqmulq_qs8(in, in, fixed_point_position);
                break;
            case ActivationFunction::TANH:
                tmp = vtanhq_qs8(in, fixed_point_position);
                break;
            default:
                break;
        }

        vst1q_qs8(output_ptr, tmp);
    },
    input, output);
}

void NEActivationLayerKernel::run(const Window &window)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);

    (this->*_func)(window);
}