COMPMID-1800: (Nightly) Mismatches on SC9863 board for NEON FP16 Fixes for: - ReduceMean, reduction on the X axis for FP16 with 8 elements was performed only up to a certain point. The fix now takes into account the number of elements of the vector and does as many reductions as necessary. - YOLOLayer, activation for FP16 has to be performed on 32 bits until the FP16 approximations are fixed. Change-Id: I75373f4edd37de476e6fe1a56de3ef386b65c619

commit: 1c948d47f55ff8a39aa527f63ea7df93a13dd38e [log] [tgz]
author: Michele Di Giorgio <michele.digiorgio@arm.com> Tue Nov 20 16:03:01 2018 +0000
committer: Michalis Spyrou <michalis.spyrou@arm.com> Wed Nov 21 14:08:19 2018 +0000
tree: 4579798cb95ecaf62b5d7fe61b5d3753301e2804
parent: 8b2814ab7b9dc00278132d74d2f738b843b6c0c7 [diff] [blame]
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 2163f7b..5ce79f1 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp

@@ -184,7 +184,7 @@
     Iterator output(_output, window);
 
     static const float16x8_t CONST_0   = vdupq_n_f16(0.f);
-    static const float16x4_t CONST_1_H = vdup_n_f16(1.f);
+    static const float16x8_t CONST_1_H = vdupq_n_f16(1.f);
 
     static const float32x4_t CONST_1_F32 = vdupq_n_f32(1.f);
 
@@ -240,24 +240,11 @@
                 break;
             case ActivationFunction::LOGISTIC:
             {
-                // TODO (COMPMID-1535) : Revisit FP16 approximations
-                const float16x4x2_t in0 =
-                {
-                    vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[0]))))))),
-                    vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[0]))))))),
-                };
-
-                const float16x4x2_t in1 =
-                {
-                    vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_low_f16(in.val[1]))))))),
-                    vinv_f16(vadd_f16(CONST_1_H, vcvt_f16_f32(vexpq_f32(vcvt_f32_f16(vneg_f16(vget_high_f16(in.val[1]))))))),
-                };
-
                 tmp =
                 {
                     {
-                        vcombine_f16(in0.val[0], in0.val[1]),
-                        vcombine_f16(in1.val[0], in1.val[1]),
+                        vinvq_f16(vaddq_f16(CONST_1_H, vexpq_f16(vnegq_f16(in.val[0])))),
+                        vinvq_f16(vaddq_f16(CONST_1_H, vexpq_f16(vnegq_f16(in.val[1]))))
                     }
                 };
             }
commit	1c948d47f55ff8a39aa527f63ea7df93a13dd38e	[log] [tgz]
author	Michele Di Giorgio <michele.digiorgio@arm.com>	Tue Nov 20 16:03:01 2018 +0000
committer	Michalis Spyrou <michalis.spyrou@arm.com>	Wed Nov 21 14:08:19 2018 +0000
tree	4579798cb95ecaf62b5d7fe61b5d3753301e2804
parent	8b2814ab7b9dc00278132d74d2f738b843b6c0c7 [diff] [blame]