COMPMID-421: Added FP16 support in BatchNormalizationLayer.
Change-Id: I7142e0e8466ef79e016ae56d285e8e9291573e52
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79814
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index b467a60..39f0c3b 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -36,6 +36,16 @@
*/
float32x4_t vinvsqrtq_f32(float32x4_t x);
+#ifdef ARM_COMPUTE_ENABLE_FP16
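+/** Calculate the reciprocal square root (1 / sqrt(x)) of each lane of a half-precision vector.
+ *
+ * @param[in] x Input vector of eight FP16 values.
+ *
+ * @return Vector holding the reciprocal square root of each lane of @p x.
+ */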
+float16x8_t vinvsqrtq_f16(float16x8_t x);
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
/** Calculate reciprocal.
*
* @param[in] x Input value.
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 1d90029..08f6749 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -141,7 +141,6 @@
{
return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
}
-
#ifdef ARM_COMPUTE_ENABLE_FP16
/* Exponent polynomial coefficients */
const std::array<float16x8_t, 8> exp_tab_f16 =
@@ -172,12 +171,12 @@
vdupq_n_f16(0.0141278216615f),
}
};
+
inline float16x8_t vinvsqrtq_f16(float16x8_t x)
{
float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
-
return sqrt_reciprocal;
}