COMPMID-1188: Add support for activation in NEBatchNormalization.

Change-Id: I1e206574dac6433218db6e138adb7bf5f66a536d
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145222
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h b/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h
index 1c07b4f..4d9a795 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h
@@ -45,6 +45,9 @@
 VDUP_N_IMPL(uint32_t, uint32x2_t, vdup_n, u32, traits::vector_64_tag)
 VDUP_N_IMPL(int32_t, int32x2_t, vdup_n, s32, traits::vector_64_tag)
 VDUP_N_IMPL(float, float32x2_t, vdup_n, f32, traits::vector_64_tag)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VDUP_N_IMPL(float16_t, float16x4_t, vdup_n, f16, traits::vector_64_tag)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 VDUP_N_IMPL(uint8_t, uint8x16_t, vdupq_n, u8, traits::vector_128_tag)
 VDUP_N_IMPL(int8_t, int8x16_t, vdupq_n, s8, traits::vector_128_tag)
@@ -53,6 +56,9 @@
 VDUP_N_IMPL(uint32_t, uint32x4_t, vdupq_n, u32, traits::vector_128_tag)
 VDUP_N_IMPL(int32_t, int32x4_t, vdupq_n, s32, traits::vector_128_tag)
 VDUP_N_IMPL(float, float32x4_t, vdupq_n, f32, traits::vector_128_tag)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VDUP_N_IMPL(float16_t, float16x8_t, vdupq_n, f16, traits::vector_128_tag)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 #undef VDUP_N_IMPL
 } // namespace wrapper
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/max.h b/arm_compute/core/NEON/wrapper/intrinsics/max.h
index 1a8e95d..05ed051 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/max.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/max.h
@@ -43,6 +43,9 @@
 VMAX_IMPL(uint32_t, uint32x2_t, vmax, u32)
 VMAX_IMPL(int32_t, int32x2_t, vmax, s32)
 VMAX_IMPL(float, float32x2_t, vmax, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMAX_IMPL(float16_t, float16x4_t, vmax, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 VMAX_IMPL(uint8_t, uint8x16_t, vmaxq, u8)
 VMAX_IMPL(int8_t, int8x16_t, vmaxq, s8)
@@ -51,6 +54,9 @@
 VMAX_IMPL(uint32_t, uint32x4_t, vmaxq, u32)
 VMAX_IMPL(int32_t, int32x4_t, vmaxq, s32)
 VMAX_IMPL(float, float32x4_t, vmaxq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMAX_IMPL(float16_t, float16x8_t, vmaxq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 #undef VMAX_IMPL
 } // namespace wrapper
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/min.h b/arm_compute/core/NEON/wrapper/intrinsics/min.h
index ae79631..5ea2068 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/min.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/min.h
@@ -43,6 +43,9 @@
 VMIN_IMPL(uint32_t, uint32x2_t, vmin, u32)
 VMIN_IMPL(int32_t, int32x2_t, vmin, s32)
 VMIN_IMPL(float, float32x2_t, vmin, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMIN_IMPL(float16_t, float16x4_t, vmin, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 VMIN_IMPL(uint8_t, uint8x16_t, vminq, u8)
 VMIN_IMPL(int8_t, int8x16_t, vminq, s8)
@@ -51,6 +54,9 @@
 VMIN_IMPL(uint32_t, uint32x4_t, vminq, u32)
 VMIN_IMPL(int32_t, int32x4_t, vminq, s32)
 VMIN_IMPL(float, float32x4_t, vminq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VMIN_IMPL(float16_t, float16x8_t, vminq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 #undef VMIN_IMPL
 } // namespace wrapper
diff --git a/arm_compute/core/NEON/wrapper/traits.h b/arm_compute/core/NEON/wrapper/traits.h
index 495ddbb..5cd6086 100644
--- a/arm_compute/core/NEON/wrapper/traits.h
+++ b/arm_compute/core/NEON/wrapper/traits.h
@@ -62,6 +62,10 @@
 template <> struct neon_vector<int64_t, 2>{ using type = int64x2_t; using tag_type = vector_128_tag; };
 template <> struct neon_vector<float_t, 2>{ using type = float32x2_t; using tag_type = vector_64_tag; };
 template <> struct neon_vector<float_t, 4>{ using type = float32x4_t; using tag_type = vector_128_tag; };
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+template <> struct neon_vector<float16_t, 4>{ using type = float16x4_t; using tag_type = vector_64_tag; };
+template <> struct neon_vector<float16_t, 8>{ using type = float16x8_t; using tag_type = vector_128_tag; };
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #endif /* DOXYGEN_SKIP_THIS */
 
 /**  Helper type template to get the type of a neon vector */