COMPMID-1580 Implement ReduceMean in NEON

Change-Id: Id974efad304c2513b8824a6561ad45ee60b9e7fb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/153763
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/load.h b/arm_compute/core/NEON/wrapper/intrinsics/load.h
index 442d857..b5d9ed2 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/load.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/load.h
@@ -45,6 +45,9 @@
 //VLOAD_IMPL(uint64_t, uint64x1_t, u64)
 //VLOAD_IMPL(int64_t, int64x1_t, s64)
 VLOAD_IMPL(float, float32x2_t, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VLOAD_IMPL(float16_t, float16x4_t, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 #define VLOADQ_IMPL(stype, vtype, postfix) \
     inline vtype vloadq(const stype *ptr)  \
@@ -61,6 +64,9 @@
 //VLOAD_IMPL(uint64_t, uint64x1_t, u64)
 //VLOAD_IMPL(int64_t, int64x1_t, s64)
 VLOADQ_IMPL(float, float32x4_t, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VLOADQ_IMPL(float16_t, float16x8_t, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 #undef VLOAD_IMPL
 } // namespace wrapper