COMPMID-2160: Implement Round for NEON

Change-Id: Ie80e2ad294eaf95bc823d979842c320e8fb41f67
Signed-off-by: Usama Arif <usama.arif@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1215
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index e6edf22..c6e8536 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -180,6 +180,7 @@
     return wrapper::vpmax(pmax, pmax);
 }
 
+template <>
 uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, uint8x16_t vec_res_value, ReductionOperation op)
 {
     uint32x4x4_t res_idx_mask{ { 0 } };
@@ -272,6 +273,7 @@
     return wrapper::vpmax(pmax, pmax);
 }
 
+template <>
 uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, float16x8_t vec_res_value, ReductionOperation op)
 {
     uint32x4x2_t res_idx_mask{ 0 };