COMPMID-2819: Perform sqrt in double domain for L2 pooling.

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I51d50ceda903c4322b659cd872d041d7db73c335
Reviewed-on: https://review.mlplatform.org/c/2481
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index aaeb33f..4af5424 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -1578,7 +1578,12 @@
         // Calculate square-root in case of l2 pooling
         if(pooling_type == PoolingType::L2)
         {
-            vres = vmulq_f32(vres, vinvsqrtq_f32(vres));
+            float32x4_t l2_res = { static_cast<float>(sqrt(vgetq_lane_f32(vres, 0))),
+                                   static_cast<float>(sqrt(vgetq_lane_f32(vres, 1))),
+                                   static_cast<float>(sqrt(vgetq_lane_f32(vres, 2))),
+                                   static_cast<float>(sqrt(vgetq_lane_f32(vres, 3)))
+                                 };
+            vres = l2_res;
         }
 
         // Store result