COMPMID-1499: Fixed issues to build for FP16 on Android

Change-Id: I7cd15e9115b5c6f544005528d69061751286be11
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143708
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
index 14fa1b4..5e1c216 100644
--- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
+++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,10 +41,6 @@
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
-template class arm_compute::NEHarrisScoreFP16Kernel<3>;
-template class arm_compute::NEHarrisScoreFP16Kernel<5>;
-template class arm_compute::NEHarrisScoreFP16Kernel<7>;
-
 namespace fp16
 {
 inline float16x8_t harris_score(float16x8_t gx2, float16x8_t gy2, float16x8_t gxgy, float sensitivity, float strength_thresh)
@@ -361,6 +357,10 @@
     INEKernel::configure(win);
 }
 
+template class arm_compute::NEHarrisScoreFP16Kernel<3>;
+template class arm_compute::NEHarrisScoreFP16Kernel<5>;
+template class arm_compute::NEHarrisScoreFP16Kernel<7>;
+
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 template class arm_compute::NEHarrisScoreKernel<3>;
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 4041b62..484e58b 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -392,7 +392,7 @@
         const auto out_ptr = reinterpret_cast<T *>(output.ptr());
 
         // Init max value
-        auto vec_max = vdup_n<vec_16_byte_t<T>>(std::numeric_limits<T>::lowest());
+        auto vec_max = vdup_n<vec_16_byte_t<T>>(support::cpp11::lowest<T>());
 
         // Loop over input row
         for(const T *it = in_ptr; it < (in_ptr + input_width); it += vec_size_of(vec_max))
@@ -694,7 +694,7 @@
             {
                 auto vec_elements = vld<vec_16_byte_t<T>>(in_ptr + i);
                 vec_elements      = vsub(vec_elements, vec_max);
-                vec_elements      = vexp(vmul_n(vec_elements, beta));
+                vec_elements      = vexp(vmul_n(vec_elements, static_cast<T>(beta)));
                 vec_sum           = vadd(vec_sum, vec_elements);
                 vst(tmp_ptr + i, vec_elements);
             }