COMPMID-556: Add support to build arm64-v8.2-a for Android platform (clang compiler)

Change-Id: Ibb779dd3a8d10786da6d8f70590e654e14654d7b
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95530
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h
index 1fc0d5c..5c60d73 100644
--- a/arm_compute/core/NEON/NEMath.h
+++ b/arm_compute/core/NEON/NEMath.h
@@ -116,7 +116,7 @@
  */
 float32x4_t vpowq_f32(float32x4_t val, float32x4_t n);
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** Calculate hyperbolic tangent.
  *
  * tanh(x) = (e^2x - 1)/(e^2x + 1)
@@ -179,7 +179,7 @@
  * @return The calculated power.
  */
 float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
 #include "arm_compute/core/NEON/NEMath.inl"
 #endif /* __ARM_COMPUTE_NEMATH_H__ */
diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl
index 250114f..50f217c 100644
--- a/arm_compute/core/NEON/NEMath.inl
+++ b/arm_compute/core/NEON/NEMath.inl
@@ -168,7 +168,7 @@
 {
     return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
 }
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /* Exponent polynomial coefficients */
 const std::array<float16x8_t, 8> exp_tab_f16 =
 {
@@ -301,5 +301,5 @@
 {
     return vexpq_f16(vmulq_f16(n, vlogq_f16(val)));
 }
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
index d6ad0be..fa8a3be 100644
--- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h
@@ -80,7 +80,7 @@
     float _alpha;
 };
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** Interface for the accumulate weighted kernel using F16 */
 class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel
 {
@@ -88,9 +88,9 @@
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
 };
-#else  /* ARM_COMPUTE_AARCH64_V8_2 */
+#else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 /** Interface for the accumulate squared kernel
  *
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index e70dd45..a3fd3fe 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -27,9 +27,9 @@
 #include "arm_compute/core/FixedPoint.h"
 #include "arm_compute/core/NEON/INEKernel.h"
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #include <arm_fp16.h>
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 namespace arm_compute
 {
@@ -76,14 +76,14 @@
      */
     template <ActivationLayerInfo::ActivationFunction F, typename T>
     typename std::enable_if<std::is_same<T, float>::value, void>::type activation(const Window &window);
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     /** Function to apply an activation function on a tensor.
      *
      *  @param[in] window Region on which to execute the kernel
      */
     template <ActivationLayerInfo::ActivationFunction F, typename T>
     typename std::enable_if<std::is_same<T, float16_t>::value, void>::type activation(const Window &window);
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     /** Function to apply an activation function on a tensor.
      *
      *  @param[in] window Region on which to execute the kernel
diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
index a53e4d7..29248f6 100644
--- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h
@@ -46,7 +46,7 @@
     BorderSize border_size() const override;
 };
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** NEON kernel to perform a Box 3x3 filter using F16 simd
  */
 class NEBox3x3FP16Kernel : public NEBox3x3Kernel
@@ -55,8 +55,8 @@
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
 };
-#else  /* ARM_COMPUTE_AARCH64_V8_2 */
+#else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 using NEBox3x3FP16Kernel = NEBox3x3Kernel;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEBOX3x3KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
index 4f1a1f3..a57c389 100644
--- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
+++ b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h
@@ -81,7 +81,7 @@
     ITensor          *_phase;     /**< Destination tensor - Quantized phase */
 };
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** NEON kernel to perform Gradient computation
  */
 class NEGradientFP16Kernel : public NEGradientKernel
@@ -90,9 +90,9 @@
     // Inherited methods overriden:
     void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type) override;
 };
-#else  /* ARM_COMPUTE_AARCH64_V8_2 */
+#else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 using NEGradientFP16Kernel = NEGradientKernel;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 /** NEON kernel to perform Non-Maxima suppression for Canny Edge.
  *
diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
index cfa5220..c3c37e4 100644
--- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
+++ b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h
@@ -99,7 +99,7 @@
     HarrisScoreFunction *_func;
 };
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** Interface for the accumulate Weighted kernel using F16 */
 template <int32_t block_size>
 class NEHarrisScoreFP16Kernel : public INEHarrisScoreKernel
@@ -118,9 +118,9 @@
     /** Harris Score function to use for the particular image types passed to configure() */
     HarrisScoreFunction *_func;
 };
-#else  /* ARM_COMPUTE_AARCH64_V8_2 */
+#else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 template <int32_t block_size>
 using NEHarrisScoreFP16Kernel = NEHarrisScoreKernel<block_size>;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEHARRISCORNERSKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
index fba8d8d..46b2a8d 100644
--- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
+++ b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h
@@ -94,7 +94,7 @@
     ITensor                  *_phase;     /**< Output - Phase */
 };
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** Template interface for the kernel to compute magnitude and phase */
 template <MagnitudeType mag_type, PhaseType phase_type>
 class NEMagnitudePhaseFP16Kernel : public INEKernel
@@ -156,9 +156,9 @@
     ITensor                  *_magnitude; /**< Output - Magnitude */
     ITensor                  *_phase;     /**< Output - Phase */
 };
-#else  /* ARM_COMPUTE_AARCH64_V8_2 */
+#else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 template <MagnitudeType mag_type, PhaseType phase_type>
 using NEMagnitudePhaseFP16Kernel = NEMagnitudePhaseKernel<mag_type, phase_type>;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
index f47b487..da8aecf 100644
--- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
+++ b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h
@@ -78,7 +78,7 @@
     ITensor                *_output; /**< Destination tensor */
 };
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in F16 if the input data type is F32
  */
 class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel
@@ -92,8 +92,8 @@
      */
     void configure(const ITensor *input, ITensor *output, bool border_undefined);
 };
-#else  /* ARM_COMPUTE_AARCH64_V8_2 */
+#else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel;
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace arm_compute
 #endif /* _ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h b/arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h
index aac70b0..657bcd6 100644
--- a/arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h
+++ b/arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h
@@ -315,7 +315,7 @@
     vst1_qs16(buffer, vget_low_s16(values.val[0]));
 }
 
-#ifdef ARM_COMPUTE_AARCH64_V8_2
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 /** Loads a 3x3 matrix as a row (float16_t).
  *
  * @param[in] ptr Pointer to a float 3x3 matrix.
@@ -455,7 +455,7 @@
 {
     vst1_f16(buffer, vget_low_f16(values.val[0]));
 }
-#endif /* ARM_COMPUTE_AARCH64_V8_2 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 /** Get the number of elements processed on 3x3 convolution.
  *