COMPMID-634: Enable clang with libc++ to compile for Android (32 and 64 bits)

Change-Id: I693f64e70cd478e93675a8b04360128ded3b60d4
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93015
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp
index deafabe..856e3ac 100644
--- a/src/core/NEON/kernels/NEAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp
@@ -41,7 +41,7 @@
 /* Max S16 value used for saturation purposes. */
 const static uint16x8_t max_int_u16 = vdupq_n_u16(static_cast<uint16_t>(INT16_MAX));
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 namespace fp16
 {
 inline float16x8x2_t convert_u8x16_to_f16x8x2(uint8x16_t input)
@@ -132,7 +132,7 @@
     },
     input, accum);
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 namespace
 {
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index 67fc45b..8dfce0f 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -81,7 +81,7 @@
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float> },
     };
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     // Activation functions : FP16
     static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 =
     {
@@ -96,7 +96,7 @@
         { ActivationFunction::SQUARE, &NEActivationLayerKernel::activation<ActivationFunction::SQUARE, float16_t> },
         { ActivationFunction::TANH, &NEActivationLayerKernel::activation<ActivationFunction::TANH, float16_t> },
     };
-#endif /* ARM_COMPUTE_ENABLE_FP16*/
+#endif /* ARM_COMPUTE_AARCH64_V8_2*/
 
     // Activation functions : QS8
     static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qs8 =
@@ -140,11 +140,11 @@
         case DataType::F32:
             _func = act_map_f32[activation_info.activation()];
             break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
         case DataType::F16:
             _func = act_map_f16[activation_info.activation()];
             break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         default:
             ARM_COMPUTE_ERROR("Unsupported data type.");
     }
@@ -174,7 +174,7 @@
     ICPPKernel::configure(win);
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 template <ActivationLayerInfo::ActivationFunction F, typename T>
 typename std::enable_if<std::is_same<T, float16_t>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
 {
@@ -305,7 +305,7 @@
     },
     input, output);
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 template <ActivationLayerInfo::ActivationFunction F, typename T>
 typename std::enable_if<std::is_same<T, float>::value, void>::type NEActivationLayerKernel::activation(const Window &window)
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index f263fd0..02fabca 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -145,7 +145,7 @@
     return res;
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 inline float16x8x2_t vadd2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
 {
     const float16x8x2_t res =
@@ -158,11 +158,11 @@
 
     return res;
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 void add_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     Iterator input1(in1, window);
     Iterator input2(in2, window);
     Iterator output(out, window);
@@ -175,13 +175,13 @@
         vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vadd2q_f16(a, b));
     },
     input1, input2, output);
-#else  /* ARM_COMPUTE_ENABLE_FP16 */
+#else  /* ARM_COMPUTE_AARCH64_V8_2 */
     ARM_COMPUTE_UNUSED(in1);
     ARM_COMPUTE_UNUSED(in2);
     ARM_COMPUTE_UNUSED(out);
     ARM_COMPUTE_UNUSED(window);
     ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 }
 
 void add_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index 85f72c1..8aadab9 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -157,7 +157,7 @@
     input1, input2, output);
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
 {
     const float16x8x2_t res =
@@ -170,11 +170,11 @@
 
     return res;
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     Iterator input1(in1, window);
     Iterator input2(in2, window);
     Iterator output(out, window);
@@ -187,13 +187,13 @@
         vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vsub2q_f16(a, b));
     },
     input1, input2, output);
-#else  /* ARM_COMPUTE_ENABLE_FP16 */
+#else  /* ARM_COMPUTE_AARCH64_V8_2 */
     ARM_COMPUTE_UNUSED(in1);
     ARM_COMPUTE_UNUSED(in2);
     ARM_COMPUTE_UNUSED(out);
     ARM_COMPUTE_UNUSED(window);
     ARM_COMPUTE_ERROR("Not supported, recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 }
 
 void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
diff --git a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
index f6f6f9c..c48653a 100644
--- a/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp
@@ -169,7 +169,7 @@
     input, output);
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 void batch_normalization_fp16(ITensor *in, ITensor *out, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, const Window &window)
 {
     Iterator input(in, window);
@@ -212,7 +212,7 @@
     },
     input, output);
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 void NEBatchNormalizationLayerKernel::configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon)
 {
@@ -258,11 +258,11 @@
             num_elems_processed_per_iteration = 4;
             break;
         case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             _func                             = &batch_normalization_fp16;
             num_elems_processed_per_iteration = 8;
             break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         default:
             ARM_COMPUTE_ERROR("Element size not supported");
             break;
diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
index d7178e4..be8beae 100644
--- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
@@ -33,7 +33,7 @@
 
 using namespace arm_compute;
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 void NEBox3x3FP16Kernel::run(const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
@@ -104,7 +104,7 @@
     },
     input, output);
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 BorderSize NEBox3x3Kernel::border_size() const
 {
diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
index bcbe790..944f29d 100644
--- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp
+++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
@@ -51,7 +51,7 @@
 constexpr int MAYBE   = 127;
 } // namespace
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 namespace fp16
 {
 inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
@@ -787,7 +787,7 @@
 
     INEKernel::configure(win);
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 namespace
 {
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
index 6631359..06620d4 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
@@ -124,7 +124,7 @@
     return vqaddq_qs32(x, y);
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 inline float16x8_t internal_vld1q(const float16_t *in)
 {
     return vld1q_f16(in);
@@ -141,7 +141,7 @@
 {
     return vaddq_f16(x, y);
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 template <typename T1, typename T2, bool in_place>
 void accumulate_bias(ITensor *input, const ITensor *bias, const Window window, ITensor *output)
@@ -246,13 +246,13 @@
             _func = (output == nullptr) ? &accumulate_bias<qint32_t, qint16_t, true> : &accumulate_bias<qint32_t, qint16_t, false>;
             break;
         }
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
         case DataType::F16:
         {
             _func = (output == nullptr) ? &accumulate_bias<float16_t, float16_t, true> : &accumulate_bias<float16_t, float16_t, false>;
             break;
         }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         case DataType::F32:
         {
             _func = (output == nullptr) ? &accumulate_bias<float, float, true> : &accumulate_bias<float, float, false>;
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 2766d69..8642a19 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -68,7 +68,7 @@
     return vdupq_n_qs16(v);
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 template <unsigned int stridex>
 float16x8_t internal_vld1q(const float16_t *in);
 
@@ -113,7 +113,7 @@
     ARM_COMPUTE_UNUSED(fixed_point_position);
     return vaddq_f16(x, vmulq_f16(y, z));
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 template <unsigned int stridex>
 float32x4_t internal_vld1q(const float *in);
@@ -427,7 +427,7 @@
     }
 };
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 inline float16x8x3_t load_matrix_row(const float16_t *ptr)
 {
     /* ptr is a pointer to a row in a 3x3 matrix, the function returns 3 vectors holding exactly the same value in all lanes:
@@ -567,7 +567,7 @@
     vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0])));
 }
 
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 inline float32x4x3_t load_matrix_row(const float *ptr)
 {
@@ -1433,9 +1433,9 @@
         {
             switch(input->info()->data_type())
             {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
                 case DataType::F16:
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
                 case DataType::QS8:
                 case DataType::QS16:
                     _num_elems_written_per_iteration = 8;
@@ -1468,9 +1468,9 @@
                     _num_elems_read_per_iteration    = 12;
                     _num_elems_written_per_iteration = 16 >> conv_stride_x;
                     break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
                 case DataType::F16:
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
                 case DataType::QS8:
                 case DataType::QS16:
                     _num_weight_elems_read_per_row   = 8 + _kernel_size - 1;
@@ -1532,11 +1532,11 @@
                 case DataType::F32:
                     convolve_1x1<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
                     break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
                 case DataType::F16:
                     convolve_1x1<float16_t, float16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
                     break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
                 default:
                     ARM_COMPUTE_ERROR("Data type not supported");
                     break;
@@ -1553,11 +1553,11 @@
                 case DataType::F32:
                     convolve_3x3<float, float>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
                     break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
                 case DataType::F16:
                     convolve_3x3<float16_t, float16_t>(window, _num_elems_read_per_iteration, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info);
                     break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
                 default:
                     ARM_COMPUTE_ERROR("Data type not supported");
                     break;
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index fb07cb0..e61f952 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -108,7 +108,7 @@
             in0_out, in1);
             break;
         }
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
         case DataType::F16:
         {
             execute_window_loop(window, [&](const Coordinates & id)
@@ -128,7 +128,7 @@
             in0_out, in1);
             break;
         }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         case DataType::QS8:
         {
             execute_window_loop(window, [&](const Coordinates & id)
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index 9dbce1d..c94d3b1 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -66,7 +66,7 @@
     in, out);
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &window, float beta)
 {
     const float16x8_t beta_f16 = vdupq_n_f16(beta);
@@ -89,7 +89,7 @@
     },
     in, out);
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &window, float beta)
 {
@@ -167,10 +167,10 @@
             _func = &matrix_addition_qs16;
             break;
         case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             _func = &matrix_addition_f16;
             break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         default:
             ARM_COMPUTE_ERROR("Data type not supported");
             break;
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index 6909082..4fcf6e2 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -53,7 +53,7 @@
 template <bool multiply_alpha>
 void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info, float alpha)
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     const auto width_matrix_b  = static_cast<int>(output->info()->dimension(0));
     const auto in_b_stride     = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()));
     const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0));
@@ -186,7 +186,7 @@
 
     },
     ina, inb, out);
-#else  /* ARM_COMPUTE_ENABLE_FP16 */
+#else  /* ARM_COMPUTE_AARCH64_V8_2 */
     ARM_COMPUTE_UNUSED(input0);
     ARM_COMPUTE_UNUSED(input1);
     ARM_COMPUTE_UNUSED(output);
@@ -194,7 +194,7 @@
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_UNUSED(alpha);
     ARM_COMPUTE_ERROR("Not implemented");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 }
 
 template <bool multiply_alpha>
@@ -915,7 +915,7 @@
 template <bool multiply_alpha>
 void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, float alpha)
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     const size_t in_b_stride          = input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type());
     const size_t out_stride           = output->info()->strides_in_bytes()[1] / data_size_from_type(output->info()->data_type());
     const int    num_elems_matrix_b_x = input1->info()->dimension(0);
@@ -1051,14 +1051,14 @@
         vst1q_f16(mtx_out + 3 * out_stride, c.val[3]);
     },
     ina, inb, out);
-#else  /* ARM_COMPUTE_ENABLE_FP16 */
+#else  /* ARM_COMPUTE_AARCH64_V8_2 */
     ARM_COMPUTE_UNUSED(input0);
     ARM_COMPUTE_UNUSED(input1);
     ARM_COMPUTE_UNUSED(output);
     ARM_COMPUTE_UNUSED(window);
     ARM_COMPUTE_UNUSED(alpha);
     ARM_COMPUTE_ERROR("Not implemented");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 }
 
 template <bool multiply_alpha>
@@ -1454,13 +1454,13 @@
                 num_elems_processed_per_iteration_x = 16;
                 break;
             }
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             case DataType::F16:
             {
                 num_elems_processed_per_iteration_x = 32;
                 break;
             }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
             default:
             {
                 ARM_COMPUTE_ERROR("Data type not supported");
@@ -1503,13 +1503,13 @@
                 num_elems_processed_per_iteration_x = 8;
                 break;
             }
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             case DataType::F16:
             {
                 num_elems_processed_per_iteration_x = 8;
                 break;
             }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
             default:
             {
                 ARM_COMPUTE_ERROR("Data type not supported");
@@ -1563,14 +1563,14 @@
                 vector_matrix_multiply_qs16<false>(_input0, _input1, _output, window, info, _alpha);
                 break;
             }
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             case DataType::F16:
             {
                 multiply_alpha ? vector_matrix_multiply_f16<true>(_input0, _input1, _output, window, info, _alpha) :
                 vector_matrix_multiply_f16<false>(_input0, _input1, _output, window, info, _alpha);
                 break;
             }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
             default:
             {
                 ARM_COMPUTE_ERROR("Data type not supported");
@@ -1600,14 +1600,14 @@
                 matrix_matrix_multiply_qs16<false>(_input0, _input1, _output, window, _alpha);
                 break;
             }
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             case DataType::F16:
             {
                 multiply_alpha ? matrix_matrix_multiply_f16<true>(_input0, _input1, _output, window, _alpha) :
                 matrix_matrix_multiply_f16<false>(_input0, _input1, _output, window, _alpha);
                 break;
             }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
             default:
             {
                 ARM_COMPUTE_ERROR("Data type not supported");
diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
index 233b2ba..d8440e3 100644
--- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
+++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
@@ -39,7 +39,7 @@
 
 using namespace arm_compute;
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 
 template class arm_compute::NEHarrisScoreFP16Kernel<3>;
 template class arm_compute::NEHarrisScoreFP16Kernel<5>;
@@ -361,7 +361,7 @@
     INEKernel::configure(win);
 }
 
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 template class arm_compute::NEHarrisScoreKernel<3>;
 template class arm_compute::NEHarrisScoreKernel<5>;
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 1c018b2..099f2f1 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -311,11 +311,11 @@
             case DataType::F32:
                 _func = &NEIm2ColKernel::run_reduced<float>;
                 break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             case DataType::F16:
                 _func = &NEIm2ColKernel::run_reduced<float16_t>;
                 break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
             case DataType::QS8:
                 _func = &NEIm2ColKernel::run_reduced<qint8_t>;
                 break;
@@ -334,11 +334,11 @@
             case DataType::F32:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float, false> : &NEIm2ColKernel::run_generic<float, true>;
                 break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             case DataType::F16:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
                 break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
             case DataType::QS8:
                 _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
                 break;
diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
index 1b2942c..99b4250 100644
--- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
@@ -51,7 +51,7 @@
 {
 void vector_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info)
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     const auto width_matrix_b  = static_cast<int>(output->info()->dimension(0));
     const auto in_b_stride     = static_cast<int>(input1->info()->strides_in_bytes()[1] / data_size_from_type(input1->info()->data_type()));
     const auto num_elems_vec_a = static_cast<int>(input0->info()->dimension(0));
@@ -160,14 +160,14 @@
         vst1q_f16(vec_out + 24, acc3);
     },
     ina, out);
-#else  /* ARM_COMPUTE_ENABLE_FP16 */
+#else  /* ARM_COMPUTE_AARCH64_V8_2 */
     ARM_COMPUTE_UNUSED(input0);
     ARM_COMPUTE_UNUSED(input1);
     ARM_COMPUTE_UNUSED(output);
     ARM_COMPUTE_UNUSED(window);
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR("Not supported, recompile with -march=armv8.2-a+fp16+simd.");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 }
 
 void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, ITensor *output, const Window &window, const ThreadInfo &info)
diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
index 433985f..2424ec1 100644
--- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
+++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
@@ -51,7 +51,7 @@
 constexpr float COEFF2       = 0.2447f;
 } // namespace
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 namespace fp16
 {
 inline float16x8_t inv(float16x8_t x)
@@ -429,7 +429,7 @@
 template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::SIGNED>;
 template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L1NORM, PhaseType::UNSIGNED>;
 template class arm_compute::NEMagnitudePhaseFP16Kernel<MagnitudeType::L2NORM, PhaseType::UNSIGNED>;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 namespace
 {
diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
index b7dfb59..c4517da 100644
--- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
@@ -41,7 +41,7 @@
 class Coordinates;
 } // namespace arm_compute
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 namespace fp16
 {
 inline void mask_top(const float16x8_t &vc, const float16x8_t &in0, const float16x8_t &in1, uint16x8_t &mask)
@@ -224,7 +224,7 @@
 
     INEKernel::configure(win);
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 namespace
 {
diff --git a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
index d6d26e2..f6f3d5f 100644
--- a/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NENormalizationLayerKernel.cpp
@@ -232,7 +232,7 @@
         },
         input, input_squared, output);
     }
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     else if(dt == DataType::F16)
     {
         const float16x8_t coeff_vec    = vdupq_n_f16(_norm_info.scale_coeff());
@@ -268,7 +268,7 @@
         },
         input, input_squared, output);
     }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
     else
     {
         ARM_COMPUTE_ERROR("Not supported");
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index 19d45e2..2c90d9a 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -38,9 +38,9 @@
 #include <cstdint>
 #include <cstdlib>
 
-#if ARM_COMPUTE_ENABLE_FP16
+#if ARM_COMPUTE_AARCH64_V8_2
 #include <arm_fp16.h> // needed for float16_t
-#endif                /* ARM_COMPUTE_ENABLE_FP16 */
+#endif                /* ARM_COMPUTE_AARCH64_V8_2 */
 
 using namespace arm_compute;
 
@@ -335,7 +335,7 @@
 template <bool is_scale255, bool is_sat>
 void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale)
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     const auto          input1    = static_cast<const float16_t *__restrict>(input1_ptr);
     const auto          input2    = static_cast<const float16_t *__restrict>(input2_ptr);
     const auto          output    = static_cast<float16_t *__restrict>(output_ptr);
@@ -350,13 +350,13 @@
         }
     };
     vst2q_f16(output, result);
-#else  /* ARM_COMPUTE_ENABLE_FP16 */
+#else  /* ARM_COMPUTE_AARCH64_V8_2 */
     ARM_COMPUTE_UNUSED(input1_ptr);
     ARM_COMPUTE_UNUSED(input2_ptr);
     ARM_COMPUTE_UNUSED(output_ptr);
     ARM_COMPUTE_UNUSED(scale);
     ARM_COMPUTE_ERROR("Not supported. Recompile the library with arch=arm64-v8.2-a.");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 }
 
 template <bool is_scale255, bool is_sat>
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 8d4e465..0024e33 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -173,7 +173,7 @@
             }
             num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
             break;
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
         case DataType::F16:
             switch(pool_size)
             {
@@ -192,7 +192,7 @@
                     break;
             }
             break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         case DataType::F32:
             switch(pool_size)
             {
@@ -536,7 +536,7 @@
 template <PoolingType pooling_type>
 void NEPoolingLayerKernel::pooling3_f16(const Window &window_input, const Window &window)
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     Iterator input(_input, window_input);
     Iterator output(_output, window);
 
@@ -595,17 +595,17 @@
         *(reinterpret_cast<float16_t *>(output.ptr())) = vget_lane_f16(res, 0);
     },
     input, output);
-#else  /* ARM_COMPUTE_ENABLE_FP16 */
+#else  /* ARM_COMPUTE_AARCH64_V8_2 */
     ARM_COMPUTE_UNUSED(window_input);
     ARM_COMPUTE_UNUSED(window);
     ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 }
 
 template <PoolingType pooling_type>
 void NEPoolingLayerKernel::pooling2_f16(const Window &window_input, const Window &window)
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
     Iterator      input(_input, window_input);
     Iterator      output(_output, window);
     constexpr int pool_size = 2;
@@ -654,11 +654,11 @@
         vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res);
     },
     input, output);
-#else  /* ARM_COMPUTE_ENABLE_FP16 */
+#else  /* ARM_COMPUTE_AARCH64_V8_2 */
     ARM_COMPUTE_UNUSED(window_input);
     ARM_COMPUTE_UNUSED(window);
     ARM_COMPUTE_ERROR("FP16 Not supported! Recompile the library with arch=arm64-v8.2-a");
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 }
 
 template <PoolingType pooling_type>
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 648dac4..73aba28 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -106,7 +106,7 @@
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 void logits_1d_max_f16(const ITensor *in, ITensor *out, const Window &window)
 {
     Window in_slice = window.first_slice_window_1D();
@@ -138,7 +138,7 @@
     }
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 void logits_1d_max_f32(const ITensor *in, ITensor *out, const Window &window)
 {
@@ -213,10 +213,10 @@
             _func = &logits_1d_max_f32;
             break;
         case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             _func = &logits_1d_max_f16;
             break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         default:
             ARM_COMPUTE_ERROR("Unsupported data type.");
     }
@@ -374,7 +374,7 @@
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
 }
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
 {
     Window window_max(window);
@@ -434,7 +434,7 @@
     }
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
 {
@@ -532,10 +532,10 @@
             _func = &logits_1d_shift_exp_sum_f32;
             break;
         case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             _func = &logits_1d_shift_exp_sum_f16;
             break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         default:
             ARM_COMPUTE_ERROR("Unsupported data type.");
             break;
@@ -637,7 +637,7 @@
     }
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
 }
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
 void logits_1d_norm_f16(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window)
 {
     Window window_sum(window);
@@ -668,7 +668,7 @@
     }
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
 }
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
 
 void logits_1d_norm_f32(const ITensor *in, const ITensor *sum, ITensor *out, const Window &window)
 {
@@ -738,10 +738,10 @@
             _func = &logits_1d_norm_f32;
             break;
         case DataType::F16:
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef ARM_COMPUTE_AARCH64_V8_2
             _func = &logits_1d_norm_f16;
             break;
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */
         default:
             ARM_COMPUTE_ERROR("Unsupported data type.");
             break;
diff --git a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
index 939f1b7..8728e77 100644
--- a/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
+++ b/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp
@@ -39,6 +39,9 @@
 #include <cstddef>
 #include <cstdint>
 
+// Enable only if compiled for AArch64-V8.2-A targets
+#ifdef ARM_COMPUTE_AARCH64_V8_2
+
 #define ASM_PREFETCH(address) "PRFM PLDL1KEEP, " address "\n"
 #define ASM_PREFETCHL2(address) "PRFM PLDL2KEEP, " address "\n"
 #define ASM_PREFETCHW(address) "PRFM PSTL1KEEP, " address "\n"
@@ -517,3 +520,4 @@
     }
 }
 } // namespace arm_compute
+#endif /* ARM_COMPUTE_AARCH64_V8_2 */