Remove dependency on fp16 definitions from some core include files

This significantly improves the compilation times for parts of the core library that just need
a definition of float16_t rather than access to all of the fp16 intrinsics.

Signed-off-by: Matthew Bentham <Matthew.Bentham@arm.com>
Change-Id: I5da1c6b0df0dd87d1d17948cd2e9b7375874f455
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/529385
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9781
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/CpuTypes.h b/src/cpu/CpuTypes.h
new file mode 100644
index 0000000..0f7b9b6
--- /dev/null
+++ b/src/cpu/CpuTypes.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPUTYPES
+#define ARM_COMPUTE_CPUTYPES
+
+namespace arm_compute
+{
+/* Scalar type definitions compatible with arm_neon.h and arm_sve.h, for code that needs the type names but not the intrinsics */
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+typedef __fp16 float16_t; // only defined when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+typedef float float32_t;
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_CPUTYPES */
diff --git a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
index f47df1e..d6c56d2 100644
--- a/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
+++ b/src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,7 +32,6 @@
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
 #include "src/cpu/kernels/depthwiseconv2d/list.h"
-#include "support/ToolchainSupport.h"
 
 namespace arm_compute
 {
diff --git a/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp b/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp
index 604f22f..7d077c7 100644
--- a/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp
+++ b/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,7 +32,6 @@
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
 #include "src/cpu/kernels/maxunpool/list.h"
-#include "support/ToolchainSupport.h"
 
 namespace arm_compute
 {
@@ -145,4 +144,4 @@
 }
 } // namespace kernels
 } // namespace cpu
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/cpu/kernels/add/generic/neon/fp16.cpp b/src/cpu/kernels/add/generic/neon/fp16.cpp
index a847cb6..fca7b2c 100644
--- a/src/cpu/kernels/add/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/add/generic/neon/fp16.cpp
@@ -23,8 +23,8 @@
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/add/generic/neon/impl.h"
-#include "support/ToolchainSupport.h"
 
 namespace arm_compute
 {
diff --git a/src/cpu/kernels/add/generic/sve/fp16.cpp b/src/cpu/kernels/add/generic/sve/fp16.cpp
index f3128030c..581f3ab 100644
--- a/src/cpu/kernels/add/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/add/generic/sve/fp16.cpp
@@ -24,10 +24,9 @@
 
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/add/generic/sve/impl.h"
 
-#include "support/ToolchainSupport.h"
-
 namespace arm_compute
 {
 namespace cpu
diff --git a/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp b/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp
index 6fce1cd..15f62fe 100644
--- a/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/addmuladd/generic/neon/fp16.cpp
@@ -27,6 +27,7 @@
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Window.h"
+#include "src/cpu/CpuTypes.h"
 
 #include <cstddef>
 #include <cstdint>
@@ -35,6 +36,8 @@
 #if defined(__aarch64__) && defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
 namespace
 {
+using arm_compute::float16_t;
+
 void a64_add_bn_clamp_direct_fp16_2x32(
     float16_t *out, size_t out_stride,
     float16_t *out_direct, size_t out_direct_stride,
diff --git a/src/cpu/kernels/boundingboxtransform/generic/neon/fp16.cpp b/src/cpu/kernels/boundingboxtransform/generic/neon/fp16.cpp
index 6826ff6..5661479 100644
--- a/src/cpu/kernels/boundingboxtransform/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/boundingboxtransform/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,9 @@
  * SOFTWARE.
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/boundingboxtransform/generic/neon/impl.h"
+
 namespace arm_compute
 {
 namespace cpu
diff --git a/src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp b/src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp
index 2d08c87..d74a8a7 100644
--- a/src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp
+++ b/src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,9 @@
  * SOFTWARE.
  */
 #include "src/cpu/kernels/boundingboxtransform/generic/neon/impl.h"
+
+#include "src/cpu/CpuTypes.h"
+
 namespace arm_compute
 {
 namespace cpu
@@ -143,4 +146,4 @@
 template void bounding_box_transform<float16_t>(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, BoundingBoxTransformInfo bbinfo, const Window &window);
 #endif //defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 } // namespace cpu
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/cpu/kernels/cast/generic/neon/fp16.cpp b/src/cpu/kernels/cast/generic/neon/fp16.cpp
index d2c6692..385ca18 100644
--- a/src/cpu/kernels/cast/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/cast/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2022 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,8 @@
 #include "src/cpu/kernels/cast/list.h"
 #include "support/SaturateCast.h"
 
+#include "arm_neon.h"
+
 namespace arm_compute
 {
 namespace cpu
diff --git a/src/cpu/kernels/depthwiseconv2d/generic/neon/fp16.cpp b/src/cpu/kernels/depthwiseconv2d/generic/neon/fp16.cpp
index 553d816..e85a166 100644
--- a/src/cpu/kernels/depthwiseconv2d/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/depthwiseconv2d/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,9 @@
  * SOFTWARE.
  */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/depthwiseconv2d/generic/neon/impl.h"
+
 namespace arm_compute
 {
 namespace cpu
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp
index 01567a7..a883309 100644
--- a/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp
@@ -23,6 +23,7 @@
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 #include "arm_compute/core/Helpers.h"
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/elementwise_unary/generic/sve/impl.h"
 
 namespace arm_compute
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp
index 47645ff..b21ed8d 100644
--- a/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp
@@ -23,6 +23,7 @@
  */
 
 #include "arm_compute/core/Helpers.h"
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/elementwise_unary/generic/sve/impl.h"
 
 namespace arm_compute
diff --git a/src/cpu/kernels/instancenorm/generic/neon/impl.h b/src/cpu/kernels/instancenorm/generic/neon/impl.h
index fa4b4b6..1d413a9 100644
--- a/src/cpu/kernels/instancenorm/generic/neon/impl.h
+++ b/src/cpu/kernels/instancenorm/generic/neon/impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,6 +24,9 @@
 #ifndef SRC_CORE_SVE_KERNELS_INSTANCENORM_IMPL_H
 #define SRC_CORE_SVE_KERNELS_INSTANCENORM_IMPL_H
 #include "arm_compute/core/Helpers.h"
+
+#include "arm_neon.h"
+
 namespace arm_compute
 {
 namespace cpu
diff --git a/src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp b/src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp
index ed84c10..661c3d7 100644
--- a/src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,9 +23,11 @@
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 
+#include "arm_compute/core/Helpers.h"
+
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/l2normlayer/generic/neon/impl.h"
 
-#include "arm_compute/core/Helpers.h"
 namespace arm_compute
 {
 namespace cpu
diff --git a/src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp b/src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp
index 3e712b5..47bf64a 100644
--- a/src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/meanstddevnorm/generic/neon/impl.h"
 
 namespace arm_compute
diff --git a/src/cpu/kernels/pool3d/neon/fp16.cpp b/src/cpu/kernels/pool3d/neon/fp16.cpp
index b79bcd9..0130a96 100644
--- a/src/cpu/kernels/pool3d/neon/fp16.cpp
+++ b/src/cpu/kernels/pool3d/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,9 @@
  * SOFTWARE.
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/pool3d/neon/impl.h"
+
 namespace arm_compute
 {
 namespace cpu
@@ -33,4 +35,4 @@
 }
 } // namespace cpu
 } // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
\ No newline at end of file
+#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
diff --git a/src/cpu/kernels/roialign/generic/neon/fp16.cpp b/src/cpu/kernels/roialign/generic/neon/fp16.cpp
index 6e585a4..c265d5d 100644
--- a/src/cpu/kernels/roialign/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/roialign/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,9 @@
  * SOFTWARE.
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/roialign/generic/neon/impl.h"
+
 namespace arm_compute
 {
 namespace cpu
diff --git a/src/cpu/kernels/roialign/generic/neon/impl.cpp b/src/cpu/kernels/roialign/generic/neon/impl.cpp
index a4502e7..630d649 100644
--- a/src/cpu/kernels/roialign/generic/neon/impl.cpp
+++ b/src/cpu/kernels/roialign/generic/neon/impl.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,10 @@
  * SOFTWARE.
  */
 #include "src/cpu/kernels/roialign/generic/neon/impl.h"
+
 #include "src/core/NEON/INEKernel.h"
+#include "src/cpu/CpuTypes.h"
+
 namespace arm_compute
 {
 namespace cpu
diff --git a/src/cpu/kernels/softmax/generic/neon/fp16.cpp b/src/cpu/kernels/softmax/generic/neon/fp16.cpp
index 3cb1cd6..f655669 100644
--- a/src/cpu/kernels/softmax/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/softmax/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,7 @@
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
 #include "arm_compute/core/Helpers.h"
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/softmax/generic/neon/impl.h"
 
 namespace arm_compute
diff --git a/src/cpu/kernels/softmax/generic/sve/fp16.cpp b/src/cpu/kernels/softmax/generic/sve/fp16.cpp
index 7aefcb1..15a523b 100644
--- a/src/cpu/kernels/softmax/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/softmax/generic/sve/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,7 @@
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 #include "arm_compute/core/Helpers.h"
+#include "src/cpu/CpuTypes.h"
 #include "src/cpu/kernels/softmax/generic/sve/impl.h"
 namespace arm_compute
 {