Fix CMake and Bazel builds and tests failing in SCons

- Fix 4 failing tests for multi_isa builds when experimental_fixed_format_kernels=1
- Fix CMake and Bazel builds so that they pass the validation tests
- Update documentation: remove the "-DCPPTHREADS=1" flag from the CMake build example

Partially resolves: ONCPUML-1181

Signed-off-by: David Svantesson <david.svantesson@arm.com>
Change-Id: I7101676260a0adcb7b6ff6f4342ae36f921e7120
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9189
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/.bazelrc b/.bazelrc
index 267e648..8611db3 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -36,3 +36,4 @@
 build --flag_alias=logging=//:logging
 build --flag_alias=openmp=//:openmp
 build --flag_alias=cppthreads=//:cppthreads
+build --flag_alias=enable_bf16_validation=//:enable_bf16_validation
diff --git a/BUILD.bazel b/BUILD.bazel
index d33cf6b..e3ad75a 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -66,6 +66,12 @@
     visibility = ["//visibility:public"],
 )
 
+bool_flag(
+    name = "enable_bf16_validation",
+    build_setting_default = False,
+    visibility = ["//visibility:public"],
+)
+
 #---------------------------------------------------------------------
 # Flag variables
 config_setting(
@@ -103,6 +109,14 @@
     },
 )
 
+config_setting(
+    name = "bf16_validation_flag",
+    flag_values = {
+        ":enable_bf16_validation": "true",
+    },
+)
+
+
 #---------------------------------------------------------------------
 # Common defines used for all targets
 cc_library(
@@ -112,7 +126,6 @@
                   "ARM_COMPUTE_CPU_ENABLED",
                   "ARM_COMPUTE_ENABLE_NEON",
                   "ARM_COMPUTE_ENABLE_FP16",
-                  "ARM_COMPUTE_ENABLE_BF16",
                   "ARM_COMPUTE_ENABLE_I8MM",
                   "ENABLE_FP16_KERNELS",
                   "ENABLE_FP32_KERNELS",
@@ -125,6 +138,9 @@
                   "DARM_COMPUTE_GRAPH_ENABLED",
                   "ARM_COMPUTE_ENABLE_SVEF32MM",
                   "ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS",
+                  "ENABLE_SVE",
+                  "ARM_COMPUTE_ENABLE_SVE",
+                  "_GLIBCXX_USE_NANOSLEEP"
               ] + select({
                   "//:debug_flag": [
                       "ARM_COMPUTE_DEBUG_ENABLED",
@@ -227,9 +243,8 @@
         "//conditions:default": [],
     }),
     local_defines = [
-        "ENABLE_SVE",
-        "ARM_COMPUTE_ENABLE_SVE",
         "ARM_COMPUTE_ENABLE_SVE2",
+        "ARM_COMPUTE_ENABLE_BF16"
     ],
     deps = [
         "//:common_defines",
@@ -277,8 +292,7 @@
         "//conditions:default": [],
     }),
     local_defines = [
-        "ENABLE_SVE",
-        "ARM_COMPUTE_ENABLE_SVE",
+        "ARM_COMPUTE_ENABLE_BF16",
     ],
     deps = [
         "//:common_defines",
@@ -333,6 +347,9 @@
         "//:openmp_flag": ["-fopenmp"],
         "//conditions:default": [],
     }),
+    local_defines = [
+        "ARM_COMPUTE_ENABLE_BF16",
+    ],
     visibility = ["//visibility:public"],
     deps = [
         "//:common_defines",
@@ -342,6 +359,8 @@
         "//include",
         "//support",
         "//utils",
+        "//:arm_compute_sve",
+        "//:arm_compute_sve2"
     ],
     alwayslink = True,
 )
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ac0c72..72992ed 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -101,7 +101,12 @@
     -Wsign-promo
     -Weffc++
     -Wno-overlength-strings
-    -Wno-ignored-attributes)
+    -Wno-ignored-attributes
+    -Wlogical-op
+    -Wnoexcept
+    -Wstrict-null-sentinel
+    -Wno-misleading-indentation
+    -O3)
 
 # Disable note popups on compiler ABI changes
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
@@ -143,9 +148,7 @@
 add_library(arm_compute_sve "")
 target_compile_options(arm_compute_sve
                        PRIVATE "-march=armv8.2-a+sve+fp16+dotprod")
-target_compile_definitions(arm_compute_sve PRIVATE ENABLE_SVE)
-target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_SVE)
-
+target_compile_definitions(arm_compute_sve PRIVATE ARM_COMPUTE_ENABLE_BF16)
 target_include_directories(
   arm_compute_sve
   PUBLIC $<INSTALL_INTERFACE:include>
@@ -165,10 +168,8 @@
 add_library(arm_compute_sve2 "")
 target_compile_options(arm_compute_sve2
                        PRIVATE "-march=armv8.6-a+sve2+fp16+dotprod")
-target_compile_definitions(arm_compute_sve2 PRIVATE ENABLE_SVE)
-target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE)
 target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_SVE2)
-
+target_compile_definitions(arm_compute_sve2 PRIVATE ARM_COMPUTE_ENABLE_BF16)
 target_include_directories(
   arm_compute_sve2
   PUBLIC $<INSTALL_INTERFACE:include>
@@ -187,6 +188,7 @@
 
 add_library(arm_compute_core "")
 target_compile_options(arm_compute_core PRIVATE "-march=armv8.2-a+fp16")
+target_compile_definitions(arm_compute_core PRIVATE ARM_COMPUTE_ENABLE_BF16)
 target_include_directories(
   arm_compute_core
   PUBLIC $<INSTALL_INTERFACE:include>
@@ -201,6 +203,8 @@
 target_compile_options(arm_compute_core PUBLIC ${COMMON_CXX_FLAGS})
 
 add_library(ArmCompute::Core ALIAS arm_compute_core)
+target_link_libraries(
+  arm_compute_core PUBLIC arm_compute_sve arm_compute_sve2)
 
 # ---------------------------------------------------------------------
 # Graph Library
@@ -255,7 +259,9 @@
 
   add_executable(arm_compute_validation "")
   target_compile_options(arm_compute_validation PRIVATE "-march=armv8.2-a+fp16")
-
+  if(ENABLE_BF16_VALIDATION)
+    target_compile_definitions(arm_compute_validation PRIVATE ARM_COMPUTE_ENABLE_BF16)
+  endif()
   add_subdirectory(tests/validation)
   target_compile_options(arm_compute_validation PUBLIC ${COMMON_CXX_FLAGS})
   set_target_properties(
diff --git a/cmake/Options.cmake b/cmake/Options.cmake
index 20cf0e4..170dad9 100644
--- a/cmake/Options.cmake
+++ b/cmake/Options.cmake
@@ -50,7 +50,6 @@
 # ---------------------------------------------------------------------
 # Backends
 
-# TODO Add help string for each setting (Should user be able to )
 option(ENABLE_NEON "Enable Arm® Neon™ support" ON)
 option(ARM_COMPUTE_CPU_ENABLED "" ON)
 option(ARM_COMPUTE_ENABLE_NEON "" ON)
@@ -65,11 +64,12 @@
 option(ENABLE_NHWC_KERNELS "" ON)
 option(ENABLE_NCHW_KERNELS "" ON)
 option(ARM_COMPUTE_GRAPH_ENABLED "" ON)
-option(ARM_COMPUTE_ENABLE_BF16 "" ON)
 option(ARM_COMPUTE_ENABLE_SVEF32MM "" ON)
 option(ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS "" ON)
+option(ENABLE_SVE "" ON)
+option(ARM_COMPUTE_ENABLE_SVE "" ON)
+option(ENABLE_BF16_VALIDATION "" OFF)
 
-# TODO Check if this is required
 if(ENABLE_NEON)
   add_definitions(-DENABLE_NEON)
 endif()
@@ -121,3 +121,10 @@
 if(ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS)
   add_definitions(-DARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS)
 endif()
+if(ENABLE_SVE)
+  add_definitions(-DENABLE_SVE)
+endif()
+if(ARM_COMPUTE_ENABLE_SVE)
+  add_definitions(-DARM_COMPUTE_ENABLE_SVE)
+endif()
+add_definitions(-D_GLIBCXX_USE_NANOSLEEP)
\ No newline at end of file
diff --git a/docs/user_guide/how_to_build_and_run_examples.dox b/docs/user_guide/how_to_build_and_run_examples.dox
index ab87989..8aab445 100644
--- a/docs/user_guide/how_to_build_and_run_examples.dox
+++ b/docs/user_guide/how_to_build_and_run_examples.dox
@@ -510,7 +510,7 @@
 
 	mkdir build
 	cd build
-	cmake .. -DOPENMP=1 -DCPPTHREADS=1 -DWERROR=0 -DDEBUG=0 -DBUILD_EXAMPLES=1 -DBUILD_TESTING=1 -DCMAKE_INSTALL_LIBDIR=.
+	cmake .. -DOPENMP=1 -DWERROR=0 -DDEBUG=0 -DBUILD_EXAMPLES=1 -DBUILD_TESTING=1 -DCMAKE_INSTALL_LIBDIR=.
 	cmake --build . -j32
 
 */
diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel
index 57ea3f6..8122652 100644
--- a/tests/BUILD.bazel
+++ b/tests/BUILD.bazel
@@ -35,8 +35,23 @@
         "validation/**/*.h",
     ]),
     copts = [
-        "-march=armv8.2-a+fp16",
-    ],
+                "-march=armv8.2-a+fp16",
+            ] + select({
+                "//:debug_flag": [
+                    "-O0",
+                    "-g",
+                    "-gdwarf-2",
+                ],
+                "//conditions:default": ["-O3"],
+            }) +
+            select({
+                "//:openmp_flag": ["-fopenmp"],
+                "//conditions:default": [],
+            }) +
+            select({
+                "//:Werror_flag": ["-Werror"],
+                "//conditions:default": [],
+            }),
     linkstatic = True,
     deps = [
         "//:arm_compute",
@@ -60,9 +75,23 @@
         "instruments/*.h",
     ]),
     copts = [
-        "-march=armv8.2-a+fp16",
-        "-ffp-contract=off",
-    ],
+                "-march=armv8.2-a+fp16",
+            ] + select({
+                "//:debug_flag": [
+                    "-O0",
+                    "-g",
+                    "-gdwarf-2",
+                ],
+                "//conditions:default": ["-O3"],
+            }) +
+            select({
+                "//:openmp_flag": ["-fopenmp"],
+                "//conditions:default": [],
+            }) +
+            select({
+                "//:Werror_flag": ["-Werror"],
+                "//conditions:default": [],
+            }),
     linkstatic = True,
     deps = [
         ":validation_framework",
@@ -71,6 +100,13 @@
         "//:common_defines",
         "//tests/framework",
     ],
+    local_defines = [] + 
+        select({
+                "//:bf16_validation_flag": [
+                "ARM_COMPUTE_ENABLE_BF16",
+                ],
+                "//conditions:default": [],
+              })
 )
 
 #---------------------------------------------------------------------
@@ -82,7 +118,24 @@
         "benchmark/NEON/*.cpp",
         "*.cpp",
     ]),
-    copts = ["-march=armv8.2-a+fp16"],
+    copts = [
+                "-march=armv8.2-a+fp16",
+            ] + select({
+                "//:debug_flag": [
+                    "-O0",
+                    "-g",
+                    "-gdwarf-2",
+                ],
+                "//conditions:default": ["-O3"],
+            }) +
+            select({
+                "//:openmp_flag": ["-fopenmp"],
+                "//conditions:default": [],
+            }) +
+            select({
+                "//:Werror_flag": ["-Werror"],
+                "//conditions:default": [],
+            }),
     linkstatic = True,
     deps = [
         ":arm_compute_validation",
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 08b6a02..ea161a1 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -530,6 +530,7 @@
 TEST_SUITE_END() // WinogradLayer
 
 #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
+TEST_SUITE(FIXED_FORMAT_KERNELS)
 TEST_SUITE(VariableWeightUtils)
 
 // UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it.
@@ -588,6 +589,8 @@
     ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS);
 }
 
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+
 FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                        combine(framework::dataset::make("DataType", { DataType::F32 }),
                                framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
@@ -604,6 +607,8 @@
     ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS);
 }
 
+#endif // ARM_COMPUTE_ENABLE_BF16
+
 // UC3_1_* tests: the user queries for ANY fixed format, but there is
 // no kernel that support the use case specified by the user (for
 // example, there is no fixed format kernel for the datatype of the
@@ -664,6 +669,8 @@
     ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
 }
 
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+
 FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                        combine(framework::dataset::make("DataType", { DataType::F32 }),
                                framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
@@ -684,6 +691,8 @@
     ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
 }
 
+#endif // ARM_COMPUTE_ENABLE_BF16
+
 namespace
 {
 using TestCaseType          = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>;
@@ -839,6 +848,7 @@
 #endif // ARM_COMPUTE_ENABLE_BF16
 
 TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures
+TEST_SUITE_END() // FIXED_FORMAT_KERNELS
 
 #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS