Removes `experimental` from `experimental_fixed_format_kernels` flag

Renames `experimental_fixed_format_kernels` build option to
`fixed_format_kernels`.

Adds documentation for the flag covering basics:
- What fixed-format kernels are
- Why they're needed
- Which backend they're for (i.e. CPU)
- Some pointers on how to use them.

Resolves: ONCPUML-1253
Change-Id: I428c98614c309c9ffc32d0f32daa24740f7cb967
Signed-off-by: Nathan John Sircombe <nathan.sircombe@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9523
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/SConscript b/SConscript
index e6ef73c..7ea59d2 100644
--- a/SConscript
+++ b/SConscript
@@ -511,16 +511,15 @@
 # Common backend files
 lib_files = filelist['common']
 
+# Fixed format GEMM kernels.
+if env['fixed_format_kernels']:
+    arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS'])
+
 # Experimental files
 # Dynamic fusion
 if env['experimental_dynamic_fusion']:
     lib_files += filelist['experimental']['dynamic_fusion']
 
-# Fixed format GEMM kernels.
-if env['experimental_fixed_format_kernels']:
-    arm_compute_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS'])
-
-
 # Logging files
 if env["logging"]:
     lib_files += filelist['logging']
@@ -596,8 +595,9 @@
     else:
         attrs = get_attrs_list(env, env['data_type_support'], env['data_layout_support'])
 
-    if env['experimental_fixed_format_kernels']:
-        attrs.append("experimental_fixed_format_kernels")
+
+    if env['fixed_format_kernels']:
+        attrs.append("fixed_format_kernels")
 
     # Setup data-type and data-layout files to include
     cpu_operators = custom_operators if use_custom_ops else filelist['cpu']['operators'].keys()
diff --git a/SConstruct b/SConstruct
index 2e20931..d6f15c0 100644
--- a/SConstruct
+++ b/SConstruct
@@ -126,7 +126,7 @@
             ├── fixtures
             └── Neon\n""", "", PathVariable.PathAccept),
     BoolVariable("experimental_dynamic_fusion", "Build the experimental dynamic fusion files", False),
-    BoolVariable("experimental_fixed_format_kernels", "Enable fixed format kernels for GEMM", False),
+    BoolVariable("fixed_format_kernels", "Enable fixed format kernels for GEMM", False),
     BoolVariable("mapfile", "Generate a map file", False),
     ListVariable("custom_options", "Custom options that can be used to turn on/off features", "none", ["disable_mmla_fp"]),
     ListVariable("data_type_support", "Enable a list of data types to support", "all", ["qasymm8", "qasymm8_signed", "qsymm16", "fp16", "fp32", "integer"]),
diff --git a/docs/user_guide/how_to_build_and_run_examples.dox b/docs/user_guide/how_to_build_and_run_examples.dox
index 8aab445..e0079cf 100644
--- a/docs/user_guide/how_to_build_and_run_examples.dox
+++ b/docs/user_guide/how_to_build_and_run_examples.dox
@@ -513,5 +513,19 @@
 	cmake .. -DOPENMP=1 -DWERROR=0 -DDEBUG=0 -DBUILD_EXAMPLES=1 -DBUILD_TESTING=1 -DCMAKE_INSTALL_LIBDIR=.
 	cmake --build . -j32
 
+@section S1_8_fixed_format Building with support for fixed format kernels
+
+@subsection S1_8_1_intro_to_fixed_format_kernels What are fixed format kernels?
+
+The GEMM kernels used for convolutions and fully-connected layers in Compute Library employ memory layouts optimized for each kernel implementation. This then requires the supplied weights to be re-ordered into a buffer ready for consumption by the GEMM kernel. Where Compute Library is being called from a framework or library which implements operator caching, the re-ordering of the input weights into an intermediate buffer may no longer be desirable. When using a cached operator, the caller may wish to re-write the weights tensor, and re-run the operator using the updated weights. With the default GEMM kernels in Compute Library, the GEMM will be executed with the old weights, leading to incorrect results.
+
+To address this, Compute Library provides a set of GEMM kernels which use a common blocked memory format. These kernels consume the input weights directly from the weights buffer and do not execute an intermediate pre-transpose step. With this approach, it is the responsibility of the user (in this case the calling framework) to ensure that the weights are re-ordered into the required memory format. @ref NEGEMM::has_opt_impl is a static function that queries whether a fixed-format kernel exists and, if so, reports the expected weights format. The supported weight formats are enumerated in @ref arm_compute::WeightFormat.
+
+@subsection S1_8_2_building_fixed_format Building with fixed format kernels
+
+Fixed format kernels are only available for the CPU backend. To build Compute Library with fixed format kernels, set the build option `fixed_format_kernels=1`:
+
+        scons Werror=1 debug=0 neon=1 opencl=0 embed_kernels=0 os=linux multi_isa=1 build=native cppthreads=1 openmp=0 fixed_format_kernels=1
+
 */
 } // namespace arm_compute
diff --git a/filelist.json b/filelist.json
index efb0182..fc4aaeb 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1649,7 +1649,7 @@
             "estate64": [
               "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp"
             ],
-            "experimental_fixed_format_kernels": [
+            "fixed_format_kernels": [
               "src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16/generic.cpp",
               "src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32/generic.cpp",
               "src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp",
@@ -1725,7 +1725,7 @@
               "src/core/NEON/kernels/arm_gemm/transform-sve.cpp",
               "src/core/NEON/kernels/arm_gemm/misc-sve.cpp"
             ],
-            "experimental_fixed_format_kernels": [
+            "fixed_format_kernels": [
               "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp",
               "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp",
               "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp",
diff --git a/scripts/generate_build_files.py b/scripts/generate_build_files.py
index 8f8ed0e..0e639bf 100644
--- a/scripts/generate_build_files.py
+++ b/scripts/generate_build_files.py
@@ -234,9 +234,9 @@
     data_types = ["qasymm8", "qasymm8_signed", "qsymm16",
                   "fp16", "fp32", "integer"]  # Are all needed?
     data_layouts = ["nhwc", "nchw"]  # Are both needed?
-    experimental_fixed_format_kernels = ["experimental_fixed_format_kernels"]
+    fixed_format_kernels = ["fixed_format_kernels"]
     attrs = data_types + data_layouts + \
-        experimental_fixed_format_kernels + ["estate64"]
+        fixed_format_kernels + ["estate64"]
 
     # Setup data-type and data-layout files to include
     cpu_operators = filelist['cpu']['operators'].keys()
diff --git a/tests/SConscript b/tests/SConscript
index 8596cfa..33f709e 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 
-# Copyright (c) 2017-2022 Arm Limited.
+# Copyright (c) 2017-2023 Arm Limited.
 #
 # SPDX-License-Identifier: MIT
 #
@@ -185,7 +185,7 @@
         test_env['LINKFLAGS'].append('-static-openmp')
 
 # Testing for fixed format GEMM kernels.
-if env['experimental_fixed_format_kernels'] and test_env['validation_tests']:
+if env['fixed_format_kernels'] and test_env['validation_tests']:
     test_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS'])
 
 if test_env['validation_tests']: