Move LUT kernel to SVE2 category

This specific LUT kernel uses SVE2 instructions, so it is now built
and selected as an SVE2 kernel instead of an SVE one.

Resolves: COMPMID-6268

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I44fa3812e96fa79b3d1e1e3a31d587581f59f0e1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9675
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/filelist.json b/filelist.json
index 807000a..c12873a 100644
--- a/filelist.json
+++ b/filelist.json
@@ -891,11 +891,13 @@
           },
           "sve": {
             "fp16": [ "src/cpu/kernels/activation/generic/sve/fp16.cpp" ],
-            "fp32": [ "src/cpu/kernels/activation/generic/sve/fp32.cpp" ],
-            "qasymm8": ["src/cpu/kernels/activation/generic/sve/lut.cpp"]
+            "fp32": [ "src/cpu/kernels/activation/generic/sve/fp32.cpp" ]
           },
           "sve2":{
-            "qasymm8": [ "src/cpu/kernels/activation/generic/sve2/qasymm8.cpp" ],
+            "qasymm8": [
+              "src/cpu/kernels/activation/generic/sve2/qasymm8.cpp",
+              "src/cpu/kernels/activation/generic/sve2/lut.cpp"
+            ],
             "qasymm8_signed": [ "src/cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp" ],
             "qsymm16": [ "src/cpu/kernels/activation/generic/sve2/qsymm16.cpp" ]
           }
@@ -1454,9 +1456,11 @@
             "common": ["src/cpu/kernels/elementwise_unary/generic/sve/impl.cpp" ],
             "integer": ["src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp"],
             "fp32": ["src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp"],
-            "fp16": ["src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp"],
-            "qasymm8": ["src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp"],
-            "qasymm8_signed": ["src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp"]
+            "fp16": ["src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp"]
+          },
+          "sve2": {
+            "qasymm8": ["src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp"],
+            "qasymm8_signed": ["src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp"]
           }
         }
       },
@@ -1802,9 +1806,9 @@
             "qasymm8": ["src/cpu/kernels/lut/generic/neon/u8.cpp"],
             "qasymm8_signed": ["src/cpu/kernels/lut/generic/neon/u8.cpp"]
           },
-          "sve": {
-            "qasymm8": ["src/cpu/kernels/lut/generic/sve/u8.cpp"],
-            "qasymm8_signed": ["src/cpu/kernels/lut/generic/sve/u8.cpp"]
+          "sve2": {
+            "qasymm8": ["src/cpu/kernels/lut/generic/sve2/u8.cpp"],
+            "qasymm8_signed": ["src/cpu/kernels/lut/generic/sve2/u8.cpp"]
           }
         }
       },
diff --git a/src/BUILD.bazel b/src/BUILD.bazel
index 8f35c7a..12dc1ca 100644
--- a/src/BUILD.bazel
+++ b/src/BUILD.bazel
@@ -107,7 +107,8 @@
 
 filegroup(
         name = "arm_compute_sve2_srcs",
-        srcs = ["cpu/kernels/activation/generic/sve2/qasymm8.cpp",
+        srcs = ["cpu/kernels/activation/generic/sve2/lut.cpp",
+	"cpu/kernels/activation/generic/sve2/qasymm8.cpp",
 	"cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp",
 	"cpu/kernels/activation/generic/sve2/qsymm16.cpp",
 	"cpu/kernels/add/generic/sve2/qasymm8.cpp",
@@ -115,6 +116,8 @@
 	"cpu/kernels/add/generic/sve2/qsymm16.cpp",
 	"cpu/kernels/elementwise_binary/generic/sve2/qasymm8.cpp",
 	"cpu/kernels/elementwise_binary/generic/sve2/qasymm8_signed.cpp",
+	"cpu/kernels/elementwise_unary/generic/sve2/q8.cpp",
+	"cpu/kernels/lut/generic/sve2/u8.cpp",
 	"cpu/kernels/softmax/generic/sve2/impl.cpp",
 	"cpu/kernels/softmax/generic/sve2/qasymm8.cpp",
 	"cpu/kernels/softmax/generic/sve2/qasymm8_signed.cpp"]  +
@@ -323,7 +326,6 @@
 	"core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp",
 	"cpu/kernels/activation/generic/sve/fp16.cpp",
 	"cpu/kernels/activation/generic/sve/fp32.cpp",
-	"cpu/kernels/activation/generic/sve/lut.cpp",
 	"cpu/kernels/add/generic/sve/fp16.cpp",
 	"cpu/kernels/add/generic/sve/fp32.cpp",
 	"cpu/kernels/add/generic/sve/impl.cpp",
@@ -336,8 +338,6 @@
 	"cpu/kernels/elementwise_unary/generic/sve/fp32.cpp",
 	"cpu/kernels/elementwise_unary/generic/sve/impl.cpp",
 	"cpu/kernels/elementwise_unary/generic/sve/integer.cpp",
-	"cpu/kernels/elementwise_unary/generic/sve/q8.cpp",
-	"cpu/kernels/lut/generic/sve/u8.cpp",
 	"cpu/kernels/scale/sve/fp16.cpp",
 	"cpu/kernels/scale/sve/fp32.cpp",
 	"cpu/kernels/scale/sve/integer.cpp",
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cb48692..5d756da 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -302,7 +302,6 @@
 	core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp
 	cpu/kernels/activation/generic/sve/fp16.cpp
 	cpu/kernels/activation/generic/sve/fp32.cpp
-	cpu/kernels/activation/generic/sve/lut.cpp
 	cpu/kernels/add/generic/sve/fp16.cpp
 	cpu/kernels/add/generic/sve/fp32.cpp
 	cpu/kernels/add/generic/sve/impl.cpp
@@ -315,8 +314,6 @@
 	cpu/kernels/elementwise_unary/generic/sve/fp32.cpp
 	cpu/kernels/elementwise_unary/generic/sve/impl.cpp
 	cpu/kernels/elementwise_unary/generic/sve/integer.cpp
-	cpu/kernels/elementwise_unary/generic/sve/q8.cpp
-	cpu/kernels/lut/generic/sve/u8.cpp
 	cpu/kernels/scale/sve/fp16.cpp
 	cpu/kernels/scale/sve/fp32.cpp
 	cpu/kernels/scale/sve/integer.cpp
@@ -332,7 +329,8 @@
 target_sources(
     arm_compute_sve2
     PRIVATE
-    cpu/kernels/activation/generic/sve2/qasymm8.cpp
+    cpu/kernels/activation/generic/sve2/lut.cpp
+	cpu/kernels/activation/generic/sve2/qasymm8.cpp
 	cpu/kernels/activation/generic/sve2/qasymm8_signed.cpp
 	cpu/kernels/activation/generic/sve2/qsymm16.cpp
 	cpu/kernels/add/generic/sve2/qasymm8.cpp
@@ -340,6 +338,8 @@
 	cpu/kernels/add/generic/sve2/qsymm16.cpp
 	cpu/kernels/elementwise_binary/generic/sve2/qasymm8.cpp
 	cpu/kernels/elementwise_binary/generic/sve2/qasymm8_signed.cpp
+	cpu/kernels/elementwise_unary/generic/sve2/q8.cpp
+	cpu/kernels/lut/generic/sve2/u8.cpp
 	cpu/kernels/softmax/generic/sve2/impl.cpp
 	cpu/kernels/softmax/generic/sve2/qasymm8.cpp
 	cpu/kernels/softmax/generic/sve2/qasymm8_signed.cpp
@@ -976,5 +976,4 @@
 	runtime/Tensor.cpp
 	runtime/TensorAllocator.cpp
 	runtime/Utils.cpp
-)
-    
\ No newline at end of file
+)
\ No newline at end of file
diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 04a9731..20a8489 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,9 +47,9 @@
 {
 #ifdef ARM_COMPUTE_ENABLE_SVE
     {
-        "sve_q8_activation_lut",
-        [](const ActivationDataTypeISASelectorData & data) { return ActivationLayerInfo::is_lut_supported(data.f, data.dt) && data.cpumodel == CPUModel::A510 && data.isa.sve; },
-        REGISTER_QASYMM8_SVE(arm_compute::cpu::sve_q8_activation_lut)
+        "sve2_q8_activation_lut",
+        [](const ActivationDataTypeISASelectorData & data) { return ActivationLayerInfo::is_lut_supported(data.f, data.dt) && data.cpumodel == CPUModel::A510 && data.isa.sve2; },
+        REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_q8_activation_lut)
     },
 #endif // ARM_COMPUTE_ENABLE_SVE
 #ifdef __aarch64__
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index dbb752a..04a7f15 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -167,12 +167,12 @@
     },
 #ifdef __aarch64__
     {
-        "sve_q8_elementwise_unary",
+        "sve2_q8_elementwise_unary",
         [](const DataTypeISASelectorData & data)
         {
-            return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve;
+            return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve2;
         },
-        REGISTER_QASYMM8_SVE(sve_q8_elementwise_unary),
+        REGISTER_QASYMM8_SVE2(sve2_q8_elementwise_unary),
         &q8_prepare_lut,
     },
     {
diff --git a/src/cpu/kernels/activation/generic/sve/lut.cpp b/src/cpu/kernels/activation/generic/sve2/lut.cpp
similarity index 90%
rename from src/cpu/kernels/activation/generic/sve/lut.cpp
rename to src/cpu/kernels/activation/generic/sve2/lut.cpp
index b404266..2e59757 100644
--- a/src/cpu/kernels/activation/generic/sve/lut.cpp
+++ b/src/cpu/kernels/activation/generic/sve2/lut.cpp
@@ -30,7 +30,7 @@
 namespace cpu
 {
 #ifdef __aarch64__
-void sve_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+void sve2_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
 {
     ARM_COMPUTE_ERROR_ON(!ActivationLayerInfo::is_lut_supported(act_info.activation(), src->info()->data_type()));
     const auto window_end_x  = window.x().end();
@@ -42,7 +42,7 @@
     {
         const auto input_ptr  = input.ptr();
         auto       output_ptr = output.ptr();
-        lut_u8_sve(act_info.lut().data(), 1u, window_end_x, &input_ptr, &output_ptr);
+        lut_u8_sve2(act_info.lut().data(), 1u, window_end_x, &input_ptr, &output_ptr);
     },
     input, output);
 }
diff --git a/src/cpu/kernels/activation/list.h b/src/cpu/kernels/activation/list.h
index c2149b3..6550ddf 100644
--- a/src/cpu/kernels/activation/list.h
+++ b/src/cpu/kernels/activation/list.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022 Arm Limited.
+ * Copyright (c) 2020-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,7 +34,7 @@
 #ifdef __aarch64__
 DECLARE_ACTIVATION_KERNEL(neon_q8_activation_lut);
 #endif // __aarch64__
-DECLARE_ACTIVATION_KERNEL(sve_q8_activation_lut);
+DECLARE_ACTIVATION_KERNEL(sve2_q8_activation_lut);
 DECLARE_ACTIVATION_KERNEL(neon_qasymm8_activation);
 DECLARE_ACTIVATION_KERNEL(sve2_qasymm8_activation);
 DECLARE_ACTIVATION_KERNEL(neon_qasymm8_signed_activation);
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp b/src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp
similarity index 82%
rename from src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp
rename to src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp
index b68f691..7e32f50 100644
--- a/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/sve2/q8.cpp
@@ -29,23 +29,23 @@
 {
 namespace cpu
 {
-
-void sve_q8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
+void sve2_q8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
 {
     ARM_COMPUTE_UNUSED(op);
 
-    auto win = window;
+    auto       win          = window;
     const auto window_end_x = window.x().end();
     win.set(0, Window::Dimension(0, 1, 1));
 
     Iterator src_it(in, win);
     Iterator dst_it(out, win);
 
-    execute_window_loop(win, [&](const Coordinates &) {
+    execute_window_loop(win, [&](const Coordinates &)
+    {
         const auto src_ptr = src_it.ptr();
-        auto dst_ptr = dst_it.ptr();
+        auto       dst_ptr = dst_it.ptr();
 
-        lut_u8_sve(lut, 1, window_end_x, &src_ptr, &dst_ptr);
+        lut_u8_sve2(lut, 1, window_end_x, &src_ptr, &dst_ptr);
     },
     src_it, dst_it);
 }
diff --git a/src/cpu/kernels/elementwise_unary/list.h b/src/cpu/kernels/elementwise_unary/list.h
index 432fabf..a9701af 100644
--- a/src/cpu/kernels/elementwise_unary/list.h
+++ b/src/cpu/kernels/elementwise_unary/list.h
@@ -37,7 +37,7 @@
 DECLARE_ELEMETWISE_UNARY_KERNEL(sve_fp32_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(sve_fp16_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(sve_s32_elementwise_unary);
-DECLARE_ELEMETWISE_UNARY_KERNEL(sve_q8_elementwise_unary);
+DECLARE_ELEMETWISE_UNARY_KERNEL(sve2_q8_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp32_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp16_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(neon_s32_elementwise_unary);
diff --git a/src/cpu/kernels/lut/generic/sve/u8.cpp b/src/cpu/kernels/lut/generic/sve2/u8.cpp
similarity index 99%
rename from src/cpu/kernels/lut/generic/sve/u8.cpp
rename to src/cpu/kernels/lut/generic/sve2/u8.cpp
index 70f3a2e..b80d753 100644
--- a/src/cpu/kernels/lut/generic/sve/u8.cpp
+++ b/src/cpu/kernels/lut/generic/sve2/u8.cpp
@@ -31,8 +31,7 @@
 {
 namespace cpu
 {
-
-void lut_u8_sve(
+void lut_u8_sve2(
     const uint8_t        *table,
     size_t                num_strings,
     size_t                string_length,
diff --git a/src/cpu/kernels/lut/list.h b/src/cpu/kernels/lut/list.h
index 9749b91..7a2afc6 100644
--- a/src/cpu/kernels/lut/list.h
+++ b/src/cpu/kernels/lut/list.h
@@ -43,7 +43,7 @@
         uint8_t *const       *output)
 
 DECLARE_LUT_KERNEL(lut_u8_neon);
-DECLARE_LUT_KERNEL(lut_u8_sve);
+DECLARE_LUT_KERNEL(lut_u8_sve2);
 
 #undef DECLARE_LUT_KERNEL
 #endif // __aarch64__