Add quantized support for unary elementwise in CPU

* Add quantized unary elementwise in CPU using LUT.
* Widen the input data range of the test suite.
  - Fix CPU exponential function overflow/underflow range.
  - Fix saturation issue of CL round operator.

Resolves: COMPMID-5763
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I41445de2b4a33ec6b01e0ab701516c240c852d0b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9367
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp b/src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp
index 976d006..b2833c2 100644
--- a/src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,8 +29,9 @@
 {
 namespace cpu
 {
-void neon_fp16_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+void neon_fp16_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
 {
+    ARM_COMPUTE_UNUSED(lut); // `lut` is not read by this kernel; it is accepted only so the function matches the shared kernel signature declared in elementwise_unary/list.h.
     return elementwise_op<__fp16>(in, out, window, op);
 }
 }
diff --git a/src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp b/src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp
index 21f4d9d..6566821 100644
--- a/src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,8 +28,9 @@
 {
 namespace cpu
 {
-void neon_fp32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+void neon_fp32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
 {
+    ARM_COMPUTE_UNUSED(lut); // `lut` is not read by this kernel; it is accepted only so the function matches the shared kernel signature declared in elementwise_unary/list.h.
     return elementwise_op<float>(in, out, window, op);
 }
 }
diff --git a/src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp b/src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp
index ef3120e..dfe5e30 100644
--- a/src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,8 +28,9 @@
 {
 namespace cpu
 {
-void neon_s32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+void neon_s32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
 {
+    ARM_COMPUTE_UNUSED(lut); // `lut` is not read by this kernel; it is accepted only so the function matches the shared kernel signature declared in elementwise_unary/list.h.
     return elementwise_op<int32_t>(in, out, window, op);
 }
 }
diff --git a/src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp b/src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp
new file mode 100644
index 0000000..08bb7f2
--- /dev/null
+++ b/src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Helpers.h"
+#include "src/cpu/kernels/lut/list.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+
+#ifdef __aarch64__
+
+void neon_q8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) // Runs lut_u8_neon over the window using the caller-supplied `lut`; this definition is compiled only when __aarch64__ is defined.
+{
+    ARM_COMPUTE_UNUSED(op); // `op` is never read here; only `lut` is forwarded (presumably the table already encodes the operation - confirm with the caller).
+
+    auto win = window; // Mutable copy: `window` is a const reference and dimension 0 is overwritten below.
+    const auto window_end_x = window.x().end(); // Read from the original window, i.e. before dimension 0 of the copy is replaced.
+    win.set(0, Window::Dimension(0, 1, 1)); // Presumably (start, end, step): a single iteration along x, so the lambda runs once per row - TODO confirm.
+
+    Iterator src_it(in, win);
+    Iterator dst_it(out, win);
+
+    execute_window_loop(win, [&](const Coordinates &) {
+        const auto src_ptr = src_it.ptr(); // Current input position reported by the iterator.
+        auto dst_ptr = dst_it.ptr(); // Current output position reported by the iterator.
+
+        lut_u8_neon(lut, 1, window_end_x, &src_ptr, &dst_ptr); // Presumably (table, num_strings = 1, string_length, inputs, outputs) - confirm in src/cpu/kernels/lut/list.h. NOTE(review): window.x().start() is not consulted, so this appears to assume the x range starts at 0.
+    },
+    src_it, dst_it);
+}
+
+#endif // __aarch64__
+
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp
index ba29b3d..01567a7 100644
--- a/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/sve/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,10 +29,11 @@
 {
 namespace cpu
 {
-void sve_fp16_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+void sve_fp16_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
 {
+    ARM_COMPUTE_UNUSED(lut); // `lut` is not read by this kernel; it is accepted only so the function matches the shared kernel signature declared in elementwise_unary/list.h.
     return elementwise_sve_op<float16_t>(in, out, window, op);
 }
 }
 } // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
\ No newline at end of file
+#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp
index c5222c5..47645ff 100644
--- a/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/sve/fp32.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,8 +29,9 @@
 {
 namespace cpu
 {
-void sve_fp32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+void sve_fp32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
 {
+    ARM_COMPUTE_UNUSED(lut); // `lut` is not read by this kernel; it is accepted only so the function matches the shared kernel signature declared in elementwise_unary/list.h.
     return elementwise_sve_op<float32_t>(in, out, window, op);
 }
 }
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp
index 984056a..068c3f7 100644
--- a/src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/sve/integer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,8 +29,9 @@
 {
 namespace cpu
 {
-void sve_s32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+void sve_s32_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
 {
+    ARM_COMPUTE_UNUSED(lut); // `lut` is not read by this kernel; it is accepted only so the function matches the shared kernel signature declared in elementwise_unary/list.h.
     return elementwise_sve_op<int32_t>(in, out, window, op);
 }
 }
diff --git a/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp b/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp
new file mode 100644
index 0000000..b68f691
--- /dev/null
+++ b/src/cpu/kernels/elementwise_unary/generic/sve/q8.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Helpers.h"
+#include "src/cpu/kernels/lut/list.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+
+void sve_q8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) // Runs lut_u8_sve over the window using the caller-supplied `lut`.
+{
+    ARM_COMPUTE_UNUSED(op); // `op` is never read here; only `lut` is forwarded (presumably the table already encodes the operation - confirm with the caller).
+
+    auto win = window; // Mutable copy: `window` is a const reference and dimension 0 is overwritten below.
+    const auto window_end_x = window.x().end(); // Read from the original window, i.e. before dimension 0 of the copy is replaced.
+    win.set(0, Window::Dimension(0, 1, 1)); // Presumably (start, end, step): a single iteration along x, so the lambda runs once per row - TODO confirm.
+
+    Iterator src_it(in, win);
+    Iterator dst_it(out, win);
+
+    execute_window_loop(win, [&](const Coordinates &) {
+        const auto src_ptr = src_it.ptr(); // Current input position reported by the iterator.
+        auto dst_ptr = dst_it.ptr(); // Current output position reported by the iterator.
+
+        lut_u8_sve(lut, 1, window_end_x, &src_ptr, &dst_ptr); // Presumably (table, num_strings = 1, string_length, inputs, outputs) - confirm in src/cpu/kernels/lut/list.h. NOTE(review): window.x().start() is not consulted, so this appears to assume the x range starts at 0.
+    },
+    src_it, dst_it);
+}
+
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/cpu/kernels/elementwise_unary/list.h b/src/cpu/kernels/elementwise_unary/list.h
index 2a41b74..04c3bb6 100644
--- a/src/cpu/kernels/elementwise_unary/list.h
+++ b/src/cpu/kernels/elementwise_unary/list.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,17 +32,19 @@
 namespace cpu
 {
 #define DECLARE_ELEMETWISE_UNARY_KERNEL(func_name) \
-    void func_name(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+    void func_name(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut) // `lut`: table pointer forwarded by the q8 kernels to lut_u8_neon / lut_u8_sve and marked unused by the fp16/fp32/s32 kernels.
 
 DECLARE_ELEMETWISE_UNARY_KERNEL(sve_fp32_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(sve_fp16_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(sve_s32_elementwise_unary);
+DECLARE_ELEMETWISE_UNARY_KERNEL(sve_q8_elementwise_unary); // Defined in generic/sve/q8.cpp; forwards `lut` to lut_u8_sve.
 DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp32_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp16_elementwise_unary);
 DECLARE_ELEMETWISE_UNARY_KERNEL(neon_s32_elementwise_unary);
+DECLARE_ELEMETWISE_UNARY_KERNEL(neon_q8_elementwise_unary); // Defined in generic/neon/q8.cpp. NOTE(review): that definition is guarded by #ifdef __aarch64__ while this declaration is not.
 
 #undef DECLARE_ELEMETWISE_UNARY_KERNEL
 
 } // namespace cpu
 } // namespace arm_compute
-#endif // SRC_CORE_KERNELS_ELEMETWISE_UNARY_LIST_H
\ No newline at end of file
+#endif // SRC_CORE_KERNELS_ELEMETWISE_UNARY_LIST_H