Fix nightly failures in MatMulLowpNativeKernel when using bounded activation functions - Added checks for supported activation functions in MatMulLowpKernel validate - Replaced incorrect float activation macro with quantized implementation in mat_mul_quantized Resolves: [COMPMID-6339] Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> Change-Id: I15661f14877f1d3305644e6473feb5482a67e773 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/532858 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com> Comments-Addressed: bsgcomp <bsgcomp@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9855 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>

commit: c9eeee5c84ad817360a1719c538c6e6c0812ec13 [log] [tgz]
author: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> Fri Jun 30 15:43:29 2023 +0100
committer: Mohmun02 <MohammedSuhail.Munshi@arm.com> Thu Jul 06 09:49:03 2023 +0000
tree: 6c80020617e83b0889e092d685940c7937f41d2c
parent: ce3c48c7af02555f81c0f5e7ef2677916cecef34 [diff] [blame]
diff --git a/src/core/CL/cl_kernels/common/mat_mul_quantized.cl b/src/core/CL/cl_kernels/common/mat_mul_quantized.cl
index 8cf857d..7029af2 100644
--- a/src/core/CL/cl_kernels/common/mat_mul_quantized.cl
+++ b/src/core/CL/cl_kernels/common/mat_mul_quantized.cl

@@ -34,6 +34,7 @@
  * @note The block's dimensions used for the LHS and RHS matrices (M0, N0 and K0) must be passed at compile time using -DN0, -DM0 and -DK0 (e.g. -DN0=8, -DM0=4, -DK0=4).
  * @note The number of leftover outputs rows/columns must be passed using -DPARTIAL_STORE_N0 and -DPARTIAL_STORE_M0 (e.g. -DPARTIAL_STORE_N0=2, -DPARTIAL_STORE_M0=3)
  * @note The fused activation function used should be passed with -DACTIVATION_TYPE, -DA_VAL and -DB_VAL are used for min and max output with the relu and bounded relu operations.
+ * @note The value of 0 in quantized format is equivalent to the quantization offset of the output data. This should be passed with -DZERO_POINT
  * @note The dimension K must be passed at compile time using -DK (e.g. -DK=6)
  * @note The kernel name in uppercase must be passed at compile time (e.g. -DMAT_MUL_NATIVE_QUANTIZED_NT_NT)
  * @note Only the following configurations of M0, N0 and K0 are currently supported:
@@ -196,12 +197,12 @@
     const bool x_cond = PARTIAL_STORE_N0 != 0 && get_global_id(0) == 0;
     const bool y_cond = PARTIAL_STORE_M0 != 0 && get_global_id(1) == 0;
 
-    T_ACTIVATION(int, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, acc, acc);
-
     // Quantize the tile
     TILE(DATA_TYPE, M0, N0, accq);
     T_QUANTIZE8_ASYMMETRIC(int, DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, acc, accq);
 
+    T_ACTIVATION_QUANTIZED(DATA_TYPE, M0, N0, ACTIVATION_TYPE, ZERO_POINT, A_VAL, B_VAL, accq, accq);
+
     TILE(int, M0, 1, indirect_buffer);
     LOOP_UNROLLING(int, _i, 0, 1, M0,
     {
@@ -221,6 +222,7 @@
  * @note The block's dimensions used for the LHS and RHS matrices (M0, N0 and K0) must be passed at compile time using -DN0, -DM0 and -DK0 (e.g. -DN0=8, -DM0=4, -DK0=4).
  * @note The number of leftover outputs rows/columns must be passed using -DPARTIAL_STORE_N0 and -DPARTIAL_STORE_M0 (e.g. -DPARTIAL_STORE_N0=2, -DPARTIAL_STORE_M0=3)
  * @note The fused activation function used should be passed with -DACTIVATION_TYPE, -DA_VAL and -DB_VAL are used for min and max output bounded activation functions.
+ * @note The value of 0 in quantized format is equivalent to the quantization offset of the output data. This should be passed with -DZERO_POINT
  * @note The dimension K must be passed at compile time using -DK (e.g. -DK=6)
  * @note The kernel name in uppercase must be passed at compile time (e.g. -DMAT_MUL_NATIVE_QUANTIZED_NT_T)
  * @note Only the following configurations of M0, N0 and K0 are currently supported:
@@ -375,12 +377,12 @@
     const bool x_cond = PARTIAL_STORE_N0 != 0 && get_global_id(0) == 0;
     const bool y_cond = PARTIAL_STORE_M0 != 0 && get_global_id(1) == 0;
 
-    T_ACTIVATION(int, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, acc, acc);
-
     // Quantize the tile
     TILE(DATA_TYPE, M0, N0, accq);
     T_QUANTIZE8_ASYMMETRIC(int, DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, acc, accq);
 
+    T_ACTIVATION_QUANTIZED(DATA_TYPE, M0, N0, ACTIVATION_TYPE, ZERO_POINT, A_VAL, B_VAL, accq, accq);
+
     TILE(int, M0, 1, indirect_buffer);
     LOOP_UNROLLING(int, _i, 0, 1, M0,
     {
@@ -400,6 +402,7 @@
  * @note The block's dimensions used for the LHS and RHS matrices (M0, N0 and K0) must be passed at compile time using -DN0, -DM0 and -DK0 (e.g. -DN0=8, -DM0=4, -DK0=4).
  * @note The number of leftover outputs rows/columns must be passed using -DPARTIAL_STORE_N0 and -DPARTIAL_STORE_M0 (e.g. -DPARTIAL_STORE_N0=2, -DPARTIAL_STORE_M0=3)
  * @note The fused activation function used should be passed with -DACTIVATION_TYPE, -DA_VAL and -DB_VAL are used for min and max output with the relu and bounded relu operations.
+ * @note The value of 0 in quantized format is equivalent to the quantization offset of the output data. This should be passed with -DZERO_POINT
  * @note The dimension K must be passed at compile time using -DK (e.g. -DK=6)
  * @note The kernel name in uppercase must be passed at compile time (e.g. -DMAT_MUL_NATIVE_QUANTIZED_T_NT)
  * @note Only the following configurations of M0, N0 and K0 are currently supported:
@@ -556,12 +559,12 @@
     const bool x_cond = PARTIAL_STORE_N0 != 0 && get_global_id(0) == 0;
     const bool y_cond = PARTIAL_STORE_M0 != 0 && get_global_id(1) == 0;
 
-    T_ACTIVATION(int, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, acc, acc);
-
     // Quantize the tile
     TILE(DATA_TYPE, M0, N0, accq);
     T_QUANTIZE8_ASYMMETRIC(int, DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, acc, accq);
 
+    T_ACTIVATION_QUANTIZED(DATA_TYPE, M0, N0, ACTIVATION_TYPE, ZERO_POINT, A_VAL, B_VAL, accq, accq);
+
     TILE(int, M0, 1, indirect_buffer);
     LOOP_UNROLLING(int, _i, 0, 1, M0,
     {
@@ -581,6 +584,7 @@
  * @note The block's dimensions used for the LHS and RHS matrices (M0, N0 and K0) must be passed at compile time using -DN0, -DM0 and -DK0 (e.g. -DN0=8, -DM0=4, -DK0=4).
  * @note The number of leftover outputs rows/columns must be passed using -DPARTIAL_STORE_N0 and -DPARTIAL_STORE_M0 (e.g. -DPARTIAL_STORE_N0=2, -DPARTIAL_STORE_M0=3)
  * @note The fused activation function used should be passed with -DACTIVATION_TYPE, -DA_VAL and -DB_VAL are used for min and max output with the relu and bounded relu operations.
+ * @note The value of 0 in quantized format is equivalent to the quantization offset of the output data. This should be passed with -DZERO_POINT
  * @note The dimension K must be passed at compile time using -DK (e.g. -DK=6)
  * @note The kernel name in uppercase must be passed at compile time (e.g. -DMAT_MUL_NATIVE_QUANTIZED_T_T)
  * @note Only the following configurations of M0, N0 and K0 are currently supported:
@@ -742,11 +746,11 @@
     const bool y_cond = PARTIAL_STORE_M0 != 0 && get_global_id(1) == 0;
 
     // Quantize the tile
-    T_ACTIVATION(int, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, acc, acc);
-
     TILE(DATA_TYPE, M0, N0, accq);
     T_QUANTIZE8_ASYMMETRIC(int, DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, acc, accq);
 
+    T_ACTIVATION_QUANTIZED(DATA_TYPE, M0, N0, ACTIVATION_TYPE, ZERO_POINT, A_VAL, B_VAL, accq, accq);
+
     TILE(int, M0, 1, indirect_buffer);
     LOOP_UNROLLING(int, _i, 0, 1, M0,
     {
commit	c9eeee5c84ad817360a1719c538c6e6c0812ec13	[log] [tgz]
author	Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>	Fri Jun 30 15:43:29 2023 +0100
committer	Mohmun02 <MohammedSuhail.Munshi@arm.com>	Thu Jul 06 09:49:03 2023 +0000
tree	6c80020617e83b0889e092d685940c7937f41d2c
parent	ce3c48c7af02555f81c0f5e7ef2677916cecef34 [diff] [blame]