Add broadcast batched matmul validation cases

Related to: COMPMID-5660
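
Broadcasting here means a rank-2 RHS matrix is reused across every batch of a
rank-3 (or rank-4) LHS; the reverse, broadcasting the LHS across RHS batches,
remains unsupported and is now documented as such. Below is a minimal
validate()-level sketch of both cases, included for illustration only and not
part of this patch; the shapes are taken from the new SmallGEMMDataset entries
and it assumes the public NEGEMM::validate() entry point with F32 data:

  #include "arm_compute/core/Error.h"
  #include "arm_compute/core/TensorInfo.h"
  #include "arm_compute/core/TensorShape.h"
  #include "arm_compute/core/Types.h"
  #include "arm_compute/runtime/NEON/functions/NEGEMM.h"

  #include <iostream>

  using namespace arm_compute;

  int main()
  {
      // Supported: rank-3 LHS with a rank-2 RHS. The RHS is broadcast
      // across the 36 batches of the LHS.
      const TensorInfo a(TensorShape(15U, 7U, 36U), 1, DataType::F32); // LHS A
      const TensorInfo b(TensorShape(29U, 15U), 1, DataType::F32);     // RHS B (broadcast)
      const TensorInfo c(TensorShape(29U), 1, DataType::F32);          // Bias C
      const TensorInfo d(TensorShape(29U, 7U, 36U), 1, DataType::F32); // Output D

      const Status ok = NEGEMM::validate(&a, &b, &c, &d, 1.0f, 0.0f);

      // Unsupported: RHS rank > LHS rank, i.e. broadcasting the LHS across
      // the RHS batches. validate() is expected to reject this.
      const TensorInfo a2(TensorShape(15U, 7U), 1, DataType::F32);
      const TensorInfo b3(TensorShape(29U, 15U, 36U), 1, DataType::F32);

      const Status ko = NEGEMM::validate(&a2, &b3, &c, &d, 1.0f, 0.0f);

      std::cout << "RHS broadcast: "
                << (ok.error_code() == ErrorCode::OK ? "OK" : ok.error_description()) << "\n";
      std::cout << "LHS broadcast: "
                << (ko.error_code() == ErrorCode::OK ? "OK" : ko.error_description()) << "\n";
      return 0;
  }

The same restriction is documented on both the CPU (NEGEMM/CpuGemm) and the
OpenCL (CLGEMM/ClGemm) entry points, together with the rank <= 3 limit on the
RHS for the OpenCL backend.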

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I2314c8b21acc638402c77080d59db2f3fed58fe2
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8911
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Mohmun02 <MohammedSuhail.Munshi@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 38a07ef..b267bf1 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,6 +77,9 @@
      *
      * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
      *
+     * @note Batched GEMM only allows the RHS tensor's rank to be <= 3
+     * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank, but not the other way around
+     *
      * @param[in]  compile_context The compile context to be used.
      * @param[in]  a               First input tensor  (Matrix or Vector A). Data types supported: F16/F32
      * @param[in]  b               Second input tensor (Matrix B). Data type supported: same as @p a.
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 7ce2521..db15923 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -66,6 +66,8 @@
      * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
      * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
      *
+     * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank, but not the other way around
+     *
      * @param[in]  a         First input tensor  (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
      * @param[in]  b         Second input tensor (Matrix B). Data type supported: same as @p a
      * @param[in]  c         Third input tensor  (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
diff --git a/src/cpu/operators/CpuGemm.h b/src/cpu/operators/CpuGemm.h
index 031f02b..bc8adae 100644
--- a/src/cpu/operators/CpuGemm.h
+++ b/src/cpu/operators/CpuGemm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -81,6 +81,8 @@
      * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
      * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function.
      *
+     * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank, but not the other way around
+     *
      * @param[in]  a         First input tensor info (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32
      * @param[in]  b         Second input tensor info (Matrix B). Data type supported: same as @p a
      * @param[in]  c         Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a
diff --git a/src/gpu/cl/operators/ClGemm.h b/src/gpu/cl/operators/ClGemm.h
index aac463f..ea8a058 100644
--- a/src/gpu/cl/operators/ClGemm.h
+++ b/src/gpu/cl/operators/ClGemm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2022 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -75,6 +75,9 @@
      *
      * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
      *
+     * @note Batched GEMM only allows the RHS tensor's rank to be <= 3
+     * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank, but not the other way around
+     *
      * @param[in]  compile_context The compile context to be used.
      * @param[in]  a               First input tensor  (Matrix or Vector A). Data types supported: F16/F32
      * @param[in]  b               Second input tensor (Matrix B). Data type supported: same as @p a.
diff --git a/tests/datasets/SmallGEMMDataset.h b/tests/datasets/SmallGEMMDataset.h
index fabddb2..c12f57b 100644
--- a/tests/datasets/SmallGEMMDataset.h
+++ b/tests/datasets/SmallGEMMDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -91,6 +91,9 @@
         add_config(TensorShape(15U, 7U, 36U), TensorShape(29U, 15U, 36U), TensorShape(29U), TensorShape(29U, 7U, 36U), 1.0f, 0.0f);
         add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U), TensorShape(5U, 17U, 32U), 1.0f, 0.0f);
         add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U), TensorShape(19U, 256U, 32U), 1.0f, 0.0f);
+        // Broadcast RHS across the batch dimension
+        add_config(TensorShape(15U, 7U, 36U), TensorShape(29U, 15U), TensorShape(29U), TensorShape(29U, 7U, 36U), 1.0f, 0.0f);
+        add_config(TensorShape(15U, 7U, 36U, 2U), TensorShape(29U, 15U), TensorShape(29U), TensorShape(29U, 7U, 36U, 2U), 1.0f, 0.0f);
     }
 };