Add Fused Activation to OpenCL MatMul

- Added fused activation to the MatMul function interface (a hedged usage sketch follows below)
- Added fused activation to the CL backend
- Added tests for the supported activation functions in MatMul
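
A minimal sketch of how the fused activation could be used at the function
level, assuming the activation is passed as a trailing ActivationLayerInfo
argument to CLMatMul::configure(); the exact signature, default arguments and
tensor shapes below are illustrative assumptions, not taken from this patch:

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLMatMul.h"

    using namespace arm_compute;

    void run_matmul_with_fused_relu()
    {
        CLScheduler::get().default_init();

        // Shapes are (width, height): lhs is 32x64 (MxK), rhs is 64x16 (KxN), dst is 32x16 (MxN).
        CLTensor lhs, rhs, dst;
        lhs.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32));
        rhs.allocator()->init(TensorInfo(TensorShape(16U, 64U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::F32));

        MatMulInfo        matmul_info; // no transposes
        GpuMatMulSettings settings;

        // Fused activation: bounded ReLU clamped at 6.0 (assumed trailing argument).
        ActivationLayerInfo act(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f);

        CLMatMul matmul;
        matmul.configure(&lhs, &rhs, &dst, matmul_info, settings, act);

        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        dst.allocator()->allocate();

        matmul.run();
    }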

Resolves: [COMPMID-6192]
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Ie103212b600b60699eaf6a6394d609e6e1f5aba6
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/522465
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9714
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/CL/MatMul.cpp b/tests/validation/CL/MatMul.cpp
index 6364b16..5a262a8 100644
--- a/tests/validation/CL/MatMul.cpp
+++ b/tests/validation/CL/MatMul.cpp
@@ -26,6 +26,7 @@
 #include "arm_compute/runtime/CL/functions/CLMatMul.h"
 
 #include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ActivationFunctionsDataset.h"
 #include "tests/framework/DatasetModes.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/TestCase.h"
@@ -44,11 +45,13 @@
 {
 namespace
 {
-RelativeTolerance<float>             tolerance_f32(0.001f);      /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */
-constexpr float                      abs_tolerance_f32(0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp32 data type in case using relative tolerance fails because of small values */
-constexpr float                      abs_tolerance_f16(0.001f);  /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16  data type in case using relative tolerance fails because of small values */
-RelativeTolerance<half_float::half>  tolerance_f16(half(0.01));  /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */
-constexpr AbsoluteTolerance<uint8_t> tolerance_quant(1);         /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */
+constexpr float          abs_tolerance_f32(
+    0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp32 data type in case using relative tolerance fails because of small values */
+constexpr float abs_tolerance_f16(
+    0.001f);                                                    /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16  data type in case using relative tolerance fails because of small values */
+RelativeTolerance<half_float::half>  tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */
+constexpr AbsoluteTolerance<uint8_t> tolerance_quant(1);        /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
 } // namespace
 
 template <typename T>
@@ -57,25 +60,71 @@
 template <typename T>
 using CLQuantizedMatMulFixture = QuantizedMatMulValidationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
 
+template <typename T>
+using CLMatMulActivationFixture = MatMulValidationWithActivationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLMatMulActivationAlphaBetaFixture = MatMulValidationWithActivationAlphaBetaFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLQuantizedMatMulActivationFixture = QuantizedMatMulValidationWithActivationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+/* The main activation functions MatMul is expected to support */
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)
+});
+
+const auto ActivationFunctionsQuantizedDataset = concat(concat(concat(
+                                                                   framework::dataset::make("ActivationInfo", ActivationLayerInfo()),
+                                                                   framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))),
+                                                               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f))),
+                                                        framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f)));
+
+/* Larger activation functions dataset, used during some nightly tests. */
+const auto AllActivationsDataset = combine(datasets::ActivationFunctions(), framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+
+const auto AllQuantizedActivationsDataset = combine(concat(datasets::ActivationFunctionsQuantized(),
+                                                           framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::HARD_SWISH,
+                                                                                                            ActivationLayerInfo::ActivationFunction::LEAKY_RELU
+                                                                                                          })),
+                                                    framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+
 TEST_SUITE(CL)
 TEST_SUITE(MatMul)
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallMatMulDataset(),
-                                                                                                                    framework::dataset::make("TransposeA", { false, true })),
-                                                                                                            framework::dataset::make("TransposeB", { false, true })),
-                                                                                                    framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulActivationFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                        framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                        framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                        ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeMatMulDataset(),
-                                                                                                                  framework::dataset::make("TransposeA", { false, true })),
-                                                                                                          framework::dataset::make("TransposeB", { false, true })),
-                                                                                                  framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulActivationFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                    framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                    framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                    framework::dataset::make("DataType", DataType::F32)),
+                                                                                                            ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunAllActivations, CLMatMulActivationAlphaBetaFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::SmallerMatMulDataset(),
+                       framework::dataset::make("TransposeA", { false })),
+                       framework::dataset::make("TransposeB", { true })),
+                       framework::dataset::make("DataType", DataType::F32)),
+                       AllActivationsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
@@ -85,19 +134,21 @@
 
 TEST_SUITE(FP16)
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallMatMulDataset(),
-                                                                                                                   framework::dataset::make("TransposeA", { false, true })),
-                                                                                                           framework::dataset::make("TransposeB", { false, true })),
-                                                                                                   framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulActivationFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                       framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                       framework::dataset::make("TransposeB", { false, true })),
+                                                                                                               framework::dataset::make("DataType", DataType::F16)),
+                                                                                                       ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeMatMulDataset(),
-                                                                                                                 framework::dataset::make("TransposeA", { false, true })),
-                                                                                                         framework::dataset::make("TransposeB", { false, true })),
-                                                                                                 framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulActivationFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                   framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                   framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                                                           ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
@@ -110,32 +161,30 @@
 TEST_SUITE(QASYMM8)
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizedMatMulFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
-    datasets::SmallMatMulDataset(),
-    framework::dataset::make("TransposeA", { false, true })),
-    framework::dataset::make("TransposeB", { false, true })),
-    framework::dataset::make("DataType", DataType::QASYMM8)),
-    framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })),
-    framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
-    framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })),
-    framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })),
-    framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) }))
-)
+                                                                                                                     datasets::SmallMatMulDataset(),
+                                                                                                                     framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                 framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                 framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                 ActivationFunctionsQuantizedDataset),
+                                                                                                                 framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                                                                                                                 framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })),
+                                                                                                                 framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })),
+                                                                                                         framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizedMatMulFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(
-    datasets::LargeMatMulDataset(),
-    framework::dataset::make("TransposeA", { false, true })),
-    framework::dataset::make("TransposeB", { false, true })),
-    framework::dataset::make("DataType", DataType::QASYMM8)),
-    framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })),
-    framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
-    framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
-    framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
-    framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) }))
-)
+        datasets::LargeMatMulDataset(),
+        framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                     framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                     framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                     ActivationFunctionsQuantizedDataset),
+                                                                                                                     framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                                                                                                                     framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
+                                                                                                                     framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
+                                                                                                             framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -146,32 +195,45 @@
 TEST_SUITE(QASYMM8_SIGNED)
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizedMatMulFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
-    datasets::SmallMatMulDataset(),
-    framework::dataset::make("TransposeA", { false, true })),
-    framework::dataset::make("TransposeB", { false, true })),
-    framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-    framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })),
-    framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
-    framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })),
-    framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })),
-    framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) }))
-)
+        datasets::SmallMatMulDataset(),
+        framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                        framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                        framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                                                                                        ActivationFunctionsQuantizedDataset),
+                                                                                                                        framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                                                                                                                        framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })),
+                                                                                                                framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })),
+                                                                                                        framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizedMatMulFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(
-    datasets::LargeMatMulDataset(),
-    framework::dataset::make("TransposeA", { false, true })),
-    framework::dataset::make("TransposeB", { false, true })),
-    framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-    framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })),
-    framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
-    framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
-    framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
-    framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) }))
-)
+                                                                                                                        datasets::LargeMatMulDataset(),
+                                                                                                                        framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                    framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                    framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                                                                                    ActivationFunctionsQuantizedDataset),
+                                                                                                                    framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                                                                                                                    framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
+                                                                                                                    framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
+                                                                                                            framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+FIXTURE_DATA_TEST_CASE(RunAllActivations, CLQuantizedMatMulActivationFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(
+                           datasets::LargeMatMulDataset(),
+                           framework::dataset::make("TransposeA", { false })),
+                       framework::dataset::make("TransposeB", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       AllQuantizedActivationsDataset),
+                       framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                       framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
+                       framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
+                       framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);