Prefer indirect GEMM over direct convolution if supported

Indirect GEMM uses the optimized assembly path, while direct convolution uses the fallback ACL kernel for convolution.

In certain cases, where the input tensor is large and the filter size is greater than 7 (e.g. 9x9 filters), the heuristics fall back to the direct convolution algorithm even though the assembly path could still be preferred when the data layout is NHWC. This matters more when SME2 kernels are present.

Resolves: COMPMID-6900
Change-Id: Ia611c975eee0423615113fcaeaa8f9eef0421456
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11254
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Anitha Raj <Anitha.Raj@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 62690c05..7a9230d 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -109,6 +109,11 @@
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)
 });
 
+const auto NoActivation = make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+});
+
 const auto ActivationFunctionsDatasetNightly = make("ActivationInfo",
 {
     ActivationLayerInfo(),
@@ -1201,6 +1206,20 @@
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
 }
+
+// This very large shape test is required to test heuristic paths where the tensor size is > 1e7 bytes
+// and weight dimensions larger than 7
+FIXTURE_DATA_TEST_CASE(RunVeryLarge, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+    combine(datasets::VeryLargeConvolutionLayerDataset(),
+        framework::dataset::make("ReshapeWeights", { true }),
+        framework::dataset::make("DataType", DataType::F32),
+        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        NoActivation))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float