COMPMID-3652 Fix CLFullyConnectedLayer failure on S10
* Fix out-of-bound mem reads in cases where M < M0 in
CLGEMMMatrixMultiplyNativeKernel and
CLGEMMMatrixMultiplyReshapedOnlyRHSKernel, as a result of the new
boundary-aware reading logics.
* Add fixture tests (alongside the padding configuration tests) in
these 2 kernels to catch all 5 possible scenarios with block dimension
configurations, which includes this particular bug as the
"...BoundaryHandlingFullInXSinglePartialInY" test case
Change-Id: I8a10ab67594171e3ea4fb6e35c84ddc4ab964fba
Signed-off-by: SiCong Li <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3650
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
index c67d360..da57aa4 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp
@@ -261,6 +261,10 @@
const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0;
+ // Shrink M0 to be always <= M (internal_m) to prevent out-of-bounds reads.
+ // NOTE: This might have implications on heuristics and performance
+ const unsigned int internal_m0 = std::min(internal_m, lhs_info.m0);
+
// Create build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
@@ -277,7 +281,7 @@
build_opts.add_option("-DM=" + support::cpp11::to_string(internal_m));
build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n));
build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0));
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(internal_m0));
build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0));
build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0));
build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0));
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 27520c6..e65726b 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -263,6 +263,10 @@
const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0;
+ // Shrink M0 to be always <= M (internal_m) to prevent out-of-bounds reads.
+ // NOTE: This might have implications on heuristics and performance
+ const unsigned int internal_m0 = std::min(internal_m, lhs_info.m0);
+
// Create build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type()));
@@ -282,7 +286,7 @@
build_opts.add_option("-DM=" + support::cpp11::to_string(internal_m));
build_opts.add_option("-DN=" + support::cpp11::to_string(gemm_info.n));
build_opts.add_option("-DK=" + support::cpp11::to_string(gemm_info.k));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(lhs_info.m0));
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(internal_m0));
build_opts.add_option("-DN0=" + support::cpp11::to_string(rhs_info.n0));
build_opts.add_option("-DK0=" + support::cpp11::to_string(rhs_info.k0));
build_opts.add_option("-DH0=" + support::cpp11::to_string(rhs_info.h0));
diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
index cdcc1a4..6ba5012 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
@@ -118,6 +118,28 @@
/** Broadcast bias from vector to matrix */
const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
+/** Boundary handling cases for testing partial/non-partial (full) block dimensions, resulting from different combinations
+ * of M, M0, N and N0 values.
+ * M0 and N0 are kept constant, while the different test cases need to vary M and N.
+ *
+ * Eg. M = 64 and N = 33 result in a block dimension that has no partial blocks (all full blocks) in Y dimension and
+ * parital blocks in X dimension.
+ */
+const auto boundary_handling_cases = combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ // Large k to force potential out-of-bound reads on input0
+ framework::dataset::make("K", 315),
+ // Batch size == 1 to force potential out-of-bound reads on input0
+ framework::dataset::make("batch_size", 1)),
+ framework::dataset::make("M0", 4)),
+ framework::dataset::make("N0", 4)),
+ framework::dataset::make("K0", 4)),
+ // Only need to test F32 as F16 shares identical boundary handling logics
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("alpha", -0.75f )),
+ framework::dataset::make("beta", -0.35f )),
+ broadcast_bias_values),
+ framework::dataset::make("Activation", ActivationLayerInfo()));
+
/** Configuration test */
void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, bool broadcast_bias, DataType data_type, const ActivationLayerInfo &act_info)
{
@@ -257,6 +279,46 @@
ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS);
}
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingPartialInXPartialInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("M", 3),
+ framework::dataset::make("N", 1)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingPartialInXFullInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("M", 64),
+ framework::dataset::make("N", 51)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingFullInXFullInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("M", 64),
+ framework::dataset::make("N", 32)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBoundaryHandlingFullInXPartialInY, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(
+ framework::dataset::make("M", 37),
+ framework::dataset::make("N", 32)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
m_values,
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
index 6a1d495..bd0cd03 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
@@ -131,6 +131,32 @@
/** Broadcast bias from vector to matrix */
const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
+/** Boundary handling cases for testing partial/non-partial (full) block dimensions, resulting from different combinations
+ * of M, M0, N and N0 values.
+ * M0 and N0 are kept constant, while the different test cases need to vary M and N.
+ *
+ * Eg. M = 64 and N = 33 result in a block dimension that has no partial blocks (all full blocks) in Y dimension and
+ * parital blocks in X dimension.
+ */
+const auto boundary_handling_cases = combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ // Large k to force potential out-of-bound reads on input0
+ framework::dataset::make("K", 315),
+ // Batch size == 1 to force potential out-of-bound reads on input0
+ framework::dataset::make("batch_size", 1)),
+ framework::dataset::make("M0", 4)),
+ framework::dataset::make("N0", 4)),
+ framework::dataset::make("K0", 4)),
+ framework::dataset::make("H0", 3)),
+ i_values_rhs),
+ t_values_rhs),
+ framework::dataset::make("export_to_cl_image_rhs", {true, false})),
+ // Only need to test F32 as F16 shares identical boundary handling logics
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("alpha", -0.75f )),
+ framework::dataset::make("beta", -0.35f )),
+ broadcast_bias_values),
+ framework::dataset::make("Activation", ActivationLayerInfo()));
+
/** Configuration test */
bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value,
unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value,
@@ -330,6 +356,46 @@
TEST_SUITE(Float)
TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingPartialInXPartialInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(
+ framework::dataset::make("M", 3),
+ framework::dataset::make("N", 1)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingPartialInXFullInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(
+ framework::dataset::make("M", 64),
+ framework::dataset::make("N", 43)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingFullInXFullInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(
+ framework::dataset::make("M", 64),
+ framework::dataset::make("N", 32)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunPrecommitBoundaryHandlingFullInXPartialInY, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
+ combine(combine(
+ framework::dataset::make("M", 37),
+ framework::dataset::make("N", 32)),
+ boundary_handling_cases))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
FIXTURE_DATA_TEST_CASE(RunPrecommit, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::PRECOMMIT,
combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
m_values,