blob: 962542400e4550bf89e0684bd40026edfb6d6932 [file] [log] [blame]
Gunes Bayir8918b232023-03-17 13:52:21 +00001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "arm_compute/runtime/CL/CLTensor.h"
26#include "src/gpu/cl/kernels/ClNativeMatMulKernel.h"
27#include "tests/datasets/LargeMatMulDataset.h"
28#include "tests/datasets/SmallMatMulDataset.h"
29#include "tests/framework/Macros.h"
30#include "tests/framework/datasets/Datasets.h"
31#include "tests/validation/Validation.h"
32#include "tests/validation/fixtures/MatMulKernelFixture.h"
33#include "tests/validation/reference/Permute.h"
34
35#include <tuple>
36
37namespace arm_compute
38{
39namespace test
40{
41namespace validation
42{
43namespace
44{
45RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
46constexpr float abs_tolerance_f32(
47 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */
48constexpr float abs_tolerance_f16(
49 0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data types in case using relative tolerance fails because of small values */
50RelativeTolerance<half_float::half> tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
51} // namespace
52
53/** M0 values to test --precommit*/
54const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
55
56/** N0 values to test --precommit*/
57const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
58
59/** K0 values to test --precommit*/
60const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 });
61
62/** M0 values to test --nightly*/
63const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 });
64const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 });
65
66/** N0 values to test --nightly*/
67const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 });
68const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 });
69
70/** K0 values to test --nightly*/
71const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 });
72const auto k0_values_nightly_rhs_t = framework::dataset::make("K0", { 1, 2, 3, 4, 8 });
73const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 });
74
75template <typename T>
76using CLMatMulKernelFixture = MatMulKernelValidationFixture<T>;
77
TEST_SUITE(CL)
TEST_SUITE(MatMulKernel)
TEST_SUITE(Validate)

/** Checks that ClNativeMatMulKernel::validate() accepts or rejects M0/N0/K0 block-size
 * combinations as expected for each Lhs/Rhs transposition mode, including the stricter
 * constraints that apply when the Rhs tensor is exported to a CL image.
 */
TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL)
{
    // Pairs a kernel configuration with the verdict validate() is expected to return
    using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;

    const std::vector<MatMulConfigurationPair> supported_block_sizes =
    {
        // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
        // Lhs not-transposed, Rhs-not-transposed
        { MatMulKernelInfo(false, false, 0, 1, 1), false },  // M0 should be > 0
        { MatMulKernelInfo(false, false, 3, 5, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(false, false, 3, 6, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(false, false, 3, 3, 17), false }, // K0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(false, false, 3, 3, 7), false },  // K0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(false, false, 9, 1, 2), true },
        { MatMulKernelInfo(false, false, 3, 16, 3), true },
        { MatMulKernelInfo(false, false, 7, 3, 4), true },
        // With export_rhs_to_cl_image == true, N0 is further restricted to {4, 8, 16}
        { MatMulKernelInfo(false, false, 7, 3, 4, true), false },  // N0 not in {4, 8, 16}
        { MatMulKernelInfo(false, false, 7, 1, 4, true), false },  // N0 not in {4, 8, 16}
        { MatMulKernelInfo(false, false, 7, 12, 4, true), false }, // N0 not in {4, 8, 16}
        { MatMulKernelInfo(false, false, 7, 4, 4, true), true },
        { MatMulKernelInfo(false, false, 7, 8, 4, true), true },
        { MatMulKernelInfo(false, false, 7, 16, 4, true), true },

        // Lhs not-transposed, Rhs transposed
        { MatMulKernelInfo(false, true, 0, 1, 1), false },  // M0 should be > 0
        { MatMulKernelInfo(false, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(false, true, 3, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(false, true, 3, 3, 12), false }, // K0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(false, true, 3, 3, 6), false },  // K0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(false, true, 5, 1, 2), true },
        { MatMulKernelInfo(false, true, 3, 3, 3), true },
        { MatMulKernelInfo(false, true, 2, 4, 8), true },
        // With export_rhs_to_cl_image == true and Rhs transposed, K0 is restricted to {4, 8, 16}
        { MatMulKernelInfo(false, true, 2, 4, 5, true), false }, // K0 not in {4, 8, 16}
        { MatMulKernelInfo(false, true, 2, 4, 9, true), false }, // K0 not in {4, 8, 16}
        { MatMulKernelInfo(false, true, 2, 4, 3, true), false }, // K0 not in {4, 8, 16}
        { MatMulKernelInfo(false, true, 2, 4, 4, true), true },
        { MatMulKernelInfo(false, true, 2, 4, 8, true), true },
        { MatMulKernelInfo(false, true, 2, 8, 16, true), true },

        // Lhs transposed, Rhs-not-transposed
        { MatMulKernelInfo(true, false, 1, 1, 0), false },  // K0 should be > 0
        { MatMulKernelInfo(true, false, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, false, 3, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, false, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, false, 5, 3, 6), false },  // M0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, false, 4, 1, 22), true },
        { MatMulKernelInfo(true, false, 3, 3, 3), true },
        { MatMulKernelInfo(true, false, 2, 4, 8), true },
        // With export_rhs_to_cl_image == true, N0 is restricted to {4, 8, 16}
        { MatMulKernelInfo(true, false, 2, 3, 8, true), false }, // N0 not in {4, 8, 16}
        { MatMulKernelInfo(true, false, 2, 7, 8, true), false }, // N0 not in {4, 8, 16}
        { MatMulKernelInfo(true, false, 2, 5, 8, true), false }, // N0 not in {4, 8, 16}
        { MatMulKernelInfo(true, false, 2, 4, 8, true), true },
        { MatMulKernelInfo(true, false, 2, 8, 8, true), true },
        { MatMulKernelInfo(true, false, 2, 16, 8, true), true },

        // Lhs transposed, Rhs-transposed
        { MatMulKernelInfo(true, true, 2, 1, 5), false },  // K0 should in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, true, 1, 8, 7), false },  // K0 should in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, true, 3, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, true, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, true, 5, 3, 6), false },  // M0 not in {1, 2, 3, 4, 8, 16}
        { MatMulKernelInfo(true, true, 4, 8, 16), true },
        { MatMulKernelInfo(true, true, 3, 3, 4), true },
        { MatMulKernelInfo(true, true, 16, 4, 8), true },
        // With export_rhs_to_cl_image == true and Rhs transposed, K0 is restricted to {4, 8, 16}
        { MatMulKernelInfo(true, true, 2, 2, 1, true), false }, // K0 not in {4, 8, 16}
        { MatMulKernelInfo(true, true, 2, 2, 5, true), false }, // K0 not in {4, 8, 16}
        { MatMulKernelInfo(true, true, 2, 4, 7, true), false }, // K0 not in {4, 8, 16}
        { MatMulKernelInfo(true, true, 2, 4, 4, true), true },
        { MatMulKernelInfo(true, true, 2, 8, 8, true), true },
        { MatMulKernelInfo(true, true, 2, 8, 16, true), true },
    };

    // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
    // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
    // not the shapes themselves.
    const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
    const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);

    const bool export_to_cl_image_supported = image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
    for(auto &pair : supported_block_sizes)
    {
        TensorInfo output_info;
        Status     status = ClNativeMatMulKernel::validate(&lhs_info, &rhs_info, &output_info, pair.first);

        // Expected verdicts for export_rhs_to_cl_image == true assume the device supports
        // importing images from buffers; skip the check on devices that do not.
        if(!pair.first.export_rhs_to_cl_image || export_to_cl_image_supported)
        {
            ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS);
        }
    }
}
173
/** Checks validate() verdicts for export_rhs_to_cl_image against tensor-shape limits:
 * the Rhs dimension mapped onto the image (N when Rhs is not transposed, K when it is)
 * must be a multiple of 4, and the Rhs tensor must fit within the device's maximum
 * CL image width/height. Skipped entirely on devices without CL image support.
 */
TEST_CASE(ExportToCLImage, framework::DatasetMode::ALL)
{
    // We skip this test if the hardware does not support exporting to CL Image
    if(image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
    {
        // One image pixel holds 4 elements, so the tensor-width limit is 4x the image width limit
        constexpr size_t pixel_size  = 4;
        const size_t     max_image_w = pixel_size * CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
        const size_t     max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();

        using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, bool, bool, bool>;
        const std::vector<ShapeConfigurationTuple> shape_configurations =
        {
            // lhs_shape, rhs_shape, adj_lhs, adj_rhs, expected
            // Lhs t/Nt, Rhs Nt
            // Transposition of Lhs doesn't add any value to the tests, therefore always assumed false below
            { TensorShape(5U, 1U), TensorShape(3U, 5U), false, false, false },  // N should be multiple of 4
            { TensorShape(5U, 1U), TensorShape(14U, 5U), false, false, false }, // N should be multiple of 4
            { TensorShape(5U, 1U), TensorShape(12U, 5U), false, false, true },
            { TensorShape(5U, 1U), TensorShape(8U, 5U), false, false, true },
            { TensorShape(5U, 1U), TensorShape(4U, 5U), false, false, true },
            { TensorShape(max_image_h + 1, 1U), TensorShape(4U, max_image_h + 1), false, false, false }, // Cannot fit into CL Image memory's height
            { TensorShape(5U, 1U), TensorShape(max_image_w + 1, 5U), false, false, false },              // Cannot fit into CL Image memory's width
            { TensorShape(max_image_h, 1U), TensorShape(4U, max_image_h), false, false, true },          // Barely fits into CL Image memory's height
            { TensorShape(5U, 1U), TensorShape(max_image_w, 5U), false, false, true },                   // Barely fits into CL Image memory's width

            // Lhs Nt/T , Rhs T
            { TensorShape(5U, 1U), TensorShape(5U, 3U), false, true, false },  // K should be multiple of 4
            { TensorShape(5U, 1U), TensorShape(5U, 14U), false, true, false }, // K should be multiple of 4
            { TensorShape(4U, 1U), TensorShape(4U, 10U), false, true, true },
            { TensorShape(8U, 1U), TensorShape(8U, 9U), false, true, true },
            { TensorShape(12U, 1U), TensorShape(12U, 6U), false, true, true },
        };

        for(auto &tuple : shape_configurations)
        {
            TensorShape lhs_shape = std::get<0>(tuple);
            TensorShape rhs_shape = std::get<1>(tuple);

            const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32);
            const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32);

            const bool adj_lhs = std::get<2>(tuple);
            const bool adj_rhs = std::get<3>(tuple);

            // We choose M0, N0, K0 equal to 4 so that they're always valid for CLImage in any combination
            const MatMulKernelInfo matmul_kernel_info {adj_lhs, adj_rhs, 4, 4, 4, true /* export_rhs_to_cl_image */};

            TensorInfo output_info;
            Status     status = ClNativeMatMulKernel::validate(&lhs_info, &rhs_info, &output_info, matmul_kernel_info);

            const bool expected = std::get<4>(tuple);
            ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
        }
    }
}
229
230TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
231{
232 // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
233 using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, bool>;
234 const std::vector<ShapeConfigurationTuple> shape_configurations =
235 {
236 { TensorShape(5U, 1U), TensorShape(3U, 5U), true },
237 { TensorShape(10U, 12U), TensorShape(3U, 10U), true },
238 { TensorShape(8U, 4U), TensorShape(2U, 8U), true },
239 { TensorShape(8U, 4U), TensorShape(2U, 5U), false }, // Mismatch in the K dimension
240 { TensorShape(5U, 0U), TensorShape(2U, 5U), false }, // Invalid dimension
241 { TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), true },
242 { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), false }, // no batch broadcasting
243 { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), false }, // mismatch in batch dimension
244 };
245
246 for(auto &tuple : shape_configurations)
247 {
248 const bool expected = std::get<2>(tuple);
249
250 for(bool adj_lhs :
251 {
252 false, true
253 })
254 {
255 for(bool adj_rhs :
256 {
257 false, true
258 })
259 {
260 TensorShape lhs_shape = std::get<0>(tuple);
261 TensorShape rhs_shape = std::get<1>(tuple);
262
263 if(adj_lhs)
264 {
265 permute(lhs_shape, PermutationVector(1U, 0U));
266 }
267
268 if(adj_rhs)
269 {
270 permute(rhs_shape, PermutationVector(1U, 0U));
271 }
272
273 const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32);
274 const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32);
275 TensorInfo output_info;
276
277 MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ };
278
279 Status status = ClNativeMatMulKernel::validate(&lhs_info, &rhs_info, &output_info, matmul_kernel_info);
280 ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
281 }
282 }
283 }
284}
285
286TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
287{
288 // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
289 using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, bool>;
290 const std::vector<DataTypeConfigurationTuple> data_type_configurations =
291 {
292 { DataType::F32, DataType::F32, DataType::F32, true },
293 { DataType::F16, DataType::F16, DataType::F16, true },
294 { DataType::F16, DataType::F32, DataType::F32, false }, // no mixed precision
295 { DataType::F64, DataType::F64, DataType::F64, false }, // no double precision
296 { DataType::QASYMM8, DataType::QASYMM8, DataType::QASYMM8, false }, // no quantized types
297 { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, false }, // no quantized types
298 { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, false }, // no quantized types
299 { DataType::QASYMM16, DataType::QASYMM16, DataType::QASYMM16, false }, // no quantized types
300 { DataType::QSYMM16, DataType::QSYMM16, DataType::QSYMM16, false }, // no quantized types
301 { DataType::QSYMM8, DataType::QSYMM8, DataType::QSYMM8, false }, // no quantized types
302 { DataType::S64, DataType::S64, DataType::S64, false }, // no integral types
303 { DataType::S32, DataType::S32, DataType::S32, false }, // no integral types
304 { DataType::S16, DataType::S16, DataType::S16, false }, // no integral types
305 { DataType::S8, DataType::S8, DataType::S8, false }, // no integral types
306 { DataType::U64, DataType::U64, DataType::U64, false }, // no integral types
307 { DataType::U32, DataType::U32, DataType::U32, false }, // no integral types
308 { DataType::U16, DataType::U16, DataType::U16, false }, // no integral types
309 { DataType::U8, DataType::U8, DataType::U8, false }, // no integral types
310 };
311
312 const TensorShape shape = TensorShape(10U, 10U);
313 const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false };
314 for(auto &tuple : data_type_configurations)
315 {
316 const bool expected = std::get<3>(tuple);
317
318 const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
319 const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
320 TensorInfo output_info(shape, 1, std::get<2>(tuple));
321
322 Status status = ClNativeMatMulKernel::validate(&lhs_info, &rhs_info, &output_info, matmul_kernel_info);
323 ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
324 }
325}
326
TEST_SUITE_END() // Validate

TEST_SUITE(Float)
TEST_SUITE(FP32)
TEST_SUITE(Buffer)
// NOTE(review): "pretransose_A"/"pretransose_B" misspell "pretranspose". They are
// dataset labels consumed by the fixture, so they are kept as-is for consistency
// with the rest of the file; fixing them would have to be done file-wide.
// NOTE(review): TinyMatMulDataset / HighDimensionalMatMulDataset are presumably
// provided transitively by the MatMul dataset headers included above — TODO confirm.
// Precommit run over tiny shapes with all four transposition combinations
FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { false, true })),
                                                                       framework::dataset::make("pretransose_B", { false, true })),
                                                               m0_values_precommit),
                                                       n0_values_precommit),
                                               k0_values_precommit),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
}
// Precommit run over small shapes with all four transposition combinations
FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { false, true })),
                                                                       framework::dataset::make("pretransose_B", { false, true })),
                                                               m0_values_precommit),
                                                       n0_values_precommit),
                                               k0_values_precommit),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
}
// Nightly runs split per transposition combination so each uses the block sizes valid for it
FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { false })),
                                                                       framework::dataset::make("pretransose_B", { false })),
                                                               m0_values_nightly_lhs_nt),
                                                       n0_values_nightly_rhs_nt),
                                               k0_values_nightly_lhs_nt_rhs_nt),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32)
;
}
FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { false })),
                                                                       framework::dataset::make("pretransose_B", { true })),
                                                               m0_values_nightly_lhs_nt),
                                                       n0_values_nightly_rhs_t),
                                               k0_values_nightly_rhs_t),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { true })),
                                                                       framework::dataset::make("pretransose_B", { false })),
                                                               m0_values_nightly_lhs_t),
                                                       n0_values_nightly_rhs_nt),
                                               k0_values_nightly_lhs_t_rhs_nt),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { true })),
                                                                       framework::dataset::make("pretransose_B", { true })),
                                                               m0_values_nightly_lhs_t),
                                                       n0_values_nightly_rhs_t),
                                               k0_values_nightly_rhs_t),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
}
// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0
// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels
FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { false, true })),
                                                                       framework::dataset::make("pretransose_B", { false, true })),
                                                               framework::dataset::make("M0", { 2 })),
                                                       framework::dataset::make("N0", { 2 })),
                                               framework::dataset::make("K0", { 2 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
}
TEST_SUITE_END() // Buffer
420
TEST_SUITE(ExportRhsToCLImage)
// These runs use the CL-image datasets and restrict N0 (Rhs Nt) or K0 (Rhs T) to the
// values validate() accepts for export_rhs_to_cl_image == true. Output is only
// validated when the device supports CL images; on other devices the fixture flag
// _device_supports_export_to_cl_image skips the check.
FIXTURE_DATA_TEST_CASE(RunSmallRhsNotTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsNT(),
                                                                               framework::dataset::make("pretransose_A", { true, false })),
                                                                       framework::dataset::make("pretransose_B", { false })),
                                                               framework::dataset::make("M0", { 2 })),
                                                       framework::dataset::make("N0", { 4, 8, 16 })),
                                               framework::dataset::make("K0", { 2, 4 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { true })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    if(_device_supports_export_to_cl_image)
    {
        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
    }
}
FIXTURE_DATA_TEST_CASE(RunLargeRhsNotTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsNT(),
                                                                               framework::dataset::make("pretransose_A", { true, false })),
                                                                       framework::dataset::make("pretransose_B", { false })),
                                                               framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor
                                                       framework::dataset::make("N0", { 4, 8, 16 })),
                                               framework::dataset::make("K0", { 1, 2, 3, 4 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { true })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    if(_device_supports_export_to_cl_image)
    {
        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
    }
}
FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsT(),
                                                                               framework::dataset::make("pretransose_A", { true, false })),
                                                                       framework::dataset::make("pretransose_B", { true })),
                                                               framework::dataset::make("M0", { 2 })),
                                                       framework::dataset::make("N0", { 2, 4 })),
                                               framework::dataset::make("K0", { 4, 8, 16 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { true })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    if(_device_supports_export_to_cl_image)
    {
        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
    }
}
FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsT(),
                                                                               framework::dataset::make("pretransose_A", { true, false })),
                                                                       framework::dataset::make("pretransose_B", { true })),
                                                               framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor
                                                       framework::dataset::make("N0", { 1, 2, 3, 4 })),
                                               framework::dataset::make("K0", { 4, 8, 16 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { true })),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // Validate output
    if(_device_supports_export_to_cl_image)
    {
        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
    }
}
TEST_SUITE_END() // ExportRhsToCLImage
TEST_SUITE_END() // FP32
484
TEST_SUITE(FP16)
TEST_SUITE(Buffer)
// Same structure as the FP32 Buffer suite, with F16 data and the fp16 tolerances.
// No Tiny/HighDimensional runs here: dimensionality stress is covered by FP32.
FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { false, true })),
                                                                       framework::dataset::make("pretransose_B", { false, true })),
                                                               m0_values_precommit),
                                                       n0_values_precommit),
                                               k0_values_precommit),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { false })),
                                                                       framework::dataset::make("pretransose_B", { false })),
                                                               m0_values_nightly_lhs_nt),
                                                       n0_values_nightly_rhs_nt),
                                               k0_values_nightly_lhs_nt_rhs_nt),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { false })),
                                                                       framework::dataset::make("pretransose_B", { true })),
                                                               m0_values_nightly_lhs_nt),
                                                       n0_values_nightly_rhs_t),
                                               k0_values_nightly_rhs_t),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { true })),
                                                                       framework::dataset::make("pretransose_B", { false })),
                                                               m0_values_nightly_lhs_t),
                                                       n0_values_nightly_rhs_nt),
                                               k0_values_nightly_lhs_t_rhs_nt),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
}
FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
                                                                               framework::dataset::make("pretransose_A", { true })),
                                                                       framework::dataset::make("pretransose_B", { true })),
                                                               m0_values_nightly_lhs_t),
                                                       n0_values_nightly_rhs_t),
                                               k0_values_nightly_rhs_t),
                                       framework::dataset::make("export_rhs_to_cl_image", { false })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
}
TEST_SUITE_END() // Buffer
548
TEST_SUITE(ExportRhsToCLImage)
// Same structure as the FP32 ExportRhsToCLImage suite, with F16 data and fp16
// tolerances. Output is only validated when the device supports CL images.
FIXTURE_DATA_TEST_CASE(RunSmallRhsNotTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsNT(),
                                                                               framework::dataset::make("pretransose_A", { true, false })),
                                                                       framework::dataset::make("pretransose_B", { false })),
                                                               framework::dataset::make("M0", { 2 })),
                                                       framework::dataset::make("N0", { 4, 8, 16 })),
                                               framework::dataset::make("K0", { 2, 4 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { true })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    if(_device_supports_export_to_cl_image)
    {
        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
    }
}
FIXTURE_DATA_TEST_CASE(RunLargeRhsNotTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsNT(),
                                                                               framework::dataset::make("pretransose_A", { true, false })),
                                                                       framework::dataset::make("pretransose_B", { false })),
                                                               framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor
                                                       framework::dataset::make("N0", { 4, 8, 16 })),
                                               framework::dataset::make("K0", { 1, 2, 3, 4 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { true })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    if(_device_supports_export_to_cl_image)
    {
        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
    }
}
FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsT(),
                                                                               framework::dataset::make("pretransose_A", { true, false })),
                                                                       framework::dataset::make("pretransose_B", { true })),
                                                               framework::dataset::make("M0", { 2 })),
                                                       framework::dataset::make("N0", { 2, 4 })),
                                               framework::dataset::make("K0", { 4, 8, 16 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { true })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    if(_device_supports_export_to_cl_image)
    {
        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
    }
}
FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsT(),
                                                                               framework::dataset::make("pretransose_A", { true, false })),
                                                                       framework::dataset::make("pretransose_B", { true })),
                                                               framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor
                                                       framework::dataset::make("N0", { 1, 2, 3, 4 })),
                                               framework::dataset::make("K0", { 4, 8, 16 })),
                                       framework::dataset::make("export_rhs_to_cl_image", { true })),
                               framework::dataset::make("DataType", DataType::F16)))
{
    // Validate output
    if(_device_supports_export_to_cl_image)
    {
        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
    }
}
TEST_SUITE_END() // ExportRhsToCLImage
TEST_SUITE_END() // FP16
TEST_SUITE_END() // Float
TEST_SUITE_END() // MatMulKernel
TEST_SUITE_END() // CL
615} // namespace validation
616} // namespace test
617} // namespace arm_compute