blob: 4b923547c47e1aeb39c9823bc489a0f5e68d7577 [file] [log] [blame]
Gian Marco Iodice352c07d2023-05-03 12:21:38 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
25
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
28#include "arm_compute/core/GPUTarget.h"
29#include "arm_compute/core/KernelDescriptors.h"
30#include "arm_compute/core/TensorInfo.h"
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010031
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010032#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010033#include "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.h"
34
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010035#include <utility>
36
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010037namespace arm_compute
38{
39namespace cl_matmul
40{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010041ClMatMulNativeDefaultConfigValhall::ClMatMulNativeDefaultConfigValhall(GPUTarget gpu) : IClMatMulNativeKernelConfig(gpu)
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010042{
43}
44
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010045MatMulKernelInfo
46ClMatMulNativeDefaultConfigValhall::configure(const ITensorInfo *lhs, const ITensorInfo *rhs, const MatMulInfo &info)
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010047{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010048 using ConfigurationFunctionExecutorPtr = MatMulKernelInfo (ClMatMulNativeDefaultConfigValhall::*)(
49 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info);
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010050
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010051 ClMatMulNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G710(
52 &ClMatMulNativeDefaultConfigValhall::configure_G710_f32,
53 &ClMatMulNativeDefaultConfigValhall::configure_G710_f16,
54 &ClMatMulNativeDefaultConfigValhall::configure_G710_u8);
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010055
Gunes Bayirc1204c72023-10-10 17:41:56 +010056 ClMatMulNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G715(
57 &ClMatMulNativeDefaultConfigValhall::configure_G715_f32,
58 &ClMatMulNativeDefaultConfigValhall::configure_G715_f16,
59 &ClMatMulNativeDefaultConfigValhall::configure_G715_u8);
60
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010061 ConfigurationFunctionExecutorPtr func = nullptr;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010062 switch (_target)
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010063 {
Gunes Bayirc1204c72023-10-10 17:41:56 +010064 case GPUTarget::G715:
65 func = configs_G715.get_function(lhs->data_type());
66 break;
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010067 case GPUTarget::G710:
68 default:
69 func = configs_G710.get_function(lhs->data_type());
70 break;
71 }
72
73 const bool adj_lhs = info.adj_lhs();
74 const bool adj_rhs = info.adj_rhs();
75
76 TensorShape lhs_shape = lhs->tensor_shape();
77 TensorShape rhs_shape = rhs->tensor_shape();
78
79 const bool is_batched = lhs_shape.num_dimensions() > 2;
80
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010081 if (is_batched == true)
Gian Marco Iodice352c07d2023-05-03 12:21:38 +010082 {
83 lhs_shape.collapse_from(2);
84 }
85
86 const unsigned int m = adj_lhs ? lhs_shape.x() : lhs_shape.y();
87 const unsigned int n = adj_rhs ? rhs_shape.y() : rhs_shape.x();
88 const unsigned int k = adj_lhs ? lhs_shape.y() : lhs_shape.x();
89 const unsigned int b = lhs_shape.z();
90
91 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for matmul native");
92 return (this->*func)(m, n, k, b, rhs->lock_paddings(), info);
93}
94
Gunes Bayirc1204c72023-10-10 17:41:56 +010095MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_f32(
96 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
97{
98 ARM_COMPUTE_UNUSED(m, n, k, b, rhs_lock_padding);
99 return {info.adj_lhs(), info.adj_rhs(), /* m0 */ 1, /* n0 */ 4, /* k0 */ 1, /* export_to_cl_image */ false};
100}
101
102MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_f16(
103 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
104{
105 return configure_G715_f32(m, n, k, b, rhs_lock_padding, info);
106}
107
108MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G715_u8(
109 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
110{
111 ARM_COMPUTE_UNUSED(m, n, k, b, rhs_lock_padding);
Gian Marco Iodice4a9dbed2023-11-07 10:27:13 +0000112 return {info.adj_lhs(), info.adj_rhs(), /* m0 */ 4, /* n0 */ 16, /* k0 */ 4, /* export_to_cl_image */ false};
Gunes Bayirc1204c72023-10-10 17:41:56 +0100113}
114
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100115MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f32(
116 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100117{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100118 const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = {
119 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 2, 8, 4, 1},
120 {24, 464, 412, 24, 2, 8, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 2, 4, 16, 1},
121 {1568, 64, 40, 36, 2, 8, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100122
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100123 const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_nt = {
124 {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 5, 4, 4, 0},
125 {24, 464, 412, 24, 6, 2, 8, 0}, {112, 184, 144, 28, 6, 4, 4, 0}, {5776, 64, 32, 36, 5, 4, 4, 0},
126 {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100127
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100128 const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = {
129 {3136, 64, 64, 36, 4, 4, 4, 1}, {4096, 48, 32, 36, 2, 2, 16, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
130 {24, 464, 412, 24, 6, 2, 8, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
131 {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100132
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100133 const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_t = {
134 {3136, 64, 64, 36, 5, 4, 4, 0}, {4096, 48, 32, 36, 5, 4, 4, 0}, {688, 92, 68, 32, 5, 4, 4, 0},
135 {24, 464, 412, 24, 6, 2, 4, 0}, {112, 184, 144, 28, 5, 4, 4, 0}, {5776, 64, 32, 36, 5, 4, 4, 0},
136 {1568, 64, 40, 36, 5, 4, 4, 0}, {2920, 64, 64, 24, 6, 2, 4, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100137
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100138 const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = {
139 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 2, 8, 4, 1},
140 {24, 464, 412, 24, 2, 8, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 2, 8, 8, 1},
141 {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100142
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100143 const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_nt = {
144 {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
145 {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
146 {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100147
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100148 const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = {
149 {3136, 64, 64, 36, 4, 4, 4, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
150 {24, 464, 412, 24, 2, 2, 16, 1}, {112, 184, 144, 28, 4, 4, 4, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
151 {1568, 64, 40, 36, 4, 4, 4, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100152
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100153 const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_t = {
154 {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
155 {24, 464, 412, 24, 4, 2, 8, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
156 {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100157
158 const bool adj_lhs = info.adj_lhs();
159 const bool adj_rhs = info.adj_rhs();
160
161 const MatMulNativeConfigsMatrix *configs_best_to_use = nullptr;
162 const MatMulNativeConfigsMatrix *configs_fallback_to_use = nullptr;
163
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100164 if ((adj_lhs == false) && (adj_rhs == false))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100165 {
166 configs_best_to_use = &configs_mnkb_best_nt_nt;
167 configs_fallback_to_use = &configs_mnkb_fallback_nt_nt;
168 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100169 else if ((adj_lhs == false) && (adj_rhs == true))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100170 {
171 configs_best_to_use = &configs_mnkb_best_nt_t;
172 configs_fallback_to_use = &configs_mnkb_fallback_nt_t;
173 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100174 else if ((adj_lhs == true) && (adj_rhs == false))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100175 {
176 configs_best_to_use = &configs_mnkb_best_t_nt;
177 configs_fallback_to_use = &configs_mnkb_fallback_t_nt;
178 }
179 else
180 {
181 configs_best_to_use = &configs_mnkb_best_t_t;
182 configs_fallback_to_use = &configs_mnkb_fallback_t_t;
183 }
184
185 MatMulKernelInfo desc0 = find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k, b);
186 MatMulKernelInfo desc1 = find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k, b);
187
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100188 return select_info(desc0, desc1, m, n, k, b, DataType::F32, rhs_lock_padding);
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100189}
190
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100191MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_f16(
192 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100193{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100194 const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = {
195 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 16, 1},
196 {24, 464, 412, 24, 4, 4, 4, 1}, {112, 184, 144, 28, 4, 4, 16, 1}, {5776, 64, 32, 36, 4, 4, 8, 1},
197 {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100198
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100199 const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_nt = {
200 {3136, 64, 64, 36, 6, 4, 8, 0}, {4096, 48, 32, 36, 6, 4, 8, 0}, {688, 92, 68, 32, 6, 4, 8, 0},
201 {24, 464, 412, 24, 4, 4, 8, 0}, {112, 184, 144, 28, 6, 4, 8, 0}, {5776, 64, 32, 36, 6, 4, 8, 0},
202 {1568, 64, 40, 36, 6, 4, 8, 0}, {2920, 64, 64, 24, 6, 4, 8, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100203
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100204 const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = {
205 {3136, 64, 64, 36, 6, 4, 8, 1}, {4096, 48, 32, 36, 6, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
206 {24, 464, 412, 24, 6, 2, 4, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 6, 4, 8, 1},
207 {1568, 64, 40, 36, 6, 4, 8, 1}, {2920, 64, 64, 24, 6, 4, 8, 1}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100208
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100209 const MatMulNativeConfigsMatrix configs_mnkb_fallback_nt_t = {
210 {3136, 64, 64, 36, 6, 2, 16, 0}, {4096, 48, 32, 36, 5, 4, 8, 0}, {688, 92, 68, 32, 6, 2, 16, 0},
211 {24, 464, 412, 24, 6, 2, 16, 0}, {112, 184, 144, 28, 6, 2, 16, 0}, {5776, 64, 32, 36, 5, 4, 8, 0},
212 {1568, 64, 40, 36, 5, 4, 8, 0}, {2920, 64, 64, 24, 6, 2, 16, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100213
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100214 const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = {
215 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 4, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
216 {24, 464, 412, 24, 4, 4, 4, 1}, {112, 184, 144, 28, 4, 4, 4, 1}, {5776, 64, 32, 36, 4, 4, 4, 1},
217 {1568, 64, 40, 36, 4, 4, 4, 1}, {2920, 64, 64, 24, 4, 4, 4, 1}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100218
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100219 const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_nt = {
220 {3136, 64, 64, 36, 4, 4, 4, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
221 {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 4, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
222 {1568, 64, 40, 36, 4, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 4, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100223
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100224 const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = {
225 {3136, 64, 64, 36, 4, 4, 16, 1}, {4096, 48, 32, 36, 4, 4, 8, 1}, {688, 92, 68, 32, 4, 4, 4, 1},
226 {24, 464, 412, 24, 4, 2, 8, 1}, {112, 184, 144, 28, 4, 2, 16, 1}, {5776, 64, 32, 36, 4, 4, 16, 1},
227 {1568, 64, 40, 36, 4, 4, 8, 1}, {2920, 64, 64, 24, 4, 4, 16, 1}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100228
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100229 const MatMulNativeConfigsMatrix configs_mnkb_fallback_t_t = {
230 {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 4, 4, 8, 0},
231 {24, 464, 412, 24, 4, 4, 8, 0}, {112, 184, 144, 28, 4, 4, 8, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
232 {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100233
234 const bool adj_lhs = info.adj_lhs();
235 const bool adj_rhs = info.adj_rhs();
236
237 const MatMulNativeConfigsMatrix *configs_best_to_use = nullptr;
238 const MatMulNativeConfigsMatrix *configs_fallback_to_use = nullptr;
239
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100240 if ((adj_lhs == false) && (adj_rhs == false))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100241 {
242 configs_best_to_use = &configs_mnkb_best_nt_nt;
243 configs_fallback_to_use = &configs_mnkb_fallback_nt_nt;
244 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100245 else if ((adj_lhs == false) && (adj_rhs == true))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100246 {
247 configs_best_to_use = &configs_mnkb_best_nt_t;
248 configs_fallback_to_use = &configs_mnkb_fallback_nt_t;
249 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100250 else if ((adj_lhs == true) && (adj_rhs == false))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100251 {
252 configs_best_to_use = &configs_mnkb_best_t_nt;
253 configs_fallback_to_use = &configs_mnkb_fallback_t_nt;
254 }
255 else
256 {
257 configs_best_to_use = &configs_mnkb_best_t_t;
258 configs_fallback_to_use = &configs_mnkb_fallback_t_t;
259 }
260
261 MatMulKernelInfo desc0 = find_info(*configs_best_to_use, adj_lhs, adj_rhs, m, n, k, b);
262 MatMulKernelInfo desc1 = find_info(*configs_fallback_to_use, adj_lhs, adj_rhs, m, n, k, b);
263
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100264 return select_info(desc0, desc1, m, n, k, b, DataType::F16, rhs_lock_padding);
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100265}
266
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100267MatMulKernelInfo ClMatMulNativeDefaultConfigValhall::configure_G710_u8(
268 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool rhs_lock_padding, const MatMulInfo &info)
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100269{
270 ARM_COMPUTE_UNUSED(rhs_lock_padding);
271
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100272 const MatMulNativeConfigsMatrix configs_mnkb_best_nt_nt = {
273 {3136, 64, 64, 36, 6, 4, 4, 0}, {4096, 48, 32, 36, 6, 4, 4, 0}, {688, 92, 68, 32, 2, 8, 4, 0},
274 {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 6, 4, 4, 0}, {5776, 64, 32, 36, 6, 4, 4, 0},
275 {1568, 64, 40, 36, 6, 4, 4, 0}, {2920, 64, 64, 24, 5, 4, 4, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100276
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100277 const MatMulNativeConfigsMatrix configs_mnkb_best_nt_t = {
278 {3136, 64, 64, 36, 4, 4, 16, 0}, {4096, 48, 32, 36, 4, 4, 16, 0}, {688, 92, 68, 32, 4, 4, 16, 0},
279 {24, 464, 412, 24, 6, 2, 16, 0}, {112, 184, 144, 28, 4, 4, 16, 0}, {5776, 64, 32, 36, 4, 4, 16, 0},
280 {1568, 64, 40, 36, 6, 4, 4, 0}, {2920, 64, 64, 24, 4, 4, 16, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100281
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100282 const MatMulNativeConfigsMatrix configs_mnkb_best_t_nt = {
283 {3136, 64, 64, 36, 4, 4, 8, 0}, {4096, 48, 32, 36, 4, 4, 8, 0}, {688, 92, 68, 32, 4, 4, 4, 0},
284 {24, 464, 412, 24, 4, 4, 4, 0}, {112, 184, 144, 28, 4, 4, 8, 0}, {5776, 64, 32, 36, 4, 4, 8, 0},
285 {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 4, 8, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100286
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100287 const MatMulNativeConfigsMatrix configs_mnkb_best_t_t = {
288 {3136, 64, 64, 36, 4, 2, 16, 0}, {4096, 48, 32, 36, 4, 4, 4, 0}, {688, 92, 68, 32, 4, 4, 8, 0},
289 {24, 464, 412, 24, 4, 2, 16, 0}, {112, 184, 144, 28, 4, 2, 16, 0}, {5776, 64, 32, 36, 4, 4, 4, 0},
290 {1568, 64, 40, 36, 4, 4, 8, 0}, {2920, 64, 64, 24, 4, 2, 16, 0}};
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100291
292 const bool adj_lhs = info.adj_lhs();
293 const bool adj_rhs = info.adj_rhs();
294
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100295 if ((adj_lhs == false) && (adj_rhs == false))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100296 {
297 return find_info(configs_mnkb_best_nt_nt, adj_lhs, adj_rhs, m, n, k, b);
298 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100299 else if ((adj_lhs == false) && (adj_rhs == true))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100300 {
301 return find_info(configs_mnkb_best_nt_t, adj_lhs, adj_rhs, m, n, k, b);
302 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100303 else if ((adj_lhs == true) && (adj_rhs == false))
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100304 {
305 return find_info(configs_mnkb_best_t_nt, adj_lhs, adj_rhs, m, n, k, b);
306 }
307 else
308 {
309 return find_info(configs_mnkb_best_t_t, adj_lhs, adj_rhs, m, n, k, b);
310 }
311}
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100312} // namespace cl_matmul
Gian Marco Iodice352c07d2023-05-03 12:21:38 +0100313} // namespace arm_compute