blob: e4e35cb8cef28053e121e1ce0181de6b01750894 [file] [log] [blame]
/*
 * Copyright (c) 2020-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h"
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000028#include "arm_compute/core/GPUTarget.h"
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Gunes Bayir4bfc70e2021-12-10 16:17:56 +000032
Georgios Pinitas7891a732021-08-20 21:39:25 +010033#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
Gunes Bayir4bfc70e2021-12-10 16:17:56 +000034#include "src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h"
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000035
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000036#include <utility>
37
38namespace arm_compute
39{
Georgios Pinitas856f66e2021-04-22 21:13:21 +010040namespace opencl
41{
42namespace kernels
43{
44namespace gemm
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000045{
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +010046using namespace arm_compute::misc::shape_calculator;
47
Georgios Pinitas856f66e2021-04-22 21:13:21 +010048ClGemmDefaultConfigReshapedRhsOnlyValhall::ClGemmDefaultConfigReshapedRhsOnlyValhall(GPUTarget gpu)
49 : IClGemmKernelConfig(gpu)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000050{
51}
52
Georgios Pinitas856f66e2021-04-22 21:13:21 +010053std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000054{
Georgios Pinitas856f66e2021-04-22 21:13:21 +010055 using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedRhsOnlyValhall::*)(unsigned int m, unsigned int n, unsigned int k,
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000056 unsigned int b);
57
Georgios Pinitas856f66e2021-04-22 21:13:21 +010058 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
59 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16,
60 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
Gian Marco Iodice37954912021-04-12 17:34:33 +010061
Georgios Pinitas856f66e2021-04-22 21:13:21 +010062 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32,
63 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16,
64 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
Gian Marco Iodice37954912021-04-12 17:34:33 +010065
Gunes Bayir4bfc70e2021-12-10 16:17:56 +000066 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G715(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32,
67 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16,
68 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
69
Gian Marco Iodice37954912021-04-12 17:34:33 +010070 ConfigurationFunctionExecutorPtr func = nullptr;
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000071
72 switch(_target)
73 {
Gian Marco Iodice37954912021-04-12 17:34:33 +010074 case GPUTarget::G78:
75 func = configs_G78.get_function(data_type);
76 break;
Gunes Bayir4bfc70e2021-12-10 16:17:56 +000077 case GPUTarget::G715:
78 case GPUTarget::G615:
79 func = configs_G715.get_function(data_type);
80 break;
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000081 case GPUTarget::G77:
82 default:
Gian Marco Iodice37954912021-04-12 17:34:33 +010083 func = configs_G77.get_function(data_type);
84 break;
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000085 }
Gian Marco Iodice37954912021-04-12 17:34:33 +010086
87 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not support for GEMM");
88 return (this->*func)(m, n, k, b);
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000089}
90
Georgios Pinitas856f66e2021-04-22 21:13:21 +010091std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000092{
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000093 if(m == 1)
94 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +000095 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
96 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
97
98 if(r_mk <= 0.0064484127797186375)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000099 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000100 if(r_mn <= 0.0028273810748942196)
101 {
102 GEMMLHSMatrixInfo lhs_info_buf;
103 GEMMRHSMatrixInfo rhs_info_buf;
104 GEMMLHSMatrixInfo lhs_info_img;
105 GEMMRHSMatrixInfo rhs_info_img;
106
107 const unsigned int h0 = std::max(n / 4, 1U);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100108 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, 0, 1, 0, 0, 1);
109 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000110
111 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
112 std::make_pair(lhs_info_buf, rhs_info_buf),
113 n, k, b, DataType::F32);
114 }
115 else
116 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100117 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 8, 0, 1, 0, 0, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000118 }
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000119 }
120 else
121 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000122 if(r_mk <= 0.020312500186264515)
123 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100124 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, 0, 1, 0, 0, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000125 }
126 else
127 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100128 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000129 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000130 }
131 }
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100132 else
133 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000134 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
135 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
SiCong Lia085a0c2020-12-02 14:54:34 +0000136 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000137
138 if(workload <= 1999.2000122070312)
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100139 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000140 if(workload <= 747.1999816894531)
141 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100142 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000143 }
144 else
145 {
146 GEMMLHSMatrixInfo lhs_info_buf;
147 GEMMRHSMatrixInfo rhs_info_buf;
148 GEMMLHSMatrixInfo lhs_info_img;
149 GEMMRHSMatrixInfo rhs_info_img;
Gian Marco Iodice37954912021-04-12 17:34:33 +0100150 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 2, 0, 0, 0, 1, 1);
151 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000152
153 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
154 std::make_pair(lhs_info_buf, rhs_info_buf),
155 n, k, b, DataType::F32);
156 }
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100157 }
158 else
159 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000160 if(r_mn <= 0.03348214365541935)
161 {
162 if(r_mk <= 0.028125000186264515)
163 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100164 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000165 }
166 else
167 {
168 GEMMLHSMatrixInfo lhs_info_buf;
169 GEMMRHSMatrixInfo rhs_info_buf;
170 GEMMLHSMatrixInfo lhs_info_img;
171 GEMMRHSMatrixInfo rhs_info_img;
Gian Marco Iodice37954912021-04-12 17:34:33 +0100172 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 2, 0, 0, 0, 1, 1);
173 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000174
175 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
176 std::make_pair(lhs_info_buf, rhs_info_buf),
177 n, k, b, DataType::F32);
178 }
179 }
180 else
181 {
182 GEMMLHSMatrixInfo lhs_info_buf;
183 GEMMRHSMatrixInfo rhs_info_buf;
184 GEMMLHSMatrixInfo lhs_info_img;
185 GEMMRHSMatrixInfo rhs_info_img;
Gian Marco Iodice37954912021-04-12 17:34:33 +0100186 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, 0, 1, 0, 0, 1);
187 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000188
189 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
SiCong Lia085a0c2020-12-02 14:54:34 +0000190 std::make_pair(lhs_info_buf, rhs_info_buf),
191 n, k, b, DataType::F32);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000192 }
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100193 }
194 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000195}
196
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100197std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000198{
199 ARM_COMPUTE_UNUSED(k);
200 ARM_COMPUTE_UNUSED(b);
201
202 if(m == 1)
203 {
Gian Marco Iodice1e75ada2020-09-30 17:35:05 +0100204 const unsigned int h0 = std::max(n / 2, 1U);
205 if(n <= 836.0)
Gian Marco Iodice2cfd3f72020-05-06 11:27:08 +0100206 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100207 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, 0, 1, 0, 1, 0);
Gian Marco Iodice2cfd3f72020-05-06 11:27:08 +0100208 }
209 else
210 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100211 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, 0, 1, 0, 1, 0);
Gian Marco Iodice2cfd3f72020-05-06 11:27:08 +0100212 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000213 }
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100214 else if(m < 128)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000215 {
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000216 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
Gian Marco Iodice2cfd3f72020-05-06 11:27:08 +0100217 if(k >= 512)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000218 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100219 return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000220 }
221 else
222 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100223 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000224 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000225 }
Gian Marco Iodice939586e2020-05-05 15:10:21 +0100226 else
227 {
228 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
Gian Marco Iodice2886c752020-05-07 10:26:15 +0100229 if(n >= 64)
230 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100231 return configure_lhs_rhs_info(m, n, 4, 8, 4, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice2886c752020-05-07 10:26:15 +0100232 }
233 else
234 {
235 if(k >= 512)
236 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100237 return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice2886c752020-05-07 10:26:15 +0100238 }
239 else
240 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100241 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice2886c752020-05-07 10:26:15 +0100242 }
243 }
Gian Marco Iodice939586e2020-05-05 15:10:21 +0100244 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000245}
246
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100247std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000248{
249 ARM_COMPUTE_UNUSED(k);
250 ARM_COMPUTE_UNUSED(b);
251
252 if(m == 1)
253 {
254 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100255 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, 0, 1, 0, 1);
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000256 }
257 else
258 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100259 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
260 if(m >= 28)
261 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100262 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, 0, 1, 0, 1);
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100263 }
264 else
265 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100266 return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 1);
267 }
268 }
269}
270
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100271std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100272{
273 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
274 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
275 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
276 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
277
278 if(m == 1)
279 {
280 if(workload <= 278.7000f)
281 {
282 if(workload <= 7.5000f)
283 {
284 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
285 }
286 else
287 {
288 if(r_mn <= 0.0031f)
289 {
290 if(workload <= 256.6000f)
291 {
292 if(workload <= 16.7500f)
293 {
294 if(r_nk <= 1.6671f)
295 {
296 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
297 }
298 else
299 {
300 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
301 }
302 }
303 else
304 {
305 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
306 }
307 }
308 else
309 {
310 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
311 }
312 }
313 else
314 {
315 if(r_mk <= 0.0027f)
316 {
317 if(r_mk <= 0.0014f)
318 {
319 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
320 }
321 else
322 {
323 if(workload <= 8.9500f)
324 {
325 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
326 }
327 else
328 {
329 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
330 }
331 }
332 }
333 else
334 {
335 if(workload <= 14.1500f)
336 {
337 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
338 }
339 else
340 {
341 if(r_mk <= 0.0041f)
342 {
343 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
344 }
345 else
346 {
347 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
348 }
349 }
350 }
351 }
352 }
353 }
354 else
355 {
356 if(workload <= 363.7000f)
357 {
358 if(r_mk <= 0.0031f)
359 {
360 return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 32, 0, 1, 0, 1, 0);
361 }
362 else
363 {
364 return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 32, 0, 1, 0, 1, 0);
365 }
366 }
367 else
368 {
369 return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 32, 0, 1, 0, 1, 0);
370 }
371 }
372 }
373 else
374 {
375 if(workload <= 1384.8000f)
376 {
377 if(workload <= 704.0000f)
378 {
379 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 32, 0, 1, 0, 1, 0);
380 }
381 else
382 {
383 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 4, 0, 0, 0, 1, 1);
384 }
385 }
386 else
387 {
388 if(workload <= 16761.6006f)
389 {
390 if(r_mn <= 187.1250f)
391 {
392 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 0, 0, 1, 1);
393 }
394 else
395 {
396 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 4, 0, 0, 0, 1, 1);
397 }
398 }
399 else
400 {
401 if(r_mk <= 432.4630f)
402 {
403 return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 16, 0, 0, 0, 1, 1);
404 }
405 else
406 {
407 return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 16, 0, 1, 0, 1, 1);
408 }
409 }
410 }
411 }
412}
413
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100414std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100415{
Gian Marco Iodice37954912021-04-12 17:34:33 +0100416 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100417 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
418 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
419 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100420
421 if(m == 1)
422 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100423 if(r_mn <= 0.0045f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100424 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100425 if(workload <= 278.7000f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100426 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100427 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 8, 0, 0, 0, 1, 1);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100428 }
429 else
430 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100431 return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 32, 0, 0, 1, 0, 0);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100432 }
433 }
434 else
435 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100436 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 8, 0, 0, 1, 0, 0);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100437 }
438 }
439 else
440 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100441 if(workload <= 1384.8000f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100442 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100443 if(r_nk <= 0.8333f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100444 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100445 if(r_mk <= 0.9119f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100446 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100447 return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 4, 0, 1, 0, 1, 1);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100448 }
449 else
450 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100451 if(r_nk <= 0.1181f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100452 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100453 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 32, 0, 0, 1, 0, 0);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100454 }
455 else
456 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100457 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100458 }
459 }
460 }
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100461 else
462 {
463 if(r_mk <= 1.0013f)
464 {
465 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 1);
466 }
467 else
468 {
469 return configure_lhs_rhs_info(m, n, 5, 4, 8, 1, 4, 0, 1, 1, 0, 1);
470 }
471 }
Gian Marco Iodice37954912021-04-12 17:34:33 +0100472 }
473 else
474 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100475 if(workload <= 11404.7998f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100476 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100477 if(r_mk <= 2.2884f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100478 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100479 if(r_nk <= 0.9286f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100480 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100481 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 4, 0, 1, 1, 0, 1);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100482 }
483 else
484 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100485 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 1);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100486 }
487 }
488 else
489 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100490 return configure_lhs_rhs_info(m, n, 5, 4, 8, 1, 4, 0, 1, 1, 0, 1);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100491 }
492 }
493 else
494 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100495 if(r_nk <= 1.1926f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100496 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100497 if(r_mn <= 1385.7917f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100498 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100499 return configure_lhs_rhs_info(m, n, 6, 4, 8, 1, 4, 0, 1, 1, 0, 1);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100500 }
501 else
502 {
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100503 return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 32, 0, 1, 1, 0, 0);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100504 }
505 }
Gian Marco Iodicedb14af62022-10-04 15:29:34 +0100506 else
507 {
508 return configure_lhs_rhs_info(m, n, 6, 4, 8, 1, 32, 0, 1, 1, 0, 1);
509 }
Gian Marco Iodice37954912021-04-12 17:34:33 +0100510 }
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100511 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000512 }
513}
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000514
515std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
516{
517 unsigned int best_m0;
518 unsigned int best_n0;
519
520 if(is_mmul_kernel_preferred(m, n, k, b, DataType::F32, best_m0, best_n0))
521 {
522 return configure_lhs_rhs_info(m, n, best_m0, best_n0, 1, 1, 4, false, true, false, false, true);
523 }
524 else
525 {
526 return configure_G77_f32(m, n, k, b);
527 }
528}
529
530std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
531{
532 unsigned int best_m0;
533 unsigned int best_n0;
534
535 if(is_mmul_kernel_preferred(m, n, k, b, DataType::F16, best_m0, best_n0))
536 {
537 return configure_lhs_rhs_info(m, n, best_m0, best_n0, 1, 1, 4, false, true, false, false, true);
538 }
539 else
540 {
541 return configure_G78_f16(m, n, k, b);
542 }
543}
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100544} // namespace gemm
545} // namespace kernels
546} // namespace opencl
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000547} // namespace arm_compute