blob: 4c6e6338960412b812228d89244afaffa1ca37c8 [file] [log] [blame]
/*
 * Copyright (c) 2020-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas856f66e2021-04-22 21:13:21 +010024#include "src/core/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h"
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000028#include "arm_compute/core/GPUTarget.h"
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Georgios Pinitas856f66e2021-04-22 21:13:21 +010032#include "src/core/gpu/cl/kernels/gemm/ClGemmHelpers.h"
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000033
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000034#include <utility>
35
36namespace arm_compute
37{
Georgios Pinitas856f66e2021-04-22 21:13:21 +010038namespace opencl
39{
40namespace kernels
41{
42namespace gemm
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000043{
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +010044using namespace arm_compute::misc::shape_calculator;
45
Georgios Pinitas856f66e2021-04-22 21:13:21 +010046ClGemmDefaultConfigReshapedRhsOnlyValhall::ClGemmDefaultConfigReshapedRhsOnlyValhall(GPUTarget gpu)
47 : IClGemmKernelConfig(gpu)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000048{
49}
50
Georgios Pinitas856f66e2021-04-22 21:13:21 +010051std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000052{
Georgios Pinitas856f66e2021-04-22 21:13:21 +010053 using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedRhsOnlyValhall::*)(unsigned int m, unsigned int n, unsigned int k,
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000054 unsigned int b);
55
Georgios Pinitas856f66e2021-04-22 21:13:21 +010056 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
57 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16,
58 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
Gian Marco Iodice37954912021-04-12 17:34:33 +010059
Georgios Pinitas856f66e2021-04-22 21:13:21 +010060 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32,
61 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16,
62 &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
Gian Marco Iodice37954912021-04-12 17:34:33 +010063
64 ConfigurationFunctionExecutorPtr func = nullptr;
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000065
66 switch(_target)
67 {
Gian Marco Iodice37954912021-04-12 17:34:33 +010068 case GPUTarget::G78:
69 func = configs_G78.get_function(data_type);
70 break;
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000071 case GPUTarget::G77:
72 default:
Gian Marco Iodice37954912021-04-12 17:34:33 +010073 func = configs_G77.get_function(data_type);
74 break;
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000075 }
Gian Marco Iodice37954912021-04-12 17:34:33 +010076
77 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not support for GEMM");
78 return (this->*func)(m, n, k, b);
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000079}
80
Georgios Pinitas856f66e2021-04-22 21:13:21 +010081std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000082{
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000083 if(m == 1)
84 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +000085 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
86 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
87
88 if(r_mk <= 0.0064484127797186375)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000089 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +000090 if(r_mn <= 0.0028273810748942196)
91 {
92 GEMMLHSMatrixInfo lhs_info_buf;
93 GEMMRHSMatrixInfo rhs_info_buf;
94 GEMMLHSMatrixInfo lhs_info_img;
95 GEMMRHSMatrixInfo rhs_info_img;
96
97 const unsigned int h0 = std::max(n / 4, 1U);
Gian Marco Iodice37954912021-04-12 17:34:33 +010098 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, 0, 1, 0, 0, 1);
99 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000100
101 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
102 std::make_pair(lhs_info_buf, rhs_info_buf),
103 n, k, b, DataType::F32);
104 }
105 else
106 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100107 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 8, 0, 1, 0, 0, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000108 }
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000109 }
110 else
111 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000112 if(r_mk <= 0.020312500186264515)
113 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100114 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, 0, 1, 0, 0, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000115 }
116 else
117 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100118 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000119 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000120 }
121 }
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100122 else
123 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000124 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
125 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
SiCong Lia085a0c2020-12-02 14:54:34 +0000126 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000127
128 if(workload <= 1999.2000122070312)
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100129 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000130 if(workload <= 747.1999816894531)
131 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100132 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000133 }
134 else
135 {
136 GEMMLHSMatrixInfo lhs_info_buf;
137 GEMMRHSMatrixInfo rhs_info_buf;
138 GEMMLHSMatrixInfo lhs_info_img;
139 GEMMRHSMatrixInfo rhs_info_img;
Gian Marco Iodice37954912021-04-12 17:34:33 +0100140 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 2, 0, 0, 0, 1, 1);
141 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000142
143 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
144 std::make_pair(lhs_info_buf, rhs_info_buf),
145 n, k, b, DataType::F32);
146 }
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100147 }
148 else
149 {
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000150 if(r_mn <= 0.03348214365541935)
151 {
152 if(r_mk <= 0.028125000186264515)
153 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100154 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000155 }
156 else
157 {
158 GEMMLHSMatrixInfo lhs_info_buf;
159 GEMMRHSMatrixInfo rhs_info_buf;
160 GEMMLHSMatrixInfo lhs_info_img;
161 GEMMRHSMatrixInfo rhs_info_img;
Gian Marco Iodice37954912021-04-12 17:34:33 +0100162 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 2, 0, 0, 0, 1, 1);
163 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000164
165 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
166 std::make_pair(lhs_info_buf, rhs_info_buf),
167 n, k, b, DataType::F32);
168 }
169 }
170 else
171 {
172 GEMMLHSMatrixInfo lhs_info_buf;
173 GEMMRHSMatrixInfo rhs_info_buf;
174 GEMMLHSMatrixInfo lhs_info_img;
175 GEMMRHSMatrixInfo rhs_info_img;
Gian Marco Iodice37954912021-04-12 17:34:33 +0100176 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, 0, 1, 0, 0, 1);
177 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 1, 0, 1, 0);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000178
179 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
SiCong Lia085a0c2020-12-02 14:54:34 +0000180 std::make_pair(lhs_info_buf, rhs_info_buf),
181 n, k, b, DataType::F32);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000182 }
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100183 }
184 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000185}
186
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100187std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000188{
189 ARM_COMPUTE_UNUSED(k);
190 ARM_COMPUTE_UNUSED(b);
191
192 if(m == 1)
193 {
Gian Marco Iodice1e75ada2020-09-30 17:35:05 +0100194 const unsigned int h0 = std::max(n / 2, 1U);
195 if(n <= 836.0)
Gian Marco Iodice2cfd3f72020-05-06 11:27:08 +0100196 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100197 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, 0, 1, 0, 1, 0);
Gian Marco Iodice2cfd3f72020-05-06 11:27:08 +0100198 }
199 else
200 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100201 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, 0, 1, 0, 1, 0);
Gian Marco Iodice2cfd3f72020-05-06 11:27:08 +0100202 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000203 }
Gian Marco Iodicec6eaec32020-07-20 13:31:05 +0100204 else if(m < 128)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000205 {
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000206 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
Gian Marco Iodice2cfd3f72020-05-06 11:27:08 +0100207 if(k >= 512)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000208 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100209 return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000210 }
211 else
212 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100213 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000214 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000215 }
Gian Marco Iodice939586e2020-05-05 15:10:21 +0100216 else
217 {
218 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
Gian Marco Iodice2886c752020-05-07 10:26:15 +0100219 if(n >= 64)
220 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100221 return configure_lhs_rhs_info(m, n, 4, 8, 4, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice2886c752020-05-07 10:26:15 +0100222 }
223 else
224 {
225 if(k >= 512)
226 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100227 return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice2886c752020-05-07 10:26:15 +0100228 }
229 else
230 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100231 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, 0, 1, 0, 0);
Gian Marco Iodice2886c752020-05-07 10:26:15 +0100232 }
233 }
Gian Marco Iodice939586e2020-05-05 15:10:21 +0100234 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000235}
236
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100237std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000238{
239 ARM_COMPUTE_UNUSED(k);
240 ARM_COMPUTE_UNUSED(b);
241
242 if(m == 1)
243 {
244 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100245 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, 0, 1, 0, 1);
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000246 }
247 else
248 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100249 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
250 if(m >= 28)
251 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100252 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, 0, 1, 0, 1);
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100253 }
254 else
255 {
Gian Marco Iodice37954912021-04-12 17:34:33 +0100256 return configure_lhs_rhs_info(m, n, 2, 4, 16, 1, h0, 0, 1, 0, 1);
257 }
258 }
259}
260
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100261std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100262{
263 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
264 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
265 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
266 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
267
268 if(m == 1)
269 {
270 if(workload <= 278.7000f)
271 {
272 if(workload <= 7.5000f)
273 {
274 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
275 }
276 else
277 {
278 if(r_mn <= 0.0031f)
279 {
280 if(workload <= 256.6000f)
281 {
282 if(workload <= 16.7500f)
283 {
284 if(r_nk <= 1.6671f)
285 {
286 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
287 }
288 else
289 {
290 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
291 }
292 }
293 else
294 {
295 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
296 }
297 }
298 else
299 {
300 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
301 }
302 }
303 else
304 {
305 if(r_mk <= 0.0027f)
306 {
307 if(r_mk <= 0.0014f)
308 {
309 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
310 }
311 else
312 {
313 if(workload <= 8.9500f)
314 {
315 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
316 }
317 else
318 {
319 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
320 }
321 }
322 }
323 else
324 {
325 if(workload <= 14.1500f)
326 {
327 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
328 }
329 else
330 {
331 if(r_mk <= 0.0041f)
332 {
333 return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
334 }
335 else
336 {
337 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
338 }
339 }
340 }
341 }
342 }
343 }
344 else
345 {
346 if(workload <= 363.7000f)
347 {
348 if(r_mk <= 0.0031f)
349 {
350 return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 32, 0, 1, 0, 1, 0);
351 }
352 else
353 {
354 return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 32, 0, 1, 0, 1, 0);
355 }
356 }
357 else
358 {
359 return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 32, 0, 1, 0, 1, 0);
360 }
361 }
362 }
363 else
364 {
365 if(workload <= 1384.8000f)
366 {
367 if(workload <= 704.0000f)
368 {
369 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 32, 0, 1, 0, 1, 0);
370 }
371 else
372 {
373 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 4, 0, 0, 0, 1, 1);
374 }
375 }
376 else
377 {
378 if(workload <= 16761.6006f)
379 {
380 if(r_mn <= 187.1250f)
381 {
382 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 0, 0, 1, 1);
383 }
384 else
385 {
386 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 4, 0, 0, 0, 1, 1);
387 }
388 }
389 else
390 {
391 if(r_mk <= 432.4630f)
392 {
393 return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 16, 0, 0, 0, 1, 1);
394 }
395 else
396 {
397 return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 16, 0, 1, 0, 1, 1);
398 }
399 }
400 }
401 }
402}
403
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100404std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100405{
406 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
407 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
408 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
409 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
410
411 if(m == 1)
412 {
413 if(r_mn <= 0.0038f)
414 {
415 if(workload <= 353.9000f)
416 {
417 if(workload <= 278.7000f)
418 {
419 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0);
420 }
421 else
422 {
423 if(r_mk <= 0.0004f)
424 {
425 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0);
426 }
427 else
428 {
429 if(r_mk <= 0.0030f)
430 {
431 return configure_lhs_rhs_info(m, n, 1, 8, 4, 1, 8, 0, 1, 1, 0, 1);
432 }
433 else
434 {
435 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0);
436 }
437 }
438 }
439 }
440 else
441 {
442 if(r_nk <= 1.9384f)
443 {
444 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0);
445 }
446 else
447 {
448 return configure_lhs_rhs_info(m, n, 1, 8, 4, 1, 8, 0, 1, 1, 0, 1);
449 }
450 }
451 }
452 else
453 {
454 if(r_nk <= 1.0368f)
455 {
456 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, 0, 0, 1, 0, 0);
457 }
458 else
459 {
460 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 32, 0, 0, 1, 0, 0);
461 }
462 }
463 }
464 else
465 {
466 if(workload <= 1422.4000f)
467 {
468 if(workload <= 704.0000f)
469 {
470 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 32, 0, 0, 1, 0, 0);
471 }
472 else
473 {
474 if(workload <= 1197.6000f)
475 {
476 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 8, 0, 1, 1, 0, 1);
477 }
478 else
479 {
480 if(workload <= 1241.6000f)
481 {
482 return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0);
483 }
484 else
485 {
486 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 8, 0, 1, 1, 0, 1);
487 }
488 }
489 }
490 }
491 else
492 {
493 if(workload <= 2769.6000f)
494 {
495 if(workload <= 1846.4000f)
496 {
497 if(r_mn <= 2.4927f)
498 {
499 return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0);
500 }
501 else
502 {
503 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0);
504 }
505 }
506 else
507 {
508 if(r_mn <= 0.6261f)
509 {
510 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0);
511 }
512 else
513 {
514 if(r_mk <= 3.4453f)
515 {
516 if(r_mn <= 1.4135f)
517 {
518 return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0);
519 }
520 else
521 {
522 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0);
523 }
524 }
525 else
526 {
527 return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0);
528 }
529 }
530 }
531 }
532 else
533 {
534 if(r_nk <= 0.0302f)
535 {
536 return configure_lhs_rhs_info(m, n, 2, 4, 8, 1, 8, 0, 1, 1, 0, 1);
537 }
538 else
539 {
540 if(r_mk <= 181.3750f)
541 {
542 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0);
543 }
544 else
545 {
546 if(workload <= 28035.2002f)
547 {
548 return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0);
549 }
550 else
551 {
552 if(r_mk <= 808.6667f)
553 {
554 return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 0);
555 }
556 else
557 {
558 return configure_lhs_rhs_info(m, n, 2, 8, 8, 1, 16, 0, 1, 1, 0, 0);
559 }
560 }
561 }
562 }
563 }
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100564 }
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000565 }
566}
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100567} // namespace gemm
568} // namespace kernels
569} // namespace opencl
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000570} // namespace arm_compute