/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000028#include "arm_compute/core/GPUTarget.h"
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010032#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000033#include <utility>
34
35namespace arm_compute
36{
Georgios Pinitas856f66e2021-04-22 21:13:21 +010037namespace opencl
38{
39namespace kernels
40{
41namespace gemm
Gian Marco Iodice926afe12019-03-19 11:44:13 +000042{
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010043using namespace arm_compute::misc::shape_calculator;
44
Georgios Pinitas856f66e2021-04-22 21:13:21 +010045ClGemmDefaultConfigReshapedRhsOnlyBifrost::ClGemmDefaultConfigReshapedRhsOnlyBifrost(GPUTarget gpu)
46 : IClGemmKernelConfig(gpu)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000047{
48}
49
Georgios Pinitas856f66e2021-04-22 21:13:21 +010050std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000051{
Georgios Pinitas856f66e2021-04-22 21:13:21 +010052 using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedRhsOnlyBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
Gian Marco Iodice926afe12019-03-19 11:44:13 +000053 unsigned int b);
54
Georgios Pinitas856f66e2021-04-22 21:13:21 +010055 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G51(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32,
56 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16,
57 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010058
Georgios Pinitas856f66e2021-04-22 21:13:21 +010059 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32,
60 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16,
61 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
Gian Marco Iodiceeaca67a2020-11-10 10:41:37 +000062
Pablo Marquez Tellob1496e62021-06-25 14:49:37 +010063 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G31(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
64 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
65 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8);
66
Georgios Pinitas856f66e2021-04-22 21:13:21 +010067 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32,
68 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16,
69 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8);
Gian Marco Iodice926afe12019-03-19 11:44:13 +000070
Georgios Pinitas856f66e2021-04-22 21:13:21 +010071 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
72 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
73 &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
Gian Marco Iodice37954912021-04-12 17:34:33 +010074
75 ConfigurationFunctionExecutorPtr func = nullptr;
Gian Marco Iodice926afe12019-03-19 11:44:13 +000076 switch(_target)
77 {
78 case GPUTarget::G76:
Gian Marco Iodice37954912021-04-12 17:34:33 +010079 func = configs_G76.get_function(data_type);
80 break;
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010081 case GPUTarget::G51:
Gian Marco Iodice37954912021-04-12 17:34:33 +010082 func = configs_G51.get_function(data_type);
83 break;
84 case GPUTarget::G52:
85 func = configs_G52.get_function(data_type);
86 break;
Pablo Marquez Tellob1496e62021-06-25 14:49:37 +010087 case GPUTarget::G31:
88 func = configs_G31.get_function(data_type);
89 break;
Gian Marco Iodice926afe12019-03-19 11:44:13 +000090 default:
Gian Marco Iodice37954912021-04-12 17:34:33 +010091 func = configs_G7x.get_function(data_type);
92 break;
Gian Marco Iodice926afe12019-03-19 11:44:13 +000093 }
Gian Marco Iodice37954912021-04-12 17:34:33 +010094
95 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not support for GEMM");
96 return (this->*func)(m, n, k, b);
Gian Marco Iodice926afe12019-03-19 11:44:13 +000097}
98
Georgios Pinitas856f66e2021-04-22 21:13:21 +010099std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000100{
101 ARM_COMPUTE_UNUSED(k);
102 ARM_COMPUTE_UNUSED(b);
103
104 if(m == 1)
105 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100106 if(n <= 2548)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000107 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100108 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000109 }
110 else
111 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100112 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 8, false, true, false, true, false);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000113 }
114 }
115 else
116 {
117 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
118 }
119}
120
Pablo Marquez Tellob1496e62021-06-25 14:49:37 +0100121std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
122{
123 ARM_COMPUTE_UNUSED(k);
124 ARM_COMPUTE_UNUSED(b);
125
126 if(m == 1)
127 {
128 const unsigned int h0 = std::max(n / 2, 1U);
129 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, 0, 1, 0, 1);
130 }
131 else
132 {
133 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
134 if(m >= 28)
135 {
136 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, 0, 1, 0, 1);
137 }
138 else
139 {
140 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, 0, 1, 0, 1);
141 }
142 }
143}
144
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100145std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000146{
147 ARM_COMPUTE_UNUSED(k);
148 ARM_COMPUTE_UNUSED(b);
149
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100150 GEMMLHSMatrixInfo lhs_info_buf;
151 GEMMRHSMatrixInfo rhs_info_buf;
152 GEMMLHSMatrixInfo lhs_info_img;
153 GEMMRHSMatrixInfo rhs_info_img;
154
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100155 const bool is_workload_big = ((m * n * b) / 16) >= 2048;
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000156
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000157 if(m == 1)
158 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000159 if(n >= 8192)
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100160 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000161 const unsigned int h0 = std::max(n / 4, 1U);
162 return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, h0, false, true, false, true, false);
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100163 }
164 else
165 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000166 const unsigned int h0 = std::max(n / 2, 1U);
167 if(n <= 204)
168 {
169 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true, false);
170 }
171 else
172 {
173 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true, false);
174 }
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100175 }
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100176 }
177 else
178 {
179 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100180 if(is_workload_big)
181 {
182 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true);
183 }
184 else
185 {
186 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true);
187 }
188 }
189
190 // Get lhs_info/rhs_info in case of OpenCL image
191 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
192 if(is_workload_big)
193 {
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100194 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
195 }
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100196 else
197 {
198 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true);
199 }
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100200
201 const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
202 const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img);
203 const TensorInfo tensor_reshaped_info(shape, 1, DataType::F32);
204
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100205 // In case of vector by matrix or small workloads, we use the OpenCL buffer rather than the OpenCL image2d
206 const bool use_cl_image2d = ((m == 1) || ((((m * n * b) / 16) < 2048) && n < 128)) ? false : true;
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100207
208 if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
209 {
210 return std::make_pair(lhs_info_img, rhs_info_img);
211 }
212 else
213 {
214 return std::make_pair(lhs_info_buf, rhs_info_buf);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000215 }
216}
217
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100218std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodiceeaca67a2020-11-10 10:41:37 +0000219{
220 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
221 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
222
223 GEMMLHSMatrixInfo lhs_info_buf;
224 GEMMRHSMatrixInfo rhs_info_buf;
225 GEMMLHSMatrixInfo lhs_info_img;
226 GEMMRHSMatrixInfo rhs_info_img;
227
228 if(m == 1)
229 {
230 if(r_nk <= 0.4664f)
231 {
232 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 16, false, true, false, true, false);
233 }
234 else
235 {
236 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, true);
237 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, false);
238
239 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
240 std::make_pair(lhs_info_buf, rhs_info_buf),
241 n, k, b, DataType::F32);
242 }
243 }
244 else
245 {
246 if(workload <= 274.4000f)
247 {
248 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 16, false, false, false, true, false);
249 }
250 else
251 {
252 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, true);
253 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, false);
254
255 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
256 std::make_pair(lhs_info_buf, rhs_info_buf),
257 n, k, b, DataType::F32);
258 }
259 }
260}
261
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100262std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100263{
264 ARM_COMPUTE_UNUSED(k);
265 ARM_COMPUTE_UNUSED(b);
266
267 if(m == 1)
268 {
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000269 const unsigned int n0 = n < 1280 ? 2 : 4;
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100270 const unsigned int h0 = std::max(n / n0, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100271 return configure_lhs_rhs_info(m, n, 1, n0, 4, 1, h0, false, true, false, true);
272 }
273 else
274 {
275 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
276 }
277}
278
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100279std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100280{
281 ARM_COMPUTE_UNUSED(k);
282 ARM_COMPUTE_UNUSED(b);
283
284 if(m == 1)
285 {
286 if(n > 2048)
287 {
288 const unsigned int h0 = std::max(n / 4, 1U);
289 return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
290 }
291 else
292 {
293 const unsigned int h0 = std::max(n / 2, 1U);
294 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
295 }
296 }
297 else
298 {
299 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
300 }
301}
302
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100303std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000304{
305 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
306 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
SiCong Lia085a0c2020-12-02 14:54:34 +0000307 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
308 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000309
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000310 GEMMLHSMatrixInfo lhs_info_buf;
311 GEMMRHSMatrixInfo rhs_info_buf;
312 GEMMLHSMatrixInfo lhs_info_img;
313 GEMMRHSMatrixInfo rhs_info_img;
314
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000315 if(m == 1)
316 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000317 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, false);
318
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000319 if(r_mk <= 0.0026f)
320 {
321 if(r_nk <= 0.4664f)
322 {
323 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
324 }
325 else
326 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000327 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
328 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
329 std::make_pair(lhs_info_buf, rhs_info_buf),
330 n, k, b, DataType::F16);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000331 }
332 }
333 else
334 {
335 if(r_mk <= 0.0148f)
336 {
337 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
338 }
339 else
340 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000341 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
342 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
343 std::make_pair(lhs_info_buf, rhs_info_buf),
344 n, k, b, DataType::F16);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000345 }
346 }
347 }
348 else
349 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000350 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 8, 4, 1, 2, false, false, false, false, false);
351
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000352 if(workload <= 362.6000f)
353 {
354 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
355 }
356 else
357 {
358 if(r_mn <= 22.6067f)
359 {
360 if(workload <= 708.8000f)
361 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000362 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
363 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
364 std::make_pair(lhs_info_buf, rhs_info_buf),
365 n, k, b, DataType::F16);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000366 }
367 else
368 {
369 return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 16, false, false, false, false, false);
370 }
371 }
372 else
373 {
374 if(r_nk <= 0.0917f)
375 {
376 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
377 }
378 else
379 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000380 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
381 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
382 std::make_pair(lhs_info_buf, rhs_info_buf),
383 n, k, b, DataType::F16);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000384 }
385 }
386 }
387 }
388}
389
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100390std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100391{
392 ARM_COMPUTE_UNUSED(k);
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100393
394 if(m == 1)
395 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100396 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100397 }
398 else
399 {
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000400 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
401 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
402
403 if(workload <= 7449.60f)
404 {
405 if(workload <= 691.60f)
406 {
407 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 8, false, false, false, false, false);
408 }
409 else
410 {
411 if(workload <= 4155.20f)
412 {
413 return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
414 }
415 else
416 {
417 return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 32, false, false, false, false, false);
418 }
419 }
420 }
421 else
422 {
423 if(workload <= 16300.80f)
424 {
425 if(r_mn <= 44.56f)
426 {
427 GEMMLHSMatrixInfo lhs_info_buf;
428 GEMMRHSMatrixInfo rhs_info_buf;
429 GEMMLHSMatrixInfo lhs_info_img;
430 GEMMRHSMatrixInfo rhs_info_img;
431
Gian Marco Iodice23441892021-01-21 12:23:22 +0000432 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, false, true, false, false, true);
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000433 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
434
435 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
436 std::make_pair(lhs_info_buf, rhs_info_buf),
437 n, k, b, DataType::F16);
438 }
439 else
440 {
441 return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
442 }
443 }
444 else
445 {
446 GEMMLHSMatrixInfo lhs_info_buf;
447 GEMMRHSMatrixInfo rhs_info_buf;
448 GEMMLHSMatrixInfo lhs_info_img;
449 GEMMRHSMatrixInfo rhs_info_img;
450
451 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
452 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
453
454 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
455 std::make_pair(lhs_info_buf, rhs_info_buf),
456 n, k, b, DataType::F16);
457 }
458 }
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100459 }
460}
461
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100462std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100463{
464 ARM_COMPUTE_UNUSED(k);
465 ARM_COMPUTE_UNUSED(b);
466
467 if(m == 1)
468 {
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000469 const unsigned int n0 = n < 1280 ? 2 : 4;
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100470 const unsigned int h0 = std::max(n / n0, 1U);
471 return configure_lhs_rhs_info(m, n, 1, n0, 8, 1, h0, false, true, false, true);
472 }
473 else
474 {
475 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
476 }
477}
478
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100479std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000480{
481 ARM_COMPUTE_UNUSED(k);
482 ARM_COMPUTE_UNUSED(b);
483
484 if(dot8_supported(CLKernelLibrary::get().get_device()))
485 {
486 if(m == 1)
487 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100488 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000489 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
490 }
491 else
492 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100493 const unsigned int h0 = std::max(n / 4, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000494 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, false, true, false, true);
495 }
496 }
497 else
498 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100499 const int h0 = std::max(std::min(static_cast<int>(n / 2), static_cast<int>(128)), static_cast<int>(1));
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000500 if(m == 1)
501 {
Gian Marco Iodice2ec6c1e2019-04-09 12:03:05 +0100502 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000503 }
504 else
505 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100506 return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000507 }
508 }
509}
510
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100511std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000512{
513 ARM_COMPUTE_UNUSED(k);
514 ARM_COMPUTE_UNUSED(b);
515
516 if(m == 1)
517 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100518 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000519 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
520 }
521 else
522 {
523 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, 2, false, true, false, true);
524 }
525}
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100526
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100527std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100528{
529 ARM_COMPUTE_UNUSED(k);
530 ARM_COMPUTE_UNUSED(b);
531
532 if(m == 1)
533 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100534 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100535 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true);
536 }
537 else
538 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100539 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100540 return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
541 }
542}
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000543
Georgios Pinitas856f66e2021-04-22 21:13:21 +0100544} // namespace gemm
545} // namespace kernels
546} // namespace opencl
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000547} // namespace arm_compute