blob: a2c1ed2c8e80e3b749ee0028c1a15d33dc5a04bd [file] [log] [blame]
/*
 * Copyright (c) 2019-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010024#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000028#include "arm_compute/core/GPUTarget.h"
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010032#include "src/core/CL/gemm/CLGEMMHelpers.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000033
34#include <map>
35#include <utility>
36
37namespace arm_compute
38{
39namespace cl_gemm
40{
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010041using namespace arm_compute::misc::shape_calculator;
42
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000043CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu)
44 : ICLGEMMKernelConfiguration(gpu)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000045{
46}
47
48std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
49{
Gian Marco Iodice926afe12019-03-19 11:44:13 +000050 using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
51 unsigned int b);
52
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010053 // Configurations for Mali-G51
54 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G51 =
55 {
56 { DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f32 },
Gian Marco Iodice0d548042019-10-03 15:12:09 +010057 { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f16 },
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +010058 { DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 },
59 { DataType::QSYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 },
60 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 },
61 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 }
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010062 };
63
Gian Marco Iodiceeaca67a2020-11-10 10:41:37 +000064 // Configurations for Mali-G52
65 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G52 =
66 {
67 { DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G52_f32 },
Gian Marco Iodice229757b2020-11-15 16:06:10 +000068 { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G52_f16 },
Gian Marco Iodiceeaca67a2020-11-10 10:41:37 +000069 { DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
70 { DataType::QSYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
71 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
72 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 }
73 };
74
Gian Marco Iodice926afe12019-03-19 11:44:13 +000075 // Configurations for Mali-G76
76 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G76 =
77 {
78 { DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f32 },
Gian Marco Iodice0d548042019-10-03 15:12:09 +010079 { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f16 },
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +010080 { DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 },
81 { DataType::QSYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 },
82 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 },
83 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +000084 };
85
86 // Configurations for Mali-G7x
87 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G7x =
88 {
89 { DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f32 },
Gian Marco Iodice0d548042019-10-03 15:12:09 +010090 { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f16 },
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +010091 { DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
92 { DataType::QSYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
93 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
94 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +000095 };
96
97 switch(_target)
98 {
99 case GPUTarget::G76:
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000100 if(gemm_configs_G76.find(data_type) != gemm_configs_G76.end())
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100101 {
102 return (this->*gemm_configs_G76[data_type])(m, n, k, b);
103 }
104 else
105 {
106 ARM_COMPUTE_ERROR("Not supported data type");
107 }
Gian Marco Iodiceeaca67a2020-11-10 10:41:37 +0000108 case GPUTarget::G52:
109 if(gemm_configs_G52.find(data_type) != gemm_configs_G52.end())
110 {
111 return (this->*gemm_configs_G52[data_type])(m, n, k, b);
112 }
113 else
114 {
115 ARM_COMPUTE_ERROR("Not supported data type");
116 }
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100117 case GPUTarget::G51:
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000118 if(gemm_configs_G51.find(data_type) != gemm_configs_G51.end())
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100119 {
120 return (this->*gemm_configs_G51[data_type])(m, n, k, b);
121 }
122 else
123 {
124 ARM_COMPUTE_ERROR("Not supported data type");
125 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000126 default:
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000127 if(gemm_configs_G7x.find(data_type) != gemm_configs_G7x.end())
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100128 {
129 return (this->*gemm_configs_G7x[data_type])(m, n, k, b);
130 }
131 else
132 {
133 ARM_COMPUTE_ERROR("Not supported data type");
134 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000135 }
136}
137
138std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
139{
140 ARM_COMPUTE_UNUSED(k);
141 ARM_COMPUTE_UNUSED(b);
142
143 if(m == 1)
144 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100145 if(n <= 2548)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000146 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100147 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000148 }
149 else
150 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100151 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 8, false, true, false, true, false);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000152 }
153 }
154 else
155 {
156 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
157 }
158}
159
160std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
161{
162 ARM_COMPUTE_UNUSED(k);
163 ARM_COMPUTE_UNUSED(b);
164
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100165 GEMMLHSMatrixInfo lhs_info_buf;
166 GEMMRHSMatrixInfo rhs_info_buf;
167 GEMMLHSMatrixInfo lhs_info_img;
168 GEMMRHSMatrixInfo rhs_info_img;
169
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100170 const bool is_workload_big = ((m * n * b) / 16) >= 2048;
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000171
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000172 if(m == 1)
173 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000174 if(n >= 8192)
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100175 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000176 const unsigned int h0 = std::max(n / 4, 1U);
177 return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, h0, false, true, false, true, false);
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100178 }
179 else
180 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000181 const unsigned int h0 = std::max(n / 2, 1U);
182 if(n <= 204)
183 {
184 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true, false);
185 }
186 else
187 {
188 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true, false);
189 }
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100190 }
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100191 }
192 else
193 {
194 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100195 if(is_workload_big)
196 {
197 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true);
198 }
199 else
200 {
201 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true);
202 }
203 }
204
205 // Get lhs_info/rhs_info in case of OpenCL image
206 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
207 if(is_workload_big)
208 {
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100209 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
210 }
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100211 else
212 {
213 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true);
214 }
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100215
216 const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
217 const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img);
218 const TensorInfo tensor_reshaped_info(shape, 1, DataType::F32);
219
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100220 // In case of vector by matrix or small workloads, we use the OpenCL buffer rather than the OpenCL image2d
221 const bool use_cl_image2d = ((m == 1) || ((((m * n * b) / 16) < 2048) && n < 128)) ? false : true;
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100222
223 if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
224 {
225 return std::make_pair(lhs_info_img, rhs_info_img);
226 }
227 else
228 {
229 return std::make_pair(lhs_info_buf, rhs_info_buf);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000230 }
231}
232
Gian Marco Iodiceeaca67a2020-11-10 10:41:37 +0000233std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
234{
235 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
236 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
237
238 GEMMLHSMatrixInfo lhs_info_buf;
239 GEMMRHSMatrixInfo rhs_info_buf;
240 GEMMLHSMatrixInfo lhs_info_img;
241 GEMMRHSMatrixInfo rhs_info_img;
242
243 if(m == 1)
244 {
245 if(r_nk <= 0.4664f)
246 {
247 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 16, false, true, false, true, false);
248 }
249 else
250 {
251 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, true);
252 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, false);
253
254 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
255 std::make_pair(lhs_info_buf, rhs_info_buf),
256 n, k, b, DataType::F32);
257 }
258 }
259 else
260 {
261 if(workload <= 274.4000f)
262 {
263 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 16, false, false, false, true, false);
264 }
265 else
266 {
267 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, true);
268 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, false);
269
270 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
271 std::make_pair(lhs_info_buf, rhs_info_buf),
272 n, k, b, DataType::F32);
273 }
274 }
275}
276
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100277std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
278{
279 ARM_COMPUTE_UNUSED(k);
280 ARM_COMPUTE_UNUSED(b);
281
282 if(m == 1)
283 {
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000284 const unsigned int n0 = n < 1280 ? 2 : 4;
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100285 const unsigned int h0 = std::max(n / n0, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100286 return configure_lhs_rhs_info(m, n, 1, n0, 4, 1, h0, false, true, false, true);
287 }
288 else
289 {
290 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
291 }
292}
293
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100294std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
295{
296 ARM_COMPUTE_UNUSED(k);
297 ARM_COMPUTE_UNUSED(b);
298
299 if(m == 1)
300 {
301 if(n > 2048)
302 {
303 const unsigned int h0 = std::max(n / 4, 1U);
304 return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
305 }
306 else
307 {
308 const unsigned int h0 = std::max(n / 2, 1U);
309 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
310 }
311 }
312 else
313 {
314 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
315 }
316}
317
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000318std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
319{
320 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
321 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
322 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
323 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
324
325 if(m == 1)
326 {
327 if(r_mk <= 0.0026f)
328 {
329 if(r_nk <= 0.4664f)
330 {
331 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
332 }
333 else
334 {
335 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
336 }
337 }
338 else
339 {
340 if(r_mk <= 0.0148f)
341 {
342 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
343 }
344 else
345 {
346 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
347 }
348 }
349 }
350 else
351 {
352 if(workload <= 362.6000f)
353 {
354 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
355 }
356 else
357 {
358 if(r_mn <= 22.6067f)
359 {
360 if(workload <= 708.8000f)
361 {
362 return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
363 }
364 else
365 {
366 return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 16, false, false, false, false, false);
367 }
368 }
369 else
370 {
371 if(r_nk <= 0.0917f)
372 {
373 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
374 }
375 else
376 {
377 return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
378 }
379 }
380 }
381 }
382}
383
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100384std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
385{
386 ARM_COMPUTE_UNUSED(k);
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100387
388 if(m == 1)
389 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100390 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100391 }
392 else
393 {
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000394 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
395 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
396
397 if(workload <= 7449.60f)
398 {
399 if(workload <= 691.60f)
400 {
401 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 8, false, false, false, false, false);
402 }
403 else
404 {
405 if(workload <= 4155.20f)
406 {
407 return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
408 }
409 else
410 {
411 return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 32, false, false, false, false, false);
412 }
413 }
414 }
415 else
416 {
417 if(workload <= 16300.80f)
418 {
419 if(r_mn <= 44.56f)
420 {
421 GEMMLHSMatrixInfo lhs_info_buf;
422 GEMMRHSMatrixInfo rhs_info_buf;
423 GEMMLHSMatrixInfo lhs_info_img;
424 GEMMRHSMatrixInfo rhs_info_img;
425
426 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
427 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
428
429 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
430 std::make_pair(lhs_info_buf, rhs_info_buf),
431 n, k, b, DataType::F16);
432 }
433 else
434 {
435 return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
436 }
437 }
438 else
439 {
440 GEMMLHSMatrixInfo lhs_info_buf;
441 GEMMRHSMatrixInfo rhs_info_buf;
442 GEMMLHSMatrixInfo lhs_info_img;
443 GEMMRHSMatrixInfo rhs_info_img;
444
445 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
446 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
447
448 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
449 std::make_pair(lhs_info_buf, rhs_info_buf),
450 n, k, b, DataType::F16);
451 }
452 }
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100453 }
454}
455
456std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
457{
458 ARM_COMPUTE_UNUSED(k);
459 ARM_COMPUTE_UNUSED(b);
460
461 if(m == 1)
462 {
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000463 const unsigned int n0 = n < 1280 ? 2 : 4;
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100464 const unsigned int h0 = std::max(n / n0, 1U);
465 return configure_lhs_rhs_info(m, n, 1, n0, 8, 1, h0, false, true, false, true);
466 }
467 else
468 {
469 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
470 }
471}
472
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000473std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
474{
475 ARM_COMPUTE_UNUSED(k);
476 ARM_COMPUTE_UNUSED(b);
477
478 if(dot8_supported(CLKernelLibrary::get().get_device()))
479 {
480 if(m == 1)
481 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100482 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000483 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
484 }
485 else
486 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100487 const unsigned int h0 = std::max(n / 4, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000488 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, false, true, false, true);
489 }
490 }
491 else
492 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100493 const int h0 = std::max(std::min(static_cast<int>(n / 2), static_cast<int>(128)), static_cast<int>(1));
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000494 if(m == 1)
495 {
Gian Marco Iodice2ec6c1e2019-04-09 12:03:05 +0100496 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000497 }
498 else
499 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100500 return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000501 }
502 }
503}
504
505std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
506{
507 ARM_COMPUTE_UNUSED(k);
508 ARM_COMPUTE_UNUSED(b);
509
510 if(m == 1)
511 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100512 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000513 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
514 }
515 else
516 {
517 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, 2, false, true, false, true);
518 }
519}
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100520
521std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
522{
523 ARM_COMPUTE_UNUSED(k);
524 ARM_COMPUTE_UNUSED(b);
525
526 if(m == 1)
527 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100528 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100529 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true);
530 }
531 else
532 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100533 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100534 return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
535 }
536}
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000537
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000538} // namespace cl_gemm
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000539} // namespace arm_compute