blob: 3645a0e14180642b911f9800630738003bf8fa65 [file] [log] [blame]
/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
SiCong Lia085a0c2020-12-02 14:54:34 +000024#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMDefaultConfigReshapedRHSOnlyBifrost.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000028#include "arm_compute/core/GPUTarget.h"
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010032#include "src/core/CL/gemm/CLGEMMHelpers.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000033
Gian Marco Iodice926afe12019-03-19 11:44:13 +000034#include <utility>
35
36namespace arm_compute
37{
38namespace cl_gemm
39{
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010040using namespace arm_compute::misc::shape_calculator;
41
SiCong Lia085a0c2020-12-02 14:54:34 +000042CLGEMMDefaultConfigReshapedRHSOnlyBifrost::CLGEMMDefaultConfigReshapedRHSOnlyBifrost(GPUTarget gpu)
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000043 : ICLGEMMKernelConfiguration(gpu)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000044{
45}
46
SiCong Lia085a0c2020-12-02 14:54:34 +000047std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000048{
SiCong Lia085a0c2020-12-02 14:54:34 +000049 using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (CLGEMMDefaultConfigReshapedRHSOnlyBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
Gian Marco Iodice926afe12019-03-19 11:44:13 +000050 unsigned int b);
51
Gian Marco Iodice37954912021-04-12 17:34:33 +010052 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G51(&CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G51_f32,
53 &CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G51_f16,
54 &CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G51_u8);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010055
Gian Marco Iodice37954912021-04-12 17:34:33 +010056 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(&CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G52_f32,
57 &CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G52_f16,
58 &CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G7x_u8);
Gian Marco Iodiceeaca67a2020-11-10 10:41:37 +000059
Gian Marco Iodice37954912021-04-12 17:34:33 +010060 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(&CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G76_f32,
61 &CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G76_f16,
62 &CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G76_u8);
Gian Marco Iodice926afe12019-03-19 11:44:13 +000063
Gian Marco Iodice37954912021-04-12 17:34:33 +010064 CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G7x_f32,
65 &CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G7x_f16,
66 &CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G7x_u8);
67
68 ConfigurationFunctionExecutorPtr func = nullptr;
Gian Marco Iodice926afe12019-03-19 11:44:13 +000069
70 switch(_target)
71 {
72 case GPUTarget::G76:
Gian Marco Iodice37954912021-04-12 17:34:33 +010073 func = configs_G76.get_function(data_type);
74 break;
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010075 case GPUTarget::G51:
Gian Marco Iodice37954912021-04-12 17:34:33 +010076 func = configs_G51.get_function(data_type);
77 break;
78 case GPUTarget::G52:
79 func = configs_G52.get_function(data_type);
80 break;
Gian Marco Iodice926afe12019-03-19 11:44:13 +000081 default:
Gian Marco Iodice37954912021-04-12 17:34:33 +010082 func = configs_G7x.get_function(data_type);
83 break;
Gian Marco Iodice926afe12019-03-19 11:44:13 +000084 }
Gian Marco Iodice37954912021-04-12 17:34:33 +010085
86 ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not support for GEMM");
87 return (this->*func)(m, n, k, b);
Gian Marco Iodice926afe12019-03-19 11:44:13 +000088}
89
SiCong Lia085a0c2020-12-02 14:54:34 +000090std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000091{
92 ARM_COMPUTE_UNUSED(k);
93 ARM_COMPUTE_UNUSED(b);
94
95 if(m == 1)
96 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010097 if(n <= 2548)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000098 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010099 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000100 }
101 else
102 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100103 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 8, false, true, false, true, false);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000104 }
105 }
106 else
107 {
108 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
109 }
110}
111
SiCong Lia085a0c2020-12-02 14:54:34 +0000112std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000113{
114 ARM_COMPUTE_UNUSED(k);
115 ARM_COMPUTE_UNUSED(b);
116
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100117 GEMMLHSMatrixInfo lhs_info_buf;
118 GEMMRHSMatrixInfo rhs_info_buf;
119 GEMMLHSMatrixInfo lhs_info_img;
120 GEMMRHSMatrixInfo rhs_info_img;
121
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100122 const bool is_workload_big = ((m * n * b) / 16) >= 2048;
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000123
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000124 if(m == 1)
125 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000126 if(n >= 8192)
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100127 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000128 const unsigned int h0 = std::max(n / 4, 1U);
129 return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, h0, false, true, false, true, false);
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100130 }
131 else
132 {
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000133 const unsigned int h0 = std::max(n / 2, 1U);
134 if(n <= 204)
135 {
136 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true, false);
137 }
138 else
139 {
140 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true, false);
141 }
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100142 }
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100143 }
144 else
145 {
146 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100147 if(is_workload_big)
148 {
149 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true);
150 }
151 else
152 {
153 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true);
154 }
155 }
156
157 // Get lhs_info/rhs_info in case of OpenCL image
158 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
159 if(is_workload_big)
160 {
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100161 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
162 }
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100163 else
164 {
165 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true);
166 }
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100167
168 const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
169 const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img);
170 const TensorInfo tensor_reshaped_info(shape, 1, DataType::F32);
171
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100172 // In case of vector by matrix or small workloads, we use the OpenCL buffer rather than the OpenCL image2d
173 const bool use_cl_image2d = ((m == 1) || ((((m * n * b) / 16) < 2048) && n < 128)) ? false : true;
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100174
175 if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
176 {
177 return std::make_pair(lhs_info_img, rhs_info_img);
178 }
179 else
180 {
181 return std::make_pair(lhs_info_buf, rhs_info_buf);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000182 }
183}
184
SiCong Lia085a0c2020-12-02 14:54:34 +0000185std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodiceeaca67a2020-11-10 10:41:37 +0000186{
187 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
188 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
189
190 GEMMLHSMatrixInfo lhs_info_buf;
191 GEMMRHSMatrixInfo rhs_info_buf;
192 GEMMLHSMatrixInfo lhs_info_img;
193 GEMMRHSMatrixInfo rhs_info_img;
194
195 if(m == 1)
196 {
197 if(r_nk <= 0.4664f)
198 {
199 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 16, false, true, false, true, false);
200 }
201 else
202 {
203 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, true);
204 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, false);
205
206 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
207 std::make_pair(lhs_info_buf, rhs_info_buf),
208 n, k, b, DataType::F32);
209 }
210 }
211 else
212 {
213 if(workload <= 274.4000f)
214 {
215 return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 16, false, false, false, true, false);
216 }
217 else
218 {
219 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, true);
220 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, false);
221
222 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
223 std::make_pair(lhs_info_buf, rhs_info_buf),
224 n, k, b, DataType::F32);
225 }
226 }
227}
228
SiCong Lia085a0c2020-12-02 14:54:34 +0000229std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100230{
231 ARM_COMPUTE_UNUSED(k);
232 ARM_COMPUTE_UNUSED(b);
233
234 if(m == 1)
235 {
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000236 const unsigned int n0 = n < 1280 ? 2 : 4;
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100237 const unsigned int h0 = std::max(n / n0, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100238 return configure_lhs_rhs_info(m, n, 1, n0, 4, 1, h0, false, true, false, true);
239 }
240 else
241 {
242 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
243 }
244}
245
SiCong Lia085a0c2020-12-02 14:54:34 +0000246std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100247{
248 ARM_COMPUTE_UNUSED(k);
249 ARM_COMPUTE_UNUSED(b);
250
251 if(m == 1)
252 {
253 if(n > 2048)
254 {
255 const unsigned int h0 = std::max(n / 4, 1U);
256 return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
257 }
258 else
259 {
260 const unsigned int h0 = std::max(n / 2, 1U);
261 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
262 }
263 }
264 else
265 {
266 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
267 }
268}
269
SiCong Lia085a0c2020-12-02 14:54:34 +0000270std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000271{
272 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
273 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
SiCong Lia085a0c2020-12-02 14:54:34 +0000274 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
275 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000276
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000277 GEMMLHSMatrixInfo lhs_info_buf;
278 GEMMRHSMatrixInfo rhs_info_buf;
279 GEMMLHSMatrixInfo lhs_info_img;
280 GEMMRHSMatrixInfo rhs_info_img;
281
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000282 if(m == 1)
283 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000284 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, false);
285
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000286 if(r_mk <= 0.0026f)
287 {
288 if(r_nk <= 0.4664f)
289 {
290 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
291 }
292 else
293 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000294 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
295 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
296 std::make_pair(lhs_info_buf, rhs_info_buf),
297 n, k, b, DataType::F16);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000298 }
299 }
300 else
301 {
302 if(r_mk <= 0.0148f)
303 {
304 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
305 }
306 else
307 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000308 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
309 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
310 std::make_pair(lhs_info_buf, rhs_info_buf),
311 n, k, b, DataType::F16);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000312 }
313 }
314 }
315 else
316 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000317 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 8, 4, 1, 2, false, false, false, false, false);
318
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000319 if(workload <= 362.6000f)
320 {
321 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
322 }
323 else
324 {
325 if(r_mn <= 22.6067f)
326 {
327 if(workload <= 708.8000f)
328 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000329 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
330 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
331 std::make_pair(lhs_info_buf, rhs_info_buf),
332 n, k, b, DataType::F16);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000333 }
334 else
335 {
336 return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 16, false, false, false, false, false);
337 }
338 }
339 else
340 {
341 if(r_nk <= 0.0917f)
342 {
343 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
344 }
345 else
346 {
Gian Marco Iodice8919a1a2020-11-23 16:10:27 +0000347 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
348 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
349 std::make_pair(lhs_info_buf, rhs_info_buf),
350 n, k, b, DataType::F16);
Gian Marco Iodice229757b2020-11-15 16:06:10 +0000351 }
352 }
353 }
354 }
355}
356
SiCong Lia085a0c2020-12-02 14:54:34 +0000357std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100358{
359 ARM_COMPUTE_UNUSED(k);
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100360
361 if(m == 1)
362 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100363 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100364 }
365 else
366 {
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000367 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
368 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
369
370 if(workload <= 7449.60f)
371 {
372 if(workload <= 691.60f)
373 {
374 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 8, false, false, false, false, false);
375 }
376 else
377 {
378 if(workload <= 4155.20f)
379 {
380 return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
381 }
382 else
383 {
384 return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 32, false, false, false, false, false);
385 }
386 }
387 }
388 else
389 {
390 if(workload <= 16300.80f)
391 {
392 if(r_mn <= 44.56f)
393 {
394 GEMMLHSMatrixInfo lhs_info_buf;
395 GEMMRHSMatrixInfo rhs_info_buf;
396 GEMMLHSMatrixInfo lhs_info_img;
397 GEMMRHSMatrixInfo rhs_info_img;
398
Gian Marco Iodice23441892021-01-21 12:23:22 +0000399 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, false, true, false, false, true);
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000400 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
401
402 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
403 std::make_pair(lhs_info_buf, rhs_info_buf),
404 n, k, b, DataType::F16);
405 }
406 else
407 {
408 return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
409 }
410 }
411 else
412 {
413 GEMMLHSMatrixInfo lhs_info_buf;
414 GEMMRHSMatrixInfo rhs_info_buf;
415 GEMMLHSMatrixInfo lhs_info_img;
416 GEMMRHSMatrixInfo rhs_info_img;
417
418 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
419 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
420
421 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
422 std::make_pair(lhs_info_buf, rhs_info_buf),
423 n, k, b, DataType::F16);
424 }
425 }
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100426 }
427}
428
SiCong Lia085a0c2020-12-02 14:54:34 +0000429std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100430{
431 ARM_COMPUTE_UNUSED(k);
432 ARM_COMPUTE_UNUSED(b);
433
434 if(m == 1)
435 {
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000436 const unsigned int n0 = n < 1280 ? 2 : 4;
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100437 const unsigned int h0 = std::max(n / n0, 1U);
438 return configure_lhs_rhs_info(m, n, 1, n0, 8, 1, h0, false, true, false, true);
439 }
440 else
441 {
442 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
443 }
444}
445
SiCong Lia085a0c2020-12-02 14:54:34 +0000446std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000447{
448 ARM_COMPUTE_UNUSED(k);
449 ARM_COMPUTE_UNUSED(b);
450
451 if(dot8_supported(CLKernelLibrary::get().get_device()))
452 {
453 if(m == 1)
454 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100455 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000456 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
457 }
458 else
459 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100460 const unsigned int h0 = std::max(n / 4, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000461 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, false, true, false, true);
462 }
463 }
464 else
465 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100466 const int h0 = std::max(std::min(static_cast<int>(n / 2), static_cast<int>(128)), static_cast<int>(1));
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000467 if(m == 1)
468 {
Gian Marco Iodice2ec6c1e2019-04-09 12:03:05 +0100469 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000470 }
471 else
472 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100473 return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000474 }
475 }
476}
477
SiCong Lia085a0c2020-12-02 14:54:34 +0000478std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000479{
480 ARM_COMPUTE_UNUSED(k);
481 ARM_COMPUTE_UNUSED(b);
482
483 if(m == 1)
484 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100485 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000486 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
487 }
488 else
489 {
490 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, 2, false, true, false, true);
491 }
492}
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100493
SiCong Lia085a0c2020-12-02 14:54:34 +0000494std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100495{
496 ARM_COMPUTE_UNUSED(k);
497 ARM_COMPUTE_UNUSED(b);
498
499 if(m == 1)
500 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100501 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100502 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true);
503 }
504 else
505 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100506 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100507 return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
508 }
509}
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000510
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000511} // namespace cl_gemm
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000512} // namespace arm_compute