blob: 3105db6693553f98acbc4770e96434f097c6912a [file] [log] [blame]
/*
 * Copyright (c) 2019-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010024#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000028#include "arm_compute/core/GPUTarget.h"
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010032#include "src/core/CL/gemm/CLGEMMHelpers.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000033
34#include <map>
35#include <utility>
36
37namespace arm_compute
38{
39namespace cl_gemm
40{
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010041using namespace arm_compute::misc::shape_calculator;
42
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000043CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu)
44 : ICLGEMMKernelConfiguration(gpu)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000045{
46}
47
48std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
49{
Gian Marco Iodice926afe12019-03-19 11:44:13 +000050 using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
51 unsigned int b);
52
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010053 // Configurations for Mali-G51
54 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G51 =
55 {
56 { DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f32 },
Gian Marco Iodice0d548042019-10-03 15:12:09 +010057 { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f16 },
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +010058 { DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 },
59 { DataType::QSYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 },
60 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 },
61 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 }
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010062 };
63
Gian Marco Iodice926afe12019-03-19 11:44:13 +000064 // Configurations for Mali-G76
65 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G76 =
66 {
67 { DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f32 },
Gian Marco Iodice0d548042019-10-03 15:12:09 +010068 { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f16 },
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +010069 { DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 },
70 { DataType::QSYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 },
71 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 },
72 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +000073 };
74
75 // Configurations for Mali-G7x
76 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G7x =
77 {
78 { DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f32 },
Gian Marco Iodice0d548042019-10-03 15:12:09 +010079 { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f16 },
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +010080 { DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
81 { DataType::QSYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
82 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 },
83 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +000084 };
85
86 switch(_target)
87 {
88 case GPUTarget::G76:
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000089 if(gemm_configs_G76.find(data_type) != gemm_configs_G76.end())
Gian Marco Iodice0d548042019-10-03 15:12:09 +010090 {
91 return (this->*gemm_configs_G76[data_type])(m, n, k, b);
92 }
93 else
94 {
95 ARM_COMPUTE_ERROR("Not supported data type");
96 }
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +010097 case GPUTarget::G51:
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000098 if(gemm_configs_G51.find(data_type) != gemm_configs_G51.end())
Gian Marco Iodice0d548042019-10-03 15:12:09 +010099 {
100 return (this->*gemm_configs_G51[data_type])(m, n, k, b);
101 }
102 else
103 {
104 ARM_COMPUTE_ERROR("Not supported data type");
105 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000106 default:
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000107 if(gemm_configs_G7x.find(data_type) != gemm_configs_G7x.end())
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100108 {
109 return (this->*gemm_configs_G7x[data_type])(m, n, k, b);
110 }
111 else
112 {
113 ARM_COMPUTE_ERROR("Not supported data type");
114 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000115 }
116}
117
118std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
119{
120 ARM_COMPUTE_UNUSED(k);
121 ARM_COMPUTE_UNUSED(b);
122
123 if(m == 1)
124 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100125 if(n <= 2548)
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000126 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100127 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000128 }
129 else
130 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100131 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 8, false, true, false, true, false);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000132 }
133 }
134 else
135 {
136 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
137 }
138}
139
140std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
141{
142 ARM_COMPUTE_UNUSED(k);
143 ARM_COMPUTE_UNUSED(b);
144
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100145 GEMMLHSMatrixInfo lhs_info_buf;
146 GEMMRHSMatrixInfo rhs_info_buf;
147 GEMMLHSMatrixInfo lhs_info_img;
148 GEMMRHSMatrixInfo rhs_info_img;
149
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100150 const bool is_workload_big = ((m * n * b) / 16) >= 2048;
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100151 // Get lhs_info/rhs_info in case of OpenCL buffer
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000152 if(m == 1)
153 {
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000154 if(n <= 204.0)
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100155 {
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000156 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 16, false, true, false, true, false);
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100157 }
158 else
159 {
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000160 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 32, false, true, false, true, false);
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100161 }
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100162 }
163 else
164 {
165 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100166 if(is_workload_big)
167 {
168 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true);
169 }
170 else
171 {
172 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true);
173 }
174 }
175
176 // Get lhs_info/rhs_info in case of OpenCL image
177 const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
178 if(is_workload_big)
179 {
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100180 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
181 }
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100182 else
183 {
184 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true);
185 }
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100186
187 const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
188 const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img);
189 const TensorInfo tensor_reshaped_info(shape, 1, DataType::F32);
190
Gian Marco Iodice4aed4aa2020-08-07 15:36:30 +0100191 // In case of vector by matrix or small workloads, we use the OpenCL buffer rather than the OpenCL image2d
192 const bool use_cl_image2d = ((m == 1) || ((((m * n * b) / 16) < 2048) && n < 128)) ? false : true;
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100193
194 if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
195 {
196 return std::make_pair(lhs_info_img, rhs_info_img);
197 }
198 else
199 {
200 return std::make_pair(lhs_info_buf, rhs_info_buf);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000201 }
202}
203
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100204std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
205{
206 ARM_COMPUTE_UNUSED(k);
207 ARM_COMPUTE_UNUSED(b);
208
209 if(m == 1)
210 {
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000211 const unsigned int n0 = n < 1280 ? 2 : 4;
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100212 const unsigned int h0 = std::max(n / n0, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100213 return configure_lhs_rhs_info(m, n, 1, n0, 4, 1, h0, false, true, false, true);
214 }
215 else
216 {
217 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
218 }
219}
220
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100221std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
222{
223 ARM_COMPUTE_UNUSED(k);
224 ARM_COMPUTE_UNUSED(b);
225
226 if(m == 1)
227 {
228 if(n > 2048)
229 {
230 const unsigned int h0 = std::max(n / 4, 1U);
231 return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
232 }
233 else
234 {
235 const unsigned int h0 = std::max(n / 2, 1U);
236 return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
237 }
238 }
239 else
240 {
241 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
242 }
243}
244
245std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
246{
247 ARM_COMPUTE_UNUSED(k);
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100248
249 if(m == 1)
250 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100251 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100252 }
253 else
254 {
Gian Marco Iodice839e1982020-10-29 13:36:50 +0000255 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
256 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
257
258 if(workload <= 7449.60f)
259 {
260 if(workload <= 691.60f)
261 {
262 return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 8, false, false, false, false, false);
263 }
264 else
265 {
266 if(workload <= 4155.20f)
267 {
268 return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
269 }
270 else
271 {
272 return configure_lhs_rhs_info(m, n, 5, 8, 2, 1, 32, false, false, false, false, false);
273 }
274 }
275 }
276 else
277 {
278 if(workload <= 16300.80f)
279 {
280 if(r_mn <= 44.56f)
281 {
282 GEMMLHSMatrixInfo lhs_info_buf;
283 GEMMRHSMatrixInfo rhs_info_buf;
284 GEMMLHSMatrixInfo lhs_info_img;
285 GEMMRHSMatrixInfo rhs_info_img;
286
287 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
288 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
289
290 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
291 std::make_pair(lhs_info_buf, rhs_info_buf),
292 n, k, b, DataType::F16);
293 }
294 else
295 {
296 return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
297 }
298 }
299 else
300 {
301 GEMMLHSMatrixInfo lhs_info_buf;
302 GEMMRHSMatrixInfo rhs_info_buf;
303 GEMMLHSMatrixInfo lhs_info_img;
304 GEMMRHSMatrixInfo rhs_info_img;
305
306 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
307 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
308
309 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
310 std::make_pair(lhs_info_buf, rhs_info_buf),
311 n, k, b, DataType::F16);
312 }
313 }
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100314 }
315}
316
317std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
318{
319 ARM_COMPUTE_UNUSED(k);
320 ARM_COMPUTE_UNUSED(b);
321
322 if(m == 1)
323 {
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000324 const unsigned int n0 = n < 1280 ? 2 : 4;
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100325 const unsigned int h0 = std::max(n / n0, 1U);
326 return configure_lhs_rhs_info(m, n, 1, n0, 8, 1, h0, false, true, false, true);
327 }
328 else
329 {
330 return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
331 }
332}
333
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000334std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
335{
336 ARM_COMPUTE_UNUSED(k);
337 ARM_COMPUTE_UNUSED(b);
338
339 if(dot8_supported(CLKernelLibrary::get().get_device()))
340 {
341 if(m == 1)
342 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100343 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000344 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
345 }
346 else
347 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100348 const unsigned int h0 = std::max(n / 4, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000349 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, false, true, false, true);
350 }
351 }
352 else
353 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100354 const int h0 = std::max(std::min(static_cast<int>(n / 2), static_cast<int>(128)), static_cast<int>(1));
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000355 if(m == 1)
356 {
Gian Marco Iodice2ec6c1e2019-04-09 12:03:05 +0100357 return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000358 }
359 else
360 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100361 return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000362 }
363 }
364}
365
366std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
367{
368 ARM_COMPUTE_UNUSED(k);
369 ARM_COMPUTE_UNUSED(b);
370
371 if(m == 1)
372 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100373 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000374 return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
375 }
376 else
377 {
378 return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, 2, false, true, false, true);
379 }
380}
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100381
382std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
383{
384 ARM_COMPUTE_UNUSED(k);
385 ARM_COMPUTE_UNUSED(b);
386
387 if(m == 1)
388 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100389 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100390 return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true);
391 }
392 else
393 {
Gian Marco Iodice0d548042019-10-03 15:12:09 +0100394 const unsigned int h0 = std::max(n / 2, 1U);
Gian Marco Iodiceee6454a2019-09-17 10:56:51 +0100395 return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
396 }
397}
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000398
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000399} // namespace cl_gemm
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000400} // namespace arm_compute