blob: 851e23bc84545d1c348bbec3c3ebbf7bc0cec901 [file] [log] [blame]
/*
 * Copyright (c) 2020-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
SiCong Lia085a0c2020-12-02 14:54:34 +000024#include "src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h"
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010028
Georgios Pinitas7891a732021-08-20 21:39:25 +010029#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000030
31#include <map>
32#include <utility>
33
34namespace arm_compute
35{
36namespace cl_gemm
37{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010038CLGEMMDefaultTypeValhall::CLGEMMDefaultTypeValhall(GPUTarget gpu) : ICLGEMMKernelSelection(gpu)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000039{
40}
41
SiCong Lia085a0c2020-12-02 14:54:34 +000042CLGEMMKernelType CLGEMMDefaultTypeValhall::select_kernel(const CLGEMMKernelSelectionParams &params)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000043{
44 // _target could be used in the future to have a dedicated heuristic for each GPU IP
45 ARM_COMPUTE_UNUSED(_target);
46
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010047 using FunctionExecutorPtr = CLGEMMKernelType (CLGEMMDefaultTypeValhall::*)(
48 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000049
Gian Marco Iodice491f30c2020-11-02 15:43:57 +000050 // Default configurations for Valhall architectures
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010051 static std::map<DataType, FunctionExecutorPtr> gemm_default_configs = {
52 {DataType::F32, &CLGEMMDefaultTypeValhall::default_f32},
53 {DataType::F16, &CLGEMMDefaultTypeValhall::default_f16},
54 {DataType::QASYMM8, &CLGEMMDefaultTypeValhall::default_q8},
55 {DataType::QASYMM8_SIGNED, &CLGEMMDefaultTypeValhall::default_q8},
56 {DataType::QSYMM8, &CLGEMMDefaultTypeValhall::default_q8},
57 {DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8}};
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000058
Gian Marco Iodice491f30c2020-11-02 15:43:57 +000059 // Mali-G77 configurations
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010060 static std::map<DataType, FunctionExecutorPtr> gemm_g77_configs = {
61 {DataType::F32, &CLGEMMDefaultTypeValhall::default_f32},
62 {DataType::F16, &CLGEMMDefaultTypeValhall::g77_f16},
63 {DataType::QASYMM8, &CLGEMMDefaultTypeValhall::default_q8},
64 {DataType::QASYMM8_SIGNED, &CLGEMMDefaultTypeValhall::default_q8},
65 {DataType::QSYMM8, &CLGEMMDefaultTypeValhall::default_q8},
66 {DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8}};
Gian Marco Iodice491f30c2020-11-02 15:43:57 +000067
Gian Marco Iodice37954912021-04-12 17:34:33 +010068 // Mali-G78 configurations
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010069 static std::map<DataType, FunctionExecutorPtr> gemm_g78_configs = {
70 {DataType::F32, &CLGEMMDefaultTypeValhall::g78_f32},
71 {DataType::F16, &CLGEMMDefaultTypeValhall::g78_f16},
72 {DataType::QASYMM8, &CLGEMMDefaultTypeValhall::default_q8},
73 {DataType::QASYMM8_SIGNED, &CLGEMMDefaultTypeValhall::default_q8},
74 {DataType::QSYMM8, &CLGEMMDefaultTypeValhall::default_q8},
75 {DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8}};
Gian Marco Iodice37954912021-04-12 17:34:33 +010076
Gian Marco Iodice7a0f1bd2023-04-26 14:55:02 +010077 // Mali-G710 and Mali-G610 configurations
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010078 static std::map<DataType, FunctionExecutorPtr> gemm_g710_configs = {
79 {DataType::F32, &CLGEMMDefaultTypeValhall::default_f32},
80 {DataType::F16, &CLGEMMDefaultTypeValhall::g710_f16},
81 {DataType::QASYMM8, &CLGEMMDefaultTypeValhall::default_q8},
82 {DataType::QASYMM8_SIGNED, &CLGEMMDefaultTypeValhall::default_q8},
83 {DataType::QSYMM8, &CLGEMMDefaultTypeValhall::default_q8},
84 {DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8}};
Gian Marco Iodice7a0f1bd2023-04-26 14:55:02 +010085
Gunes Bayir4bfc70e2021-12-10 16:17:56 +000086 // Mali-G715 and Mali-G615 configurations
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010087 static std::map<DataType, FunctionExecutorPtr> gemm_g715_configs = {
88 {DataType::F32, &CLGEMMDefaultTypeValhall::g715_f32},
89 {DataType::F16, &CLGEMMDefaultTypeValhall::g715_f16},
90 {DataType::QASYMM8, &CLGEMMDefaultTypeValhall::default_q8},
91 {DataType::QASYMM8_SIGNED, &CLGEMMDefaultTypeValhall::default_q8},
92 {DataType::QSYMM8, &CLGEMMDefaultTypeValhall::default_q8},
93 {DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8}};
Gunes Bayir4bfc70e2021-12-10 16:17:56 +000094
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000095 const DataType data_type = params.data_type;
96
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010097 switch (_target)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +000098 {
Gian Marco Iodice7a0f1bd2023-04-26 14:55:02 +010099 case GPUTarget::G710:
100 case GPUTarget::G610:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100101 if (gemm_g710_configs.find(data_type) != gemm_g710_configs.end())
Gian Marco Iodice7a0f1bd2023-04-26 14:55:02 +0100102 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100103 return (this->*gemm_g710_configs[data_type])(params.m, params.n, params.k, params.b,
104 params.is_rhs_constant);
Gian Marco Iodice7a0f1bd2023-04-26 14:55:02 +0100105 }
106 ARM_COMPUTE_ERROR("Not supported data type");
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000107 case GPUTarget::G715:
108 case GPUTarget::G615:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100109 if (gemm_g715_configs.find(data_type) != gemm_g715_configs.end())
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000110 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100111 return (this->*gemm_g715_configs[data_type])(params.m, params.n, params.k, params.b,
112 params.is_rhs_constant);
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000113 }
114 ARM_COMPUTE_ERROR("Not supported data type");
Gian Marco Iodice37954912021-04-12 17:34:33 +0100115 case GPUTarget::G78:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100116 if (gemm_g78_configs.find(data_type) != gemm_g78_configs.end())
Gian Marco Iodice37954912021-04-12 17:34:33 +0100117 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100118 return (this->*gemm_g78_configs[data_type])(params.m, params.n, params.k, params.b,
119 params.is_rhs_constant);
Gian Marco Iodice37954912021-04-12 17:34:33 +0100120 }
121 ARM_COMPUTE_ERROR("Not supported data type");
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000122 case GPUTarget::G77:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100123 if (gemm_g77_configs.find(data_type) != gemm_g77_configs.end())
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000124 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100125 return (this->*gemm_g77_configs[data_type])(params.m, params.n, params.k, params.b,
126 params.is_rhs_constant);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000127 }
128 ARM_COMPUTE_ERROR("Not supported data type");
129 default:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100130 if (gemm_default_configs.find(data_type) != gemm_default_configs.end())
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000131 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100132 return (this->*gemm_default_configs[data_type])(params.m, params.n, params.k, params.b,
133 params.is_rhs_constant);
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000134 }
135 ARM_COMPUTE_ERROR("Not supported data type");
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000136 }
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000137}
138
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100139CLGEMMKernelType CLGEMMDefaultTypeValhall::default_f32(
140 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000141{
Gian Marco Iodice026d0452020-08-28 13:52:12 +0100142 ARM_COMPUTE_UNUSED(m, n, k, b);
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000143
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100144 return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE;
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000145}
146
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100147CLGEMMKernelType CLGEMMDefaultTypeValhall::default_f16(
148 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000149{
Gian Marco Iodice026d0452020-08-28 13:52:12 +0100150 ARM_COMPUTE_UNUSED(m, n, k, b);
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000151
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100152 return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE;
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000153}
154
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100155CLGEMMKernelType
156CLGEMMDefaultTypeValhall::g77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000157{
Gian Marco Iodicef16eed92023-04-19 16:29:26 +0100158 ARM_COMPUTE_UNUSED(m, n, k, b);
159
Gian Marco Iodice7a0f1bd2023-04-26 14:55:02 +0100160 return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE;
161}
162
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100163CLGEMMKernelType
164CLGEMMDefaultTypeValhall::g710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gian Marco Iodice7a0f1bd2023-04-26 14:55:02 +0100165{
166 ARM_COMPUTE_UNUSED(m, n, k, b);
167
168 return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE;
Gian Marco Iodice491f30c2020-11-02 15:43:57 +0000169}
170
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100171CLGEMMKernelType CLGEMMDefaultTypeValhall::default_q8(
172 unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000173{
Gian Marco Iodice026d0452020-08-28 13:52:12 +0100174 ARM_COMPUTE_UNUSED(m, n, k, b);
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000175
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100176 if (is_rhs_constant)
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000177 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100178 return CLGEMMKernelType::RESHAPED_ONLY_RHS;
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000179 }
180 else
181 {
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +0100182 return CLGEMMKernelType::NATIVE;
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000183 }
184}
Gian Marco Iodice37954912021-04-12 17:34:33 +0100185
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100186CLGEMMKernelType
187CLGEMMDefaultTypeValhall::g78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100188{
189 ARM_COMPUTE_UNUSED(b);
190
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100191 if (!is_rhs_constant)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100192 {
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100193 return CLGEMMKernelType::NATIVE;
Gian Marco Iodice37954912021-04-12 17:34:33 +0100194 }
195
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100196 if (m == 1)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100197 {
198 return CLGEMMKernelType::RESHAPED_ONLY_RHS;
199 }
200
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100201 if (n <= 272.0000f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100202 {
203 return CLGEMMKernelType::RESHAPED_ONLY_RHS;
204 }
205 else
206 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100207 if (k <= 471.0000f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100208 {
209 return CLGEMMKernelType::RESHAPED_ONLY_RHS;
210 }
211 else
212 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100213 if (m <= 72.5000f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100214 {
215 return CLGEMMKernelType::RESHAPED_ONLY_RHS;
216 }
217 else
218 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100219 if (m <= 90.5000f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100220 {
221 return CLGEMMKernelType::RESHAPED;
222 }
223 else
224 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100225 if (k <= 2448.0000f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100226 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100227 if (n <= 756.0000f)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100228 {
229 return CLGEMMKernelType::RESHAPED_ONLY_RHS;
230 }
231 else
232 {
233 return CLGEMMKernelType::RESHAPED;
234 }
235 }
236 else
237 {
238 return CLGEMMKernelType::RESHAPED;
239 }
240 }
241 }
242 }
243 }
244}
245
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100246CLGEMMKernelType
247CLGEMMDefaultTypeValhall::g78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100248{
249 ARM_COMPUTE_UNUSED(m, n, k, b);
250
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100251 if (!is_rhs_constant)
Gian Marco Iodice37954912021-04-12 17:34:33 +0100252 {
Gian Marco Iodicec9cecc02021-10-15 10:23:24 +0100253 return CLGEMMKernelType::NATIVE;
Gian Marco Iodice37954912021-04-12 17:34:33 +0100254 }
255
256 return CLGEMMKernelType::RESHAPED_ONLY_RHS;
257}
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000258
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100259CLGEMMKernelType
260CLGEMMDefaultTypeValhall::g715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000261{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100262 if (!is_rhs_constant)
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000263 {
264 return default_f32(m, n, k, b, is_rhs_constant);
265 }
266
267 unsigned int best_m0;
268 unsigned int best_n0;
269
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100270 if (opencl::kernels::gemm::is_mmul_kernel_preferred(m, n, k, b, DataType::F32, best_m0, best_n0))
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000271 {
272 return CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL;
273 }
274 else
275 {
276 return default_f32(m, n, k, b, is_rhs_constant);
277 }
278}
279
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100280CLGEMMKernelType
281CLGEMMDefaultTypeValhall::g715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000282{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100283 if (!is_rhs_constant)
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000284 {
285 return g78_f16(m, n, k, b, is_rhs_constant);
286 }
287
288 unsigned int best_m0;
289 unsigned int best_n0;
290
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100291 if (opencl::kernels::gemm::is_mmul_kernel_preferred(m, n, k, b, DataType::F16, best_m0, best_n0))
Gunes Bayir4bfc70e2021-12-10 16:17:56 +0000292 {
293 return CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL;
294 }
295 else
296 {
297 return g78_f16(m, n, k, b, is_rhs_constant);
298 }
299}
300
Gian Marco Iodice5a4fe192020-03-16 12:22:37 +0000301} // namespace cl_gemm
302} // namespace arm_compute