blob: 00c284facc88b6369f8eb4ce259da918a40da749 [file] [log] [blame]
/*
 * Copyright (c) 2019-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010024#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000028#include "arm_compute/core/GPUTarget.h"
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/TensorShape.h"
31#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010032#include "src/core/CL/gemm/CLGEMMHelpers.h"
Gian Marco Iodice926afe12019-03-19 11:44:13 +000033
34#include <map>
35#include <utility>
36
37namespace arm_compute
38{
39namespace cl_gemm
40{
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +010041using namespace arm_compute::misc::shape_calculator;
42
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000043CLGEMMReshapedKernelConfigurationBifrost::CLGEMMReshapedKernelConfigurationBifrost(GPUTarget gpu)
44 : ICLGEMMKernelConfiguration(gpu)
Gian Marco Iodice926afe12019-03-19 11:44:13 +000045{
46}
47
48std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
49{
Gian Marco Iodice926afe12019-03-19 11:44:13 +000050 using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (CLGEMMReshapedKernelConfigurationBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
51
52 // Configurations for Mali-G76
53 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G76 =
54 {
55 { DataType::F32, &CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f32 },
Gian Marco Iodice05639f62019-09-24 12:05:06 +010056 { DataType::F16, &CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f16 },
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +010057 { DataType::QASYMM8, &CLGEMMReshapedKernelConfigurationBifrost::configure_G76_u8 },
58 { DataType::QSYMM8, &CLGEMMReshapedKernelConfigurationBifrost::configure_G76_u8 },
59 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedKernelConfigurationBifrost::configure_G76_u8 },
60 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedKernelConfigurationBifrost::configure_G76_u8 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +000061 };
62
63 // Configurations for Mali-G7x
64 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G7x =
65 {
66 { DataType::F32, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_f32 },
Gian Marco Iodice05639f62019-09-24 12:05:06 +010067 { DataType::F16, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_f16 },
Gian Marco Iodiceeb65f6d2020-04-15 11:42:15 +010068 { DataType::QASYMM8, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8 },
69 { DataType::QSYMM8, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8 },
70 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8 },
71 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +000072 };
73
74 switch(_target)
75 {
76 case GPUTarget::G76:
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000077 if(gemm_configs_G76.find(data_type) != gemm_configs_G76.end())
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +010078 {
79 return (this->*gemm_configs_G76[data_type])(m, n, k, b);
80 }
81 else
82 {
83 ARM_COMPUTE_ERROR("Not supported data type");
84 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +000085 default:
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +000086 if(gemm_configs_G7x.find(data_type) != gemm_configs_G7x.end())
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +010087 {
88 return (this->*gemm_configs_G7x[data_type])(m, n, k, b);
89 }
90 else
91 {
92 ARM_COMPUTE_ERROR("Not supported data type");
93 }
Gian Marco Iodice926afe12019-03-19 11:44:13 +000094 }
95}
96
97std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
98{
99 ARM_COMPUTE_UNUSED(k);
100 ARM_COMPUTE_UNUSED(b);
101
102 if(n <= 4)
103 {
104 return configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true);
105 }
106 else
107 {
108 return configure_lhs_rhs_info(m, n, 5, 4, 4, 2, 16, false, true, false, true);
109 }
110}
111
Gian Marco Iodice05639f62019-09-24 12:05:06 +0100112std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
113{
114 ARM_COMPUTE_UNUSED(k);
115 ARM_COMPUTE_UNUSED(b);
116
117 if(n <= 4)
118 {
119 return configure_lhs_rhs_info(m, n, 4, 2, 8, 8, 2, true, true, true, false);
120 }
121 else
122 {
123 return configure_lhs_rhs_info(m, n, 4, 8, 4, 4, 2, true, true, true, false);
124 }
125}
126
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000127std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
128{
129 ARM_COMPUTE_UNUSED(k);
130 ARM_COMPUTE_UNUSED(b);
131
132 if(dot8_supported(CLKernelLibrary::get().get_device()))
133 {
134 if(n <= 4)
135 {
136 return configure_lhs_rhs_info(m, n, 4, 2, 16, 2, 2, true, false, false, true);
137 }
138 else
139 {
140 return configure_lhs_rhs_info(m, n, 4, 4, 16, 2, 2, true, false, false, true);
141 }
142 }
143 else
144 {
145 if(n <= 4)
146 {
147 return configure_lhs_rhs_info(m, n, 4, 2, 8, 2, 2, true, false, false, true);
148 }
149 else
150 {
151 return configure_lhs_rhs_info(m, n, 6, 4, 4, 2, 2, true, true, false, true);
152 }
153 }
154}
155
156std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
157{
158 ARM_COMPUTE_UNUSED(k);
159 ARM_COMPUTE_UNUSED(b);
160
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100161 GEMMLHSMatrixInfo lhs_info_buf;
162 GEMMRHSMatrixInfo rhs_info_buf;
163 GEMMLHSMatrixInfo lhs_info_img;
164 GEMMRHSMatrixInfo rhs_info_img;
165
166 // Get lhs_info/rhs_info in case of OpenCL buffer
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000167 if(n <= 4)
168 {
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100169 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000170 }
171 else
172 {
Gian Marco Iodiceed5fe692020-07-09 08:41:10 +0100173 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 2, 8, 16, false, false, false, true);
174 }
175
176 // Get lhs_info/rhs_info in case of OpenCL image
177 // Condition on the GPU workload
178 if((m / 4) * (n / 4) >= 2560)
179 {
180 // Big workload
181 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 8, true, true, true, false, true);
182 }
183 else
184 {
185 // Small workload
186 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 1, true, true, true, false, true);
187 }
188
189 const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
190 const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img);
191 const TensorInfo tensor_reshaped_info(shape, 1, DataType::F32);
192
193 // In case of vector by matrix with few work-items, we use the OpenCL buffer rather than the OpenCL image2d
194 const bool use_cl_image2d = (n <= 4) ? false : true;
195
196 if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
197 {
198 return std::make_pair(lhs_info_img, rhs_info_img);
199 }
200 else
201 {
202 return std::make_pair(lhs_info_buf, rhs_info_buf);
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000203 }
204}
205
Gian Marco Iodice05639f62019-09-24 12:05:06 +0100206std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
207{
208 ARM_COMPUTE_UNUSED(k);
Gian Marco Iodice05639f62019-09-24 12:05:06 +0100209
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100210 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
211 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
212
213 if(workload <= 1049.59f)
Gian Marco Iodice05639f62019-09-24 12:05:06 +0100214 {
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100215 if(b <= 5)
216 {
217 if(workload <= 790.39f)
218 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100219 return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100220 }
221 else
222 {
223 if(workload <= 982.39f)
224 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100225 return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, false, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100226 }
227 else
228 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100229 return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 1, false, true, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100230 }
231 }
232 }
233 else
234 {
235 if(r_mn <= 0.21f)
236 {
237 if(r_mn <= 0.11f)
238 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100239 return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100240 }
241 else
242 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100243 return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100244 }
245 }
246 else
247 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100248 return configure_lhs_rhs_info(m, n, 2, 4, 4, 2, 2, false, false, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100249 }
250 }
Gian Marco Iodice05639f62019-09-24 12:05:06 +0100251 }
252 else
253 {
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100254 if(n <= 200)
255 {
256 if(workload <= 29772.79f)
257 {
258 if(m <= 64.5)
259 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100260 return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100261 }
262 else
263 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100264 return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100265 }
266 }
267 else
268 {
269 if(r_mn <= 1.09f)
270 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100271 return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100272 }
273 else
274 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100275 return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, true, true, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100276 }
277 }
278 }
279 else
280 {
281 if(m <= 43)
282 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100283 return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 4, true, false, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100284 }
285 else
286 {
287 if(workload <= 26364.79f)
288 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100289 return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, true, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100290 }
291 else
292 {
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +0100293 return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, true, true, false, false);
Gian Marco Iodicec6af9db2020-09-24 13:55:20 +0100294 }
295 }
296 }
Gian Marco Iodice05639f62019-09-24 12:05:06 +0100297 }
298}
299
Gian Marco Iodice926afe12019-03-19 11:44:13 +0000300std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
301{
302 ARM_COMPUTE_UNUSED(k);
303 ARM_COMPUTE_UNUSED(b);
304
305 if(n <= 4)
306 {
307 return configure_lhs_rhs_info(m, n, 4, 2, 16, 4, 1, false, false, false, true);
308 }
309 else
310 {
311 return configure_lhs_rhs_info(m, n, 4, 4, 16, 2, 2, false, true, false, true);
312 }
313}
314} // namespace cl_gemm
Gian Marco Iodice12f2b8c2020-02-13 12:27:37 +0000315} // namespace arm_compute