blob: 15a5632d0b1ceba7ba5d427d344ca538de7ee870 [file] [log] [blame]
/*
 * Copyright (c) 2022-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "ClKernelRuntime.h"
25#include "arm_compute/core/CL/ICLTensor.h"
26#include "src/core/CL/CLUtils.h"
SiCong Li23882a92023-06-28 09:49:45 +010027#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
28#include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h"
29#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Lif44bbc52022-08-29 18:25:51 +010030#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
31#include "src/gpu/cl/ClKernelLibrary.h"
32
33#include "support/Cast.h"
34namespace arm_compute
35{
36namespace experimental
37{
38namespace dynamic_fusion
39{
40using namespace arm_compute::opencl;
41
42void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKernelSourceCode &code)
43{
44 // Create kernel from kernel source string
45 opencl::ClKernelLibrary &klib = opencl::ClKernelLibrary::get();
46 _kernel = static_cast<cl::Kernel>(compile_ctx.create_kernel(code.name(),
Ramy Elgammal002e6532023-01-11 18:48:04 +000047 code.name(), // program name has to be provided to differentiate between different unfusable components' kernels.
SiCong Li16b37522023-07-18 17:56:49 +010048 // Each program contains exactly one kernel
SiCong Lif44bbc52022-08-29 18:25:51 +010049 code.code(),
50 klib.kernel_path() /* Kernel path: Used in cases of embedded kernels */,
51 code.build_options().options(),
52 false /* Is source binary */));
53
54 // Configure execution window
55 IClKernel::configure_internal(code.window());
56
57 // Set config id for lws tuning
58 _config_id = code.config_id();
59
60 // Set kernel arguments
61 _arguments = code.arguments();
62}
63
SiCong Li23882a92023-06-28 09:49:45 +010064#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
65
SiCong Lif44bbc52022-08-29 18:25:51 +010066inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector<cl::Image2D> &cl_images)
67{
SiCong Li19844f62023-05-16 16:46:34 +010068 ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
69
SiCong Lif44bbc52022-08-29 18:25:51 +010070 switch(arg.type)
71 {
72 case GpuKernelArgumentInfo::Type::Scalar:
73 {
74 ARM_COMPUTE_ERROR("Unsupported yet");
75 break;
76 }
77
78 case GpuKernelArgumentInfo::Type::Vector:
79 {
80 add_1D_tensor_argument(idx, tensor, arg_slice);
81 break;
82 }
83
84 case GpuKernelArgumentInfo::Type::Image:
85 {
86 add_2D_tensor_argument(idx, tensor, arg_slice);
87 break;
88 }
89 case GpuKernelArgumentInfo::Type::Image_Reinterpret_As_3D:
90 {
91 add_2D_tensor_argument(idx, tensor, arg_slice);
92 const unsigned int total_cross_plane_pad = tensor->info()->padding().top + tensor->info()->padding().bottom;
93 _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(total_cross_plane_pad));
94 break;
95 }
96 case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
97 {
98 const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));
99 const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
Gian Marco Iodice3cce35d2022-12-30 16:07:45 +0000100 cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
SiCong Lif44bbc52022-08-29 18:25:51 +0100101 cl_images.push_back(tensor_image2d);
102 _kernel.setArg(idx++, tensor_image2d);
103 break;
104 }
105
106 case GpuKernelArgumentInfo::Type::Image_3D:
107 {
108 add_2D_tensor_argument(idx, tensor, arg_slice);
109 _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
110 break;
111 }
112 case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
113 {
114 const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));
115 const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
Gian Marco Iodice3cce35d2022-12-30 16:07:45 +0000116 cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
SiCong Lif44bbc52022-08-29 18:25:51 +0100117 cl_images.push_back(tensor_image2d);
118 _kernel.setArg(idx++, tensor_image2d);
119 _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
120 break;
121 }
122
123 case GpuKernelArgumentInfo::Type::Tensor_3D:
124 {
125 add_3D_tensor_argument(idx, tensor, arg_slice);
126 break;
127 }
128
129 case GpuKernelArgumentInfo::Type::Tensor_4D:
130 {
131 add_4D_tensor_argument(idx, tensor, arg_slice);
132 break;
133 }
134 case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
135 {
136 add_4d_tensor_nhwc_argument(idx, tensor);
137 break;
138 }
139 case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
140 {
141 const size_t image_w = tensor->info()->dimension(0) / 4;
142 const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1);
143 const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];
144
145 cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(),
Gian Marco Iodice3cce35d2022-12-30 16:07:45 +0000146 TensorShape(image_w, image_h), tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
SiCong Lif44bbc52022-08-29 18:25:51 +0100147 cl_images.push_back(tensor_image2d);
148
149 _kernel.setArg(idx++, tensor_image2d);
150 add_4d_tensor_nhwc_argument(idx, tensor);
151 break;
152 }
SiCong Li19844f62023-05-16 16:46:34 +0100153 case GpuKernelArgumentInfo::Type::Tensor_Special_0:
154 {
155 const ITensorInfo *info = tensor->info();
156 const Strides &strides = info->strides_in_bytes();
157
158 _kernel.setArg(idx++, tensor->cl_buffer());
159 const size_t dim1xdim2 = info->tensor_shape()[1] * info->tensor_shape()[2];
160 _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(dim1xdim2));
161 const size_t stride1 = strides[1];
162 _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(stride1));
163 break;
164 }
SiCong Lif44bbc52022-08-29 18:25:51 +0100165 default:
166 {
167 ARM_COMPUTE_ERROR("Unsupported");
168 }
169 }
170}
171
SiCong Li23882a92023-06-28 09:49:45 +0100172#else // ACL_INTERNAL_TEST_CKW_IN_DF
173inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images)
174{
175 switch(arg.type())
176 {
177 case GpuKernelArgumentBinding::Type::TensorStorage:
178 {
179 switch(arg.tensor_storage_type())
180 {
181 case TensorStorageType::ClBufferUint8Ptr:
182 {
183 cl_add_buffer_argument(_kernel, idx, tensor->cl_buffer());
184 break;
185 }
186 case TensorStorageType::ClImage2dReadOnly:
187 {
188 cl::Image2D tensor_image2d = create_image2d_from_tensor(tensor, CLImage2DType::ReadOnly);
189 cl_images.push_back(tensor_image2d);
190 cl_add_texture_argument(_kernel, idx, tensor_image2d);
191 break;
192 }
193 case TensorStorageType::ClImage2dWriteOnly:
194 {
195 cl::Image2D tensor_image2d = create_image2d_from_tensor(tensor, CLImage2DType::WriteOnly);
196 cl_images.push_back(tensor_image2d);
197 cl_add_texture_argument(_kernel, idx, tensor_image2d);
198 break;
199 }
200 default:
201 {
202 ARM_COMPUTE_ERROR("Do not accept other TensorStorageType");
203 break;
204 }
205 }
206 break;
207 }
208 case GpuKernelArgumentBinding::Type::TensorComponent:
209 {
210 cl_add_tensor_component_argument(_kernel, idx, tensor, arg.tensor_component_type());
211 break;
212 }
213 default:
214 {
215 ARM_COMPUTE_ERROR("Do not accept other types of kernel arguments");
216 break;
217 }
218 }
219}
220
221#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Bind the packed tensors to the kernel arguments and enqueue the kernel.
 *
 * @param[in,out] tensors Tensor pack resolving the argument ids recorded at configure time
 * @param[in]     window  Execution window; must be a valid sub-window of the configured one
 * @param[in,out] queue   Command queue the kernel is enqueued on
 */
void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);

    Window slice = window.first_slice_window_3D();

    /// NOTE: Parameters extracted from old kernels. So far they seem to be constant
    /// but we may need to make them into another configuration passed from GpuWorkloadSourceCode if needed in the future
    constexpr bool skip_sliding_window  = false;
    constexpr bool use_dummy_work_items = false;

    // NOTE(review): with skip_sliding_window == false the do-while below executes exactly once
    // (the condition short-circuits before sliding), i.e. the whole window is dispatched as a single
    // slice. The flag's name reads inverted relative to that behavior — confirm intent before changing.
    unsigned int idx = 0;
    do
    {
        // Set kernel arguments
        // CLImages created from tensor arguments. Need to be retained until enqueue
        std::vector<cl::Image2D> cl_images;
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
        // Legacy argument path: one descriptor per tensor id
        for(auto id_arg : _arguments)
        {
            const auto arg    = id_arg.second;
            auto       tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first));
            ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
            ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
            add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images);
        }

#else  // ACL_INTERNAL_TEST_CKW_IN_DF
        // CKW argument path: flat list of per-argument bindings
        for(const auto &arg : _arguments)
        {
            auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id()));
            ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
            ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
            add_kernel_argument(idx, arg, tensor, cl_images);
        }
#endif // ACL_INTERNAL_TEST_CKW_IN_DF

        // Dispatch kernel
        // NOTE(review): idx is NOT reset at the top of the loop; this is only safe because the loop
        // body runs once today. If sliding is ever enabled, idx must be reset per slice — verify then.
        enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
    }
    while(skip_sliding_window && window.slide_window_slice_3D(slice));
}
265
266} // namespace dynamic_fusion
267} // namespace experimental
268} // namespace arm_compute