/*
 * Copyright (c) 2022-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
25#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
26
27#include "arm_compute/core/experimental/Types.h"
28#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010029
SiCong Lif44bbc52022-08-29 18:25:51 +010030#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
SiCong Li23882a92023-06-28 09:49:45 +010031#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
SiCong Lif44bbc52022-08-29 18:25:51 +010032
33namespace arm_compute
34{
35namespace experimental
36{
37namespace dynamic_fusion
38{
#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
namespace
{
/** Extract the kernel arguments of one tensor from the front of a flat list of kernel arguments.
 *
 * The leading run of bindings that share the id of the first binding is copied into the result and
 * popped from @p flat_kernel_args. The remaining bindings are left untouched, so repeated calls consume
 * the flat list one group at a time.
 *
 * @param[in, out] flat_kernel_args Flat list of kernel arguments; the extracted bindings are removed from its front
 *
 * @return GpuKernelArgumentList Bindings of the first group, or an empty list if @p flat_kernel_args is empty
 */
GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &flat_kernel_args)
{
    GpuKernelArgumentList tensor_kargs{};
    if (flat_kernel_args.empty())
    {
        return tensor_kargs;
    }

    // Read the id by value *before* popping anything: pop_front() destroys the front element, so a
    // reference obtained from front() must not be used once the element has been popped.
    const auto tensor_id = flat_kernel_args.front().id();

    // Stop as soon as a binding with a different id (i.e. the next group) is at the front
    while (!flat_kernel_args.empty() && flat_kernel_args.front().id() == tensor_id)
    {
        tensor_kargs.push_back(flat_kernel_args.front());
        flat_kernel_args.pop_front();
    }
    return tensor_kargs;
}
} // namespace
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
using UnitWorkloadId = int32_t;
76
SiCong Li23882a92023-06-28 09:49:45 +010077/** Describes all the info related to a **workload argument** (tensor) in order to:
SiCong Lif44bbc52022-08-29 18:25:51 +010078 * - be used by runtime to configure gpu kernel argument
79 * - be used by memory managers to allocate required memory
80 */
81class GpuWorkloadArgument
82{
83public:
84 /** Default constructor */
85 GpuWorkloadArgument() = default;
SiCong Li23882a92023-06-28 09:49:45 +010086#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Lif44bbc52022-08-29 18:25:51 +010087 /** Constructor
88 *
89 * @param[in] tensor_info @ref ITensorInfo of the workload argument
90 * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
91 * @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument
92 */
93 GpuWorkloadArgument(const ITensorInfo &tensor_info,
94 const MemoryDescriptor &mem_desc,
95 const GpuKernelArgumentInfo &kernel_arg_info)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010096 : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_arg_info{kernel_arg_info}
SiCong Lif44bbc52022-08-29 18:25:51 +010097 {
98 }
SiCong Li23882a92023-06-28 09:49:45 +010099#else // ACL_INTERNAL_TEST_CKW_IN_DF
100 /** Constructor
101 *
102 * @param[in] tensor_info @ref ITensorInfo of the workload argument
103 * @param[in] mem_desc @ref MemoryDescriptor of the workload argument
104 * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument
105 */
106 GpuWorkloadArgument(const ITensorInfo &tensor_info,
107 const MemoryDescriptor &mem_desc,
108 const GpuKernelArgumentList &kernel_args)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100109 : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_args{kernel_args}
SiCong Li23882a92023-06-28 09:49:45 +0100110 {
111 }
112#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Lif44bbc52022-08-29 18:25:51 +0100113 /** Get tensor id within workload */
114 ITensorInfo::Id id() const
115 {
116 return _tensor_info.id();
117 }
118 /** Get @ref ITensorInfo of the argument */
119 ITensorInfo *tensor_info()
120 {
121 return &_tensor_info;
122 }
123 /** Get @ref ITensorInfo of the argument */
124 const ITensorInfo *tensor_info() const
125 {
126 return &_tensor_info;
127 }
128 /** Get @ref MemoryDescriptor of the argument */
129 MemoryDescriptor *memory_descriptor()
130 {
131 return &_mem_desc;
132 }
133 /** Get @ref MemoryDescriptor of the argument */
134 const MemoryDescriptor *memory_descriptor() const
135 {
136 return &_mem_desc;
137 }
SiCong Li23882a92023-06-28 09:49:45 +0100138#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Lif44bbc52022-08-29 18:25:51 +0100139 /** Get @ref GpuKernelArgumentInfo of the argument */
140 GpuKernelArgumentInfo *kernel_argument_info()
141 {
142 return &_kernel_arg_info;
143 }
144 /** Get @ref GpuKernelArgumentInfo of the argument */
145 const GpuKernelArgumentInfo *kernel_argument_info() const
146 {
147 return &_kernel_arg_info;
148 }
SiCong Li23882a92023-06-28 09:49:45 +0100149#else // ACL_INTERNAL_TEST_CKW_IN_DF
150 /** Get @ref GpuKernelArgumentList of the workload tensor */
151 GpuKernelArgumentList *kernel_argument_list()
152 {
153 return &_kernel_args;
154 }
155 /** Get @ref GpuKernelArgumentList of the workload tensor */
156 const GpuKernelArgumentList *kernel_argument_list() const
157 {
158 return &_kernel_args;
159 }
160#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Lif44bbc52022-08-29 18:25:51 +0100161 /** Check if the workload argument has valid id
162 *
163 * @return true If has valid id
164 * @return false Otherwise
165 */
166 bool has_valid_id() const
167 {
168 return _tensor_info.has_valid_id();
169 }
170
171private:
SiCong Li23882a92023-06-28 09:49:45 +0100172 TensorInfo _tensor_info{};
173 MemoryDescriptor _mem_desc{};
174#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100175 GpuKernelArgumentInfo _kernel_arg_info{};
SiCong Li23882a92023-06-28 09:49:45 +0100176#else // ACL_INTERNAL_TEST_CKW_IN_DF
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100177 GpuKernelArgumentList _kernel_args{};
SiCong Li23882a92023-06-28 09:49:45 +0100178#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Lif44bbc52022-08-29 18:25:51 +0100179};
180
/** Describes when a unit workload is run.
 *
 * A default-constructed object selects @ref Stage::Run.
 */
struct UnitWorkloadStage
{
    enum class Stage
    {
        Prepare, /**< Only run once at the beginning. */
        Run,     /**< Run every time after the first time. */
    };
    Stage stage{Stage::Run};
};
192
193inline bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
194{
195 return stage0.stage == stage1.stage;
196}
197
198/** The atomic unit in a Gpu workload. It contains exactly one kernel to run.
199 */
200class GpuUnitWorkload
201{
202public:
203 /** Default constructor */
204 GpuUnitWorkload() = default;
205 /** Constructor
206 *
207 * @param[in] id Id that uniquely identifies this unit workload in a workload
208 * @param[in] kernel_code @ref GpuKernelSourceCode contained within
209 * @param[in] stage Stage of the unit workload
210 */
211 GpuUnitWorkload(UnitWorkloadId id, const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100212 : _id{id}, _kernel_code{kernel_code}, _stage{stage}
SiCong Lif44bbc52022-08-29 18:25:51 +0100213 {
214 }
215 /** Get the id of the unit workload */
216 UnitWorkloadId id() const
217 {
218 return _id;
219 }
220 /** Get reference to the underlying @ref GpuKernelSourceCode */
221 const GpuKernelSourceCode &code() const
222 {
223 return _kernel_code;
224 }
225 /** Get the stage of the unit workload */
226 UnitWorkloadStage stage() const
227 {
228 return _stage;
229 }
230
231private:
232 UnitWorkloadId _id{};
233 GpuKernelSourceCode _kernel_code{};
234 UnitWorkloadStage _stage{};
235};
236
237/** Hold the generated kernel source code and other information required to compile and run the workload.
238 */
239class GpuWorkloadSourceCode
240{
241public:
242 /** Default constructor */
243 GpuWorkloadSourceCode() = default;
244 /** Add a unit workload to the workload code
245 *
246 * @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload
247 * @param[in] stage Stage of the unit workload
248 * @param[in] mem_map @ref MemoryDescriptor map for all tensors within the unit workload
SiCong Li23882a92023-06-28 09:49:45 +0100249 * @param[in] context @ref GpuWorkloadContext associated with the unit workload
SiCong Lif44bbc52022-08-29 18:25:51 +0100250 *
251 * @return UnitWorkloadId Allocated unit workload id
252 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100253 UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code,
254 const UnitWorkloadStage &stage,
255 const MemoryDescriptorMap &mem_map,
256 const GpuWorkloadContext *context)
SiCong Lif44bbc52022-08-29 18:25:51 +0100257 {
258 // Use the size of the kernel codes as Id
259 const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
260 const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
261 _unit_workloads.push_back(unit_work);
SiCong Li23882a92023-06-28 09:49:45 +0100262#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
263 ARM_COMPUTE_UNUSED(context);
SiCong Lif44bbc52022-08-29 18:25:51 +0100264 // Assemble kernel argument with memory descriptor to form workload argument
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100265 for (const auto &id_arg : kernel_code.arguments())
SiCong Lif44bbc52022-08-29 18:25:51 +0100266 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100267 const auto arg_id = id_arg.first;
268 const auto arg = id_arg.second;
269 _workload_arguments[arg_id] =
270 GpuWorkloadArgument{*arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info()};
271 if (_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
SiCong Lif44bbc52022-08-29 18:25:51 +0100272 {
273 _tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
274 }
275 _tensor_uwork_map[arg_id].insert(uwk_id);
276 }
SiCong Li23882a92023-06-28 09:49:45 +0100277#else // ACL_INTERNAL_TEST_CKW_IN_DF
278 GpuKernelArgumentList flat_kernel_args = kernel_code.arguments();
279 GpuKernelArgumentList tensor_kargs{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100280 while (true)
SiCong Li23882a92023-06-28 09:49:45 +0100281 {
282 tensor_kargs = extract_kernel_args_for_one_tensor(flat_kernel_args);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100283 if (tensor_kargs.empty())
SiCong Li23882a92023-06-28 09:49:45 +0100284 {
285 break;
286 }
287 else
288 {
289 const auto tensor_id = tensor_kargs.at(0).id();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100290 _workload_arguments[tensor_id] = GpuWorkloadArgument{
291 *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs};
292 if (_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end())
SiCong Li23882a92023-06-28 09:49:45 +0100293 {
294 _tensor_uwork_map[tensor_id] = std::set<UnitWorkloadId>();
295 }
296 _tensor_uwork_map[tensor_id].insert(uwk_id);
297 }
298 }
299#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Lif44bbc52022-08-29 18:25:51 +0100300 return uwk_id;
301 }
302 /** Get a unit workload from its id */
303 const GpuUnitWorkload &query_unit_workload(UnitWorkloadId id) const
304 {
305 ARM_COMPUTE_ERROR_ON(id < 0);
306 return _unit_workloads.at(id);
307 }
308 /** Get all unit workloads sorted in topological order */
309 std::vector<UnitWorkloadId> unit_workloads() const
310 {
311 std::vector<UnitWorkloadId> ids{};
312
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100313 for (const auto &uwk : _unit_workloads)
SiCong Lif44bbc52022-08-29 18:25:51 +0100314 {
315 ids.push_back(uwk.id());
316 }
317 return ids;
318 }
319 /** Get a @ref GpuWorkloadArgument from its associated tensor id */
320 const GpuWorkloadArgument *query_tensor(ITensorInfo::Id t_id) const
321 {
322 return &_workload_arguments.at(t_id);
323 }
324 /** Get all tensors in the entire workload */
325 std::vector<ITensorInfo::Id> tensors() const
326 {
327 std::vector<ITensorInfo::Id> ids{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100328 for (const auto &id_tensor : _workload_arguments)
SiCong Lif44bbc52022-08-29 18:25:51 +0100329 {
330 ids.push_back(id_tensor.first);
331 }
332 return ids;
333 }
334 /** Get all unit workloads connected to the tensor with @p t_id */
335 std::vector<UnitWorkloadId> get_unit_workloads_from_tensor(ITensorInfo::Id t_id) const
336 {
337 const auto unit_work_set = _tensor_uwork_map.at(t_id);
338 return std::vector<UnitWorkloadId>(unit_work_set.begin(), unit_work_set.end());
339 }
340
341private:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100342 std::vector<GpuUnitWorkload> _unit_workloads{};
SiCong Lif44bbc52022-08-29 18:25:51 +0100343 std::map<ITensorInfo::Id, GpuWorkloadArgument> _workload_arguments{};
344 std::map<ITensorInfo::Id, std::set<UnitWorkloadId>> _tensor_uwork_map{};
345};
346} // namespace dynamic_fusion
347} // namespace experimental
348} // namespace arm_compute
349#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */