Blame - src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h - ml/ComputeLibrary

blob: 43bcc47fa09fd12c6bf45fccf9ec856d4525a8eb [file] [log] [blame]

SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	1	/*
Ramy Elgammal	002e653	2023-01-11 18:48:04 +0000	[diff] [blame]	2	* Copyright (c) 2022-2023 Arm Limited.
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
				25	#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
				26
				27	#include "arm_compute/core/experimental/Types.h"
				28	#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	29
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	30	#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	31	#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	32
				33	namespace arm_compute
				34	{
				35	namespace experimental
				36	{
				37	namespace dynamic_fusion
				38	{
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	39	#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
				40	namespace
				41	{
				42	/** Extract kernel arguments of one tensor from a flat list of kernel arguments.
				43	*
				44	* @param[in] flat_kernel_args
				45	* @return GpuKernelArgumentList
				46	*/
				47	GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &flat_kernel_args)
				48	{
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	49	if (flat_kernel_args.empty())
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	50	{
				51	return {};
				52	}
				53	GpuKernelArgumentList tensor_kargs{};
				54
				55	const GpuKernelArgumentBinding &karg_head = flat_kernel_args.front();
				56	tensor_kargs.push_back(karg_head);
				57	flat_kernel_args.pop_front();
				58	const auto tensor_id = karg_head.id();
				59
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	60	while (!flat_kernel_args.empty())
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	61	{
				62	const GpuKernelArgumentBinding &karg = flat_kernel_args.front();
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	63	if (karg.id() != tensor_id) // Encounter the next tensor, return the current tensor's kernel arguments
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	64	{
				65	return tensor_kargs;
				66	}
				67	tensor_kargs.push_back(karg);
				68	flat_kernel_args.pop_front();
				69	}
				70	return tensor_kargs;
				71	}
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	72	} // namespace
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	73	#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	74	/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
				75	using UnitWorkloadId = int32_t;
				76
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	77	/ Describes all the info related to a workload argument** (tensor) in order to:
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	78	* - be used by runtime to configure gpu kernel argument
				79	* - be used by memory managers to allocate required memory
				80	*/
				81	class GpuWorkloadArgument
				82	{
				83	public:
				84	/** Default constructor */
				85	GpuWorkloadArgument() = default;
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	86	#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	87	/** Constructor
				88	*
				89	* @param[in] tensor_info @ref ITensorInfo of the workload argument
				90	* @param[in] mem_desc @ref MemoryDescriptor of the workload argument
				91	* @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument
				92	*/
				93	GpuWorkloadArgument(const ITensorInfo &tensor_info,
				94	const MemoryDescriptor &mem_desc,
				95	const GpuKernelArgumentInfo &kernel_arg_info)
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	96	: _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_arg_info{kernel_arg_info}
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	97	{
				98	}
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	99	#else // ACL_INTERNAL_TEST_CKW_IN_DF
				100	/** Constructor
				101	*
				102	* @param[in] tensor_info @ref ITensorInfo of the workload argument
				103	* @param[in] mem_desc @ref MemoryDescriptor of the workload argument
				104	* @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument
				105	*/
				106	GpuWorkloadArgument(const ITensorInfo &tensor_info,
				107	const MemoryDescriptor &mem_desc,
				108	const GpuKernelArgumentList &kernel_args)
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	109	: _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_args{kernel_args}
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	110	{
				111	}
				112	#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	113	/** Get tensor id within workload */
				114	ITensorInfo::Id id() const
				115	{
				116	return _tensor_info.id();
				117	}
				118	/** Get @ref ITensorInfo of the argument */
				119	ITensorInfo *tensor_info()
				120	{
				121	return &_tensor_info;
				122	}
				123	/** Get @ref ITensorInfo of the argument */
				124	const ITensorInfo *tensor_info() const
				125	{
				126	return &_tensor_info;
				127	}
				128	/** Get @ref MemoryDescriptor of the argument */
				129	MemoryDescriptor *memory_descriptor()
				130	{
				131	return &_mem_desc;
				132	}
				133	/** Get @ref MemoryDescriptor of the argument */
				134	const MemoryDescriptor *memory_descriptor() const
				135	{
				136	return &_mem_desc;
				137	}
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	138	#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	139	/** Get @ref GpuKernelArgumentInfo of the argument */
				140	GpuKernelArgumentInfo *kernel_argument_info()
				141	{
				142	return &_kernel_arg_info;
				143	}
				144	/** Get @ref GpuKernelArgumentInfo of the argument */
				145	const GpuKernelArgumentInfo *kernel_argument_info() const
				146	{
				147	return &_kernel_arg_info;
				148	}
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	149	#else // ACL_INTERNAL_TEST_CKW_IN_DF
				150	/** Get @ref GpuKernelArgumentList of the workload tensor */
				151	GpuKernelArgumentList *kernel_argument_list()
				152	{
				153	return &_kernel_args;
				154	}
				155	/** Get @ref GpuKernelArgumentList of the workload tensor */
				156	const GpuKernelArgumentList *kernel_argument_list() const
				157	{
				158	return &_kernel_args;
				159	}
				160	#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	161	/** Check if the workload argument has valid id
				162	*
				163	* @return true If has valid id
				164	* @return false Otherwise
				165	*/
				166	bool has_valid_id() const
				167	{
				168	return _tensor_info.has_valid_id();
				169	}
				170
				171	private:
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	172	TensorInfo _tensor_info{};
				173	MemoryDescriptor _mem_desc{};
				174	#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	175	GpuKernelArgumentInfo _kernel_arg_info{};
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	176	#else // ACL_INTERNAL_TEST_CKW_IN_DF
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	177	GpuKernelArgumentList _kernel_args{};
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	178	#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	179	};
				180
				181	/** Describes when a unit workload is run.
				182	*/
				183	struct UnitWorkloadStage
				184	{
				185	enum class Stage
				186	{
				187	Prepare, /*< Only run once at the beginning. /
				188	Run, /*< Run every time after the first time. /
				189	};
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	190	Stage stage{Stage::Run};
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	191	};
				192
				193	inline bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
				194	{
				195	return stage0.stage == stage1.stage;
				196	}
				197
				198	/** The atomic unit in a Gpu workload. It contains exactly one kernel to run.
				199	*/
				200	class GpuUnitWorkload
				201	{
				202	public:
				203	/** Default constructor */
				204	GpuUnitWorkload() = default;
				205	/** Constructor
				206	*
				207	* @param[in] id Id that uniquely identifies this unit workload in a workload
				208	* @param[in] kernel_code @ref GpuKernelSourceCode contained within
				209	* @param[in] stage Stage of the unit workload
				210	*/
				211	GpuUnitWorkload(UnitWorkloadId id, const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage)
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	212	: _id{id}, _kernel_code{kernel_code}, _stage{stage}
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	213	{
				214	}
				215	/** Get the id of the unit workload */
				216	UnitWorkloadId id() const
				217	{
				218	return _id;
				219	}
				220	/** Get reference to the underlying @ref GpuKernelSourceCode */
				221	const GpuKernelSourceCode &code() const
				222	{
				223	return _kernel_code;
				224	}
				225	/** Get the stage of the unit workload */
				226	UnitWorkloadStage stage() const
				227	{
				228	return _stage;
				229	}
				230
				231	private:
				232	UnitWorkloadId _id{};
				233	GpuKernelSourceCode _kernel_code{};
				234	UnitWorkloadStage _stage{};
				235	};
				236
				237	/** Hold the generated kernel source code and other information required to compile and run the workload.
				238	*/
				239	class GpuWorkloadSourceCode
				240	{
				241	public:
				242	/** Default constructor */
				243	GpuWorkloadSourceCode() = default;
				244	/** Add a unit workload to the workload code
				245	*
				246	* @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload
				247	* @param[in] stage Stage of the unit workload
				248	* @param[in] mem_map @ref MemoryDescriptor map for all tensors within the unit workload
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	249	* @param[in] context @ref GpuWorkloadContext associated with the unit workload
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	250	*
				251	* @return UnitWorkloadId Allocated unit workload id
				252	*/
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	253	UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code,
				254	const UnitWorkloadStage &stage,
				255	const MemoryDescriptorMap &mem_map,
				256	const GpuWorkloadContext *context)
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	257	{
				258	// Use the size of the kernel codes as Id
				259	const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
				260	const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
				261	_unit_workloads.push_back(unit_work);
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	262	#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
				263	ARM_COMPUTE_UNUSED(context);
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	264	// Assemble kernel argument with memory descriptor to form workload argument
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	265	for (const auto &id_arg : kernel_code.arguments())
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	266	{
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	267	const auto arg_id = id_arg.first;
				268	const auto arg = id_arg.second;
				269	_workload_arguments[arg_id] =
				270	GpuWorkloadArgument{arg.tensor_info(), mem_map.at(arg_id), arg.kernel_argument_info()};
				271	if (_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	272	{
				273	_tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
				274	}
				275	_tensor_uwork_map[arg_id].insert(uwk_id);
				276	}
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	277	#else // ACL_INTERNAL_TEST_CKW_IN_DF
				278	GpuKernelArgumentList flat_kernel_args = kernel_code.arguments();
				279	GpuKernelArgumentList tensor_kargs{};
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	280	while (true)
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	281	{
				282	tensor_kargs = extract_kernel_args_for_one_tensor(flat_kernel_args);
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	283	if (tensor_kargs.empty())
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	284	{
				285	break;
				286	}
				287	else
				288	{
				289	const auto tensor_id = tensor_kargs.at(0).id();
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	290	_workload_arguments[tensor_id] = GpuWorkloadArgument{
				291	*context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs};
				292	if (_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end())
SiCong Li	23882a9	2023-06-28 09:49:45 +0100	[diff] [blame]	293	{
				294	_tensor_uwork_map[tensor_id] = std::set<UnitWorkloadId>();
				295	}
				296	_tensor_uwork_map[tensor_id].insert(uwk_id);
				297	}
				298	}
				299	#endif // ACL_INTERNAL_TEST_CKW_IN_DF
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	300	return uwk_id;
				301	}
				302	/** Get a unit workload from its id */
				303	const GpuUnitWorkload &query_unit_workload(UnitWorkloadId id) const
				304	{
				305	ARM_COMPUTE_ERROR_ON(id < 0);
				306	return _unit_workloads.at(id);
				307	}
				308	/** Get all unit workloads sorted in topological order */
				309	std::vector<UnitWorkloadId> unit_workloads() const
				310	{
				311	std::vector<UnitWorkloadId> ids{};
				312
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	313	for (const auto &uwk : _unit_workloads)
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	314	{
				315	ids.push_back(uwk.id());
				316	}
				317	return ids;
				318	}
				319	/** Get a @ref GpuWorkloadArgument from its associated tensor id */
				320	const GpuWorkloadArgument *query_tensor(ITensorInfo::Id t_id) const
				321	{
				322	return &_workload_arguments.at(t_id);
				323	}
				324	/** Get all tensors in the entire workload */
				325	std::vector<ITensorInfo::Id> tensors() const
				326	{
				327	std::vector<ITensorInfo::Id> ids{};
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	328	for (const auto &id_tensor : _workload_arguments)
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	329	{
				330	ids.push_back(id_tensor.first);
				331	}
				332	return ids;
				333	}
				334	/** Get all unit workloads connected to the tensor with @p t_id */
				335	std::vector<UnitWorkloadId> get_unit_workloads_from_tensor(ITensorInfo::Id t_id) const
				336	{
				337	const auto unit_work_set = _tensor_uwork_map.at(t_id);
				338	return std::vector<UnitWorkloadId>(unit_work_set.begin(), unit_work_set.end());
				339	}
				340
				341	private:
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	342	std::vector<GpuUnitWorkload> _unit_workloads{};
SiCong Li	f44bbc5	2022-08-29 18:25:51 +0100	[diff] [blame]	343	std::map<ITensorInfo::Id, GpuWorkloadArgument> _workload_arguments{};
				344	std::map<ITensorInfo::Id, std::set<UnitWorkloadId>> _tensor_uwork_map{};
				345	};
				346	} // namespace dynamic_fusion
				347	} // namespace experimental
				348	} // namespace arm_compute
				349	#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */