/*
 * Copyright (c) 2022-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE

#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"

#include <cstdint>
#include <map>
#include <set>
#include <vector>

namespace arm_compute
{
namespace experimental
{
namespace dynamic_fusion
{
/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
using UnitWorkloadId = int32_t;

/** Describes all the info related to a workload argument in order to:
 * - be used by the runtime to configure the gpu kernel argument
 * - be used by memory managers to allocate the required memory
 */
class GpuWorkloadArgument
{
public:
    /** Default constructor */
    GpuWorkloadArgument() = default;
    /** Constructor
     *
     * @param[in] tensor_info     @ref ITensorInfo of the workload argument
     * @param[in] mem_desc        @ref MemoryDescriptor of the workload argument
     * @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument
     */
    GpuWorkloadArgument(const ITensorInfo           &tensor_info,
                        const MemoryDescriptor      &mem_desc,
                        const GpuKernelArgumentInfo &kernel_arg_info)
        : _tensor_info{ tensor_info },
          _mem_desc{ mem_desc },
          _kernel_arg_info{ kernel_arg_info }
    {
    }
    /** Get tensor id within workload */
    ITensorInfo::Id id() const
    {
        return _tensor_info.id();
    }
    /** Get @ref ITensorInfo of the argument */
    ITensorInfo *tensor_info()
    {
        return &_tensor_info;
    }
    /** Get @ref ITensorInfo of the argument */
    const ITensorInfo *tensor_info() const
    {
        return &_tensor_info;
    }
    /** Get @ref MemoryDescriptor of the argument */
    MemoryDescriptor *memory_descriptor()
    {
        return &_mem_desc;
    }
    /** Get @ref MemoryDescriptor of the argument */
    const MemoryDescriptor *memory_descriptor() const
    {
        return &_mem_desc;
    }
    /** Get @ref GpuKernelArgumentInfo of the argument */
    GpuKernelArgumentInfo *kernel_argument_info()
    {
        return &_kernel_arg_info;
    }
    /** Get @ref GpuKernelArgumentInfo of the argument */
    const GpuKernelArgumentInfo *kernel_argument_info() const
    {
        return &_kernel_arg_info;
    }
    /** Check if the workload argument has a valid id
     *
     * @return true  If it has a valid id
     * @return false Otherwise
     */
    bool has_valid_id() const
    {
        return _tensor_info.has_valid_id();
    }

private:
    TensorInfo            _tensor_info{};
    MemoryDescriptor      _mem_desc{};
    GpuKernelArgumentInfo _kernel_arg_info{};
};
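
/* Usage sketch (illustrative comment only, not part of the interface): how the two consumers
 * named in the class documentation might read a GpuWorkloadArgument. Only accessors declared in
 * this file are used; the free-function names are hypothetical.
 *
 *   // Runtime side: gather what is needed to set up the gpu kernel argument
 *   void configure_kernel_argument(const GpuWorkloadArgument &arg)
 *   {
 *       const GpuKernelArgumentInfo *karg_info   = arg.kernel_argument_info(); // how the tensor is bound to the kernel
 *       const ITensorInfo           *tensor_info = arg.tensor_info();          // shape / data type of the tensor
 *       // ... create and bind the runtime kernel argument from karg_info and tensor_info ...
 *   }
 *
 *   // Memory manager side: decide how the tensor's backing memory is allocated
 *   void plan_allocation(const GpuWorkloadArgument &arg)
 *   {
 *       const MemoryDescriptor *mem_desc = arg.memory_descriptor();
 *       // ... allocate according to mem_desc ...
 *   }
 */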

/** Describes when a unit workload is run.
 */
struct UnitWorkloadStage
{
    enum class Stage
    {
        Prepare, /**< Only run once at the beginning. */
        Run,     /**< Run every time after the first time. */
    };
    Stage stage{ Stage::Run };
};

inline bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
{
    return stage0.stage == stage1.stage;
}

/** The atomic unit in a Gpu workload. It contains exactly one kernel to run.
 */
class GpuUnitWorkload
{
public:
    /** Default constructor */
    GpuUnitWorkload() = default;
    /** Constructor
     *
     * @param[in] id          Id that uniquely identifies this unit workload in a workload
     * @param[in] kernel_code @ref GpuKernelSourceCode contained within
     * @param[in] stage       Stage of the unit workload
     */
    GpuUnitWorkload(UnitWorkloadId id, const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage)
        : _id{ id }, _kernel_code{ kernel_code }, _stage{ stage }
    {
    }
    /** Get the id of the unit workload */
    UnitWorkloadId id() const
    {
        return _id;
    }
    /** Get reference to the underlying @ref GpuKernelSourceCode */
    const GpuKernelSourceCode &code() const
    {
        return _kernel_code;
    }
    /** Get the stage of the unit workload */
    UnitWorkloadStage stage() const
    {
        return _stage;
    }

private:
    UnitWorkloadId      _id{};
    GpuKernelSourceCode _kernel_code{};
    UnitWorkloadStage   _stage{};
};
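
/* Usage sketch (illustrative comment only): how a scheduler might dispatch a unit workload based on
 * its stage. Only types and functions declared in this file are used; dispatch() and first_run are
 * hypothetical.
 *
 *   void dispatch(const GpuUnitWorkload &uwk, bool first_run)
 *   {
 *       const bool is_prepare = (uwk.stage() == UnitWorkloadStage{ UnitWorkloadStage::Stage::Prepare });
 *       if(is_prepare && !first_run)
 *       {
 *           return; // Prepare-stage unit workloads only run once, at the beginning
 *       }
 *       // ... compile and enqueue uwk.code() ...
 *   }
 */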

/** Holds the generated kernel source code and other information required to compile and run the workload.
 */
class GpuWorkloadSourceCode
{
public:
    /** Default constructor */
    GpuWorkloadSourceCode() = default;
    /** Add a unit workload to the workload code
     *
     * @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload
     * @param[in] stage       Stage of the unit workload
     * @param[in] mem_map     @ref MemoryDescriptor map for all tensors within the unit workload
     *
     * @return UnitWorkloadId Allocated unit workload id
     */
    UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map)
    {
        // Use the current number of unit workloads as the Id
        const auto uwk_id    = static_cast<UnitWorkloadId>(_unit_workloads.size());
        const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage);
        _unit_workloads.push_back(unit_work);
        // Assemble kernel argument with memory descriptor to form workload argument
        for(const auto &id_arg : kernel_code.arguments())
        {
            const auto arg_id           = id_arg.first;
            const auto arg              = id_arg.second;
            _workload_arguments[arg_id] = GpuWorkloadArgument{ *arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info() };
            if(_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
            {
                _tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
            }
            _tensor_uwork_map[arg_id].insert(uwk_id);
        }
        return uwk_id;
    }
    /** Get a unit workload from its id */
    const GpuUnitWorkload &query_unit_workload(UnitWorkloadId id) const
    {
        ARM_COMPUTE_ERROR_ON(id < 0);
        return _unit_workloads.at(id);
    }
    /** Get all unit workloads sorted in topological order */
    std::vector<UnitWorkloadId> unit_workloads() const
    {
        std::vector<UnitWorkloadId> ids{};

        for(const auto &uwk : _unit_workloads)
        {
            ids.push_back(uwk.id());
        }
        return ids;
    }
    /** Get a @ref GpuWorkloadArgument from its associated tensor id */
    const GpuWorkloadArgument *query_tensor(ITensorInfo::Id t_id) const
    {
        return &_workload_arguments.at(t_id);
    }
    /** Get all tensors in the entire workload */
    std::vector<ITensorInfo::Id> tensors() const
    {
        std::vector<ITensorInfo::Id> ids{};
        for(const auto &id_tensor : _workload_arguments)
        {
            ids.push_back(id_tensor.first);
        }
        return ids;
    }
    /** Get all unit workloads connected to the tensor with @p t_id */
    std::vector<UnitWorkloadId> get_unit_workloads_from_tensor(ITensorInfo::Id t_id) const
    {
        const auto unit_work_set = _tensor_uwork_map.at(t_id);
        return std::vector<UnitWorkloadId>(unit_work_set.begin(), unit_work_set.end());
    }

private:
    std::vector<GpuUnitWorkload>                        _unit_workloads{};
    std::map<ITensorInfo::Id, GpuWorkloadArgument>      _workload_arguments{};
    std::map<ITensorInfo::Id, std::set<UnitWorkloadId>> _tensor_uwork_map{};
};
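
/* Usage sketch (illustrative comment only): how a caller might populate a GpuWorkloadSourceCode and
 * how downstream components might inspect it. The kernel codes and the memory descriptor map are
 * assumed to be produced elsewhere; build_workload() and inspect() are hypothetical helper names.
 *
 *   GpuWorkloadSourceCode build_workload(const GpuKernelSourceCode &prepare_code,
 *                                        const GpuKernelSourceCode &run_code,
 *                                        const MemoryDescriptorMap &mem_map)
 *   {
 *       GpuWorkloadSourceCode workload{};
 *       // A kernel that only needs to run once goes into the Prepare stage
 *       workload.add_unit_workload(prepare_code, UnitWorkloadStage{ UnitWorkloadStage::Stage::Prepare }, mem_map);
 *       // A kernel executed on every invocation goes into the Run stage
 *       workload.add_unit_workload(run_code, UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }, mem_map);
 *       return workload;
 *   }
 *
 *   void inspect(const GpuWorkloadSourceCode &workload)
 *   {
 *       // Each unit workload carries the kernel source to compile and the stage at which to run it
 *       for(UnitWorkloadId uwk_id : workload.unit_workloads())
 *       {
 *           const GpuUnitWorkload &uwk = workload.query_unit_workload(uwk_id);
 *           // uwk.code(), uwk.stage() ...
 *       }
 *       // Each tensor id maps to a GpuWorkloadArgument describing how to bind and allocate it
 *       for(ITensorInfo::Id t_id : workload.tensors())
 *       {
 *           const GpuWorkloadArgument *arg = workload.query_tensor(t_id);
 *           // arg->memory_descriptor(), arg->kernel_argument_info() ...
 *       }
 *   }
 */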
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */