/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H

#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/Window.h"

#include "arm_compute/core/experimental/IWorkload.h"
#include "arm_compute/core/experimental/OperatorGraph.h"

#include <map>

namespace arm_compute
{
namespace experimental
{
namespace dynamic_fusion
{
/** Verbose and explicit way to enumerate all the tensor argument variants used by
 * all kernel implementations. This avoids any ambiguity about which kernel arguments are passed
 */
enum class ClKernelTensorArgType : int
{
    Scalar,

    Vector,

    Image,
    Image_Reinterpret_As_3D,
    Image_Export_To_ClImage2D,

    Image_3D, // 3D Tensor represented as a 2D Image + stride_z
    Image_3D_Export_To_ClImage2D,

    Tensor_3D,
    Tensor_4D,
    Tensor_4D_t_Buffer,
    Tensor_4D_t_Image
};

/** Describes all the info required to add a kernel argument at run time
 *
 * @note This struct can later be expanded into a more concise and formal way to specify how to set up
 * arguments for a kernel inside a @ref ClUnitWorkload
 */
struct ClKernelArgDescriptor
{
    ClKernelArgDescriptor() = default;
    ClKernelArgDescriptor(int arg_id, ClKernelTensorArgType type, bool slide_along_dimz = true)
        : arg_id{ arg_id }, tensor_arg_type{ type }, slide_along_dimz{ slide_along_dimz }
    {
    }
    ~ClKernelArgDescriptor() = default;
    friend bool operator==(const ClKernelArgDescriptor &arg0, const ClKernelArgDescriptor &arg1)
    {
        return (arg0.tensor_arg_type == arg1.tensor_arg_type) && (arg0.slide_along_dimz == arg1.slide_along_dimz);
    }
    int                   arg_id{ -1 };                                    /**< Arg ID in the blueprint, -1 means empty / uninitialized */
    ClKernelTensorArgType tensor_arg_type{ ClKernelTensorArgType::Image }; /**< tensor argument type */
    bool                  slide_along_dimz{ true };                        /**< @note slide_along_dimz will be moved out of this descriptor in later iterations */
};

using ClKernelArgList = std::map<int, ClKernelArgDescriptor>;
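
/** Example: a minimal sketch of how a @ref ClKernelArgList could be populated. The blueprint
 * argument IDs (0 and 1) and the chosen argument types are purely illustrative; the map key is
 * the same argument ID stored in each descriptor.
 *
 * @code
 * ClKernelArgList args{};
 * args[0] = ClKernelArgDescriptor(0, ClKernelTensorArgType::Tensor_4D_t_Buffer); // e.g. source tensor
 * args[1] = ClKernelArgDescriptor(1, ClKernelTensorArgType::Image, false);       // e.g. destination, no dim-z sliding
 * @endcode
 */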

/** Descriptor containing information required to run a single ClWorkload
 */
struct ClExecutionDescriptor
{
    cl::NDRange suggested_lws{};              /**< Suggested local work-group size for optimal performance if not zero */
    cl::NDRange gws{};                        /**< Global work size to be used */
    bool        skip_sliding_window{ false }; /**< Skip sliding window slices during execution loop */
};
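
/** Example: a minimal sketch of filling in a @ref ClExecutionDescriptor. The NDRange values are
 * purely illustrative; leaving suggested_lws as a null range lets the runtime pick the local size.
 *
 * @code
 * ClExecutionDescriptor exec_desc{};
 * exec_desc.suggested_lws       = cl::NDRange(4, 4, 1);     // illustrative local work-group size
 * exec_desc.gws                 = cl::NDRange(128, 128, 8); // illustrative global work size
 * exec_desc.skip_sliding_window = false;
 * @endcode
 */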

/** Contains kernel code to be compiled and run in a ClUnitWorkload
 */
struct ClKernelCode
{
    friend bool operator==(const ClKernelCode &code0, const ClKernelCode &code1)
    {
        return (code0.name == code1.name) && (code0.code == code1.code) && (code0.config_id == code1.config_id) && (code0.build_options == code1.build_options) && (code0.window == code1.window)
               && (code0.arguments == code1.arguments);
    }
    std::string     name{};          /**< Kernel name */
    std::string     code{};          /**< Kernel source code */
    std::string     config_id{};     /**< Generated from blueprint based on complex component */
    CLBuildOptions  build_options{}; /**< Kernel build options */
    Window          window{};        /**< Execution window */
    ClKernelArgList arguments{};     /**< Kernel argument descriptors. map key is kernel ArgumentID */
};

/** A descriptor of ClWorkload Tensors.
 */
struct ClWorkloadTensor : public WorkloadTensor
{
    ClWorkloadTensor() = default;
    ClWorkloadTensor(Id id, ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg)
        : WorkloadTensor{ id, info, memory_type, memory_info }, kernel_arg{ kernel_arg }
    {
    }
    ClKernelArgDescriptor kernel_arg{};
    friend bool operator==(const ClWorkloadTensor &t0, const ClWorkloadTensor &t1)
    {
        return t0.info == t1.info && t0.memory_info == t1.memory_info && t0.memory_type == t1.memory_type && t0.kernel_arg == t1.kernel_arg;
    }
};

/** The basic atomic unit in a @ref ClWorkload. It contains exactly one kernel to run.
 */
struct ClUnitWorkload : public UnitWorkload
{
    ClUnitWorkload() = default;
    ClUnitWorkload(Id id, UnitWorkloadStage stage, const ClKernelCode &code)
        : UnitWorkload{ id, stage }, code{ code }
    {
    }
    friend bool operator==(const ClUnitWorkload &uworkload0, const ClUnitWorkload &uworkload1)
    {
        return uworkload0.stage == uworkload1.stage && uworkload0.code == uworkload1.code;
    }
    ClKernelCode code{};
};
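
/** Example: a minimal sketch of describing a kernel and wrapping it in a @ref ClUnitWorkload.
 * The kernel name, source string and unit workload id are purely illustrative; in practice these
 * are generated by the dynamic fusion blueprint rather than written by hand. The stage is
 * default-constructed here only for illustration; its members are defined in IWorkload.h.
 *
 * @code
 * ClKernelCode kernel_code{};
 * kernel_code.name         = "fused_kernel";                    // illustrative name
 * kernel_code.code         = "__kernel void fused_kernel() {}"; // illustrative source
 * kernel_code.arguments[0] = ClKernelArgDescriptor(0, ClKernelTensorArgType::Tensor_4D_t_Buffer);
 *
 * UnitWorkloadStage stage{};
 * ClUnitWorkload    uwk(0, stage, kernel_code); // 0 is an illustrative unit workload id
 * @endcode
 */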

/** GPU information for @ref ClWorkloadContext
 */
struct GpuInfo
{
    friend bool operator==(const GpuInfo &info0, const GpuInfo &info1)
    {
        return info0.target == info1.target;
    }
    GPUTarget target{ GPUTarget::UNKNOWN };
};

/** Context (device capabilities, platform details) associated with a ClWorkload
 *
 * It is required for building the @ref ClKernelCode and could also be used by the runtime (e.g. schedulers)
 */
struct ClWorkloadContext
{
    friend bool operator==(const ClWorkloadContext &ctx0, const ClWorkloadContext &ctx1)
    {
        return ctx0.gpu_info == ctx1.gpu_info;
    }
    GpuInfo gpu_info{};
};

/** Workload for Cl backend
 */
struct ClWorkload : public IWorkload
{
    Tid add_workload_tensor(ITensorInfo *info, MemoryType memory_type, const AuxMemoryInfo &memory_info, const ClKernelArgDescriptor &kernel_arg, Tid merge_point)
    {
        Tid id = graph.add_tensor(merge_point);
        if(tensors.find(id) == tensors.end())
        {
            tensors[id] = ClWorkloadTensor(id, info, memory_type, memory_info, kernel_arg);
        }
        return id;
    }
    UnitWorkId add_unit_workload(UnitWorkloadStage stage, const ClKernelCode &code, const std::vector<Tid> &inputs, const std::vector<Tid> &outputs)
    {
        auto op = graph.add_operator(inputs, outputs);
        auto id = op.second;
        unit_workloads[id] = ClUnitWorkload(id, stage, code);
        return id;
    }
    friend bool operator==(const ClWorkload &workload0, const ClWorkload &workload1)
    {
        return std::make_tuple(
                   workload0.graph, workload0.context, workload0.unit_workloads, workload0.tensors, workload0.op_tensor_id_lut)
               == std::make_tuple(
                   workload1.graph, workload1.context, workload1.unit_workloads, workload1.tensors, workload1.op_tensor_id_lut);
    }
    ClWorkloadContext                    context{};          /**< Workload context */
    std::map<UnitWorkId, ClUnitWorkload> unit_workloads{};   /**< Unit workloads to run */
    std::map<Tid, ClWorkloadTensor>      tensors{};          /**< Workload tensors */
    std::map<Tid, OpTensor::Id>          op_tensor_id_lut{}; /**< Map from ClWorkloadTensor to SRC and DST Operator Tensors (no need to store "intermediate" Operator Tensors) */
    Status                               status{};           /**< For compatibility with the IOperator validate method. Store if the workload is valid or not. */
};
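
/** Example: a minimal sketch of how a builder could assemble a @ref ClWorkload using the two member
 * functions above. The helper name "assemble" is hypothetical, and the tensor infos, memory
 * descriptors, stage, kernel code and merge points are assumed to be supplied by the caller; no
 * concrete values are prescribed here.
 *
 * @code
 * void assemble(ClWorkload &workload, ITensorInfo *src_info, ITensorInfo *dst_info,
 *               MemoryType mem_type, const AuxMemoryInfo &aux_info,
 *               UnitWorkloadStage stage, const ClKernelCode &code,
 *               Tid src_merge_point, Tid dst_merge_point)
 * {
 *     const auto src_id = workload.add_workload_tensor(src_info, mem_type, aux_info,
 *                                                      ClKernelArgDescriptor(0, ClKernelTensorArgType::Image), src_merge_point);
 *     const auto dst_id = workload.add_workload_tensor(dst_info, mem_type, aux_info,
 *                                                      ClKernelArgDescriptor(1, ClKernelTensorArgType::Image), dst_merge_point);
 *     workload.add_unit_workload(stage, code, { src_id }, { dst_id });
 * }
 * @endcode
 */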

/** Build a @ref ClWorkload from an @ref OperatorGraph.
 *
 * @param[out] workload Built ClWorkload
 * @param[in]  op_graph OperatorGraph to build the workload from
 * @param[in]  ctx      ClWorkloadContext containing device information
 * @return Status
 */
Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx);
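
/** Example: a minimal sketch of building a @ref ClWorkload from an @ref OperatorGraph. The operator
 * graph is assumed to have been populated elsewhere (see OperatorGraph.h), and the GPU target shown
 * is purely illustrative.
 *
 * @code
 * OperatorGraph op_graph{};
 * // ... add operators and tensors to op_graph ...
 *
 * ClWorkloadContext ctx{};
 * ctx.gpu_info.target = GPUTarget::UNKNOWN; // replace with the actual target, if known
 *
 * ClWorkload   workload{};
 * const Status st = build(workload, op_graph, ctx); // workload.status also records validity
 * @endcode
 */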

} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute

#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_CLWORKLOAD_H
#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */