blob: 81c3f0c8006ea7d7a73d4644128a01afd93bb75c [file] [log] [blame]
/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "GpuKernelComponentGroup.h"
25
26#include "arm_compute/core/ITensorInfo.h"
27#include "arm_compute/core/Validate.h"
28#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
29
Viet-Hoa Do04f46202022-12-14 14:49:56 +000030#include <algorithm>
31
SiCong Lif44bbc52022-08-29 18:25:51 +010032namespace arm_compute
33{
34namespace experimental
35{
36namespace dynamic_fusion
37{
38bool GpuKernelComponentGroup::add_component(ComponentPtr component)
39{
Viet-Hoa Do04f46202022-12-14 14:49:56 +000040 ARM_COMPUTE_ERROR_ON_MSG(
41 _finalized, "The component group has been finalized and cannot be altered.");
42
SiCong Lif44bbc52022-08-29 18:25:51 +010043 // note: Constraint 1 is guaranteed as a precondition
44 // Constraint 2
45 if(component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
46 {
47 return false;
48 }
49 // Constraint 3.1: Pattern: (Unfusable + Output)
50 if(!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable && component->type() != GpuComponentType::Output)
51 {
52 return false;
53 }
54 // Constraint 3.2
55 if(!_components.empty() && (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
56 {
57 return false;
58 }
SiCong Lif44bbc52022-08-29 18:25:51 +010059 // Constraint 4
60 if(component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
61 {
62 return false;
63 }
64 // Constraint 5
65 if(!_components.empty() && !(get_root_component()->properties() == component->properties()))
66 {
67 return false;
68 }
69 // Constraint 7
70 if(!_components.empty())
71 {
72 const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
73 ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
74 const auto first_dst_tensor = root_dst_tensors[0];
75 const auto dst_tensors = component->tensors().get_const_dst_tensors();
76 for(const auto &t : root_dst_tensors)
77 {
78 if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
79 {
80 return false;
81 }
82 }
83 for(const auto &t : dst_tensors)
84 {
85 if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
86 {
87 return false;
88 }
89 }
90 }
91 // Constraint 8
92 if(!_components.empty())
93 {
94 const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
95 ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
96 const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
97 const auto dst_tensors = component->tensors().get_const_dst_tensors();
98 for(const auto &t : root_dst_tensors)
99 {
100 if(t->data_layout() != first_dst_tensor_layout)
101 {
102 return false;
103 }
104 }
105 for(const auto &t : dst_tensors)
106 {
107 if(t->data_layout() != first_dst_tensor_layout)
108 {
109 return false;
110 }
111 }
112 }
113 // Constraint 9
114 if(component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
115 {
116 return false;
117 }
118 // Constraint 9 corollary
119 if(component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
120 {
121 return false;
122 }
123 _components.push_back(component);
124 return true;
125}
126
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000127void GpuKernelComponentGroup::finalize()
SiCong Lif44bbc52022-08-29 18:25:51 +0100128{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000129 if(_finalized)
SiCong Lif44bbc52022-08-29 18:25:51 +0100130 {
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000131 return;
SiCong Lif44bbc52022-08-29 18:25:51 +0100132 }
133
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000134 _finalized = true;
135
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000136 std::set<const ITensorInfo *> output_tensors;
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000137 std::map<const ITensorInfo *, std::vector<const ITensorInfo *>> possible_tile_map;
138 std::map<const ITensorInfo *, int32_t> tile_usages;
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000139
140 for(auto component : _components)
141 {
142 const auto tensors = component->tensors();
143 const auto src_tensors = tensors.get_const_src_tensors();
144 const auto dst_tensors = tensors.get_const_dst_tensors();
145
146 // Detect input, output and intermediate tensors.
147 for(auto tensor : src_tensors)
148 {
149 const auto output_tensors_it = output_tensors.find(tensor);
150
151 if(output_tensors_it != output_tensors.end())
152 {
153 // This tensor is the output of another operator.
154 // It must be marked as intermediate tensor.
155 output_tensors.erase(output_tensors_it);
156 _interm_tensors.insert(tensor);
157 }
158 else if(_interm_tensors.find(tensor) == _interm_tensors.end())
159 {
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000160 _input_tensors.insert(tensor);
161
162 tile_usages[tensor] = 0;
163 possible_tile_map.emplace(tensor, std::vector<const ITensorInfo *>());
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000164 }
165 }
166
167 for(auto tensor : dst_tensors)
168 {
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000169 ARM_COMPUTE_ERROR_ON(_input_tensors.find(tensor) != _input_tensors.end());
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000170 ARM_COMPUTE_ERROR_ON(output_tensors.find(tensor) != output_tensors.end());
171 ARM_COMPUTE_ERROR_ON(_interm_tensors.find(tensor) != _interm_tensors.end());
172 output_tensors.insert(tensor);
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000173
174 tile_usages[tensor] = 0;
175 possible_tile_map.emplace(tensor, std::vector<const ITensorInfo *>());
176 }
177
178 // Check if the output can overwrite the input tile.
179 const auto component_type = component->type();
180 if(component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
181 {
182 ARM_COMPUTE_ERROR_ON(dst_tensors.size() != 1);
183
184 const auto dst_tensor = dst_tensors[0];
185 const auto &dst_shape = dst_tensor->tensor_shape();
186 const auto &dst_type = dst_tensor->data_type();
187
188 tile_usages[dst_tensor] = 0;
189
190 for(auto src_tensor : src_tensors)
191 {
192 const auto &src_shape = src_tensor->tensor_shape();
193 const auto &src_type = src_tensor->data_type();
194
195 if(src_shape == dst_shape && src_type == dst_type)
196 {
197 const auto tile_usages_it = tile_usages.find(src_tensor);
198 ARM_COMPUTE_ERROR_ON(tile_usages_it == tile_usages.end());
199
200 if(component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
201 {
202 // Increase the number of tile usages unless this component is an output
203 // and the tile has not been shared with any component.
204 // (Reason: output component doesn't change the content of the tile)
205 ++tile_usages_it->second;
206 }
207
208 possible_tile_map[dst_tensor].push_back(src_tensor);
209 }
210 }
211 }
212 else
213 {
214 // Outputs of complex and unfusable components need dedicated tile.
215 for(auto tensor : dst_tensors)
216 {
217 tile_usages[tensor] = 0;
218 }
219 }
220 }
221
222 // Find the smallest list of tiles that the intermediate tensors need to write to.
223 for(auto tensor : _input_tensors)
224 {
225 _tile_map[tensor] = tensor;
226 }
227
228 for(auto component : _components)
229 {
230 const auto dst_tensors = component->tensors().get_const_dst_tensors();
231
232 for(auto tensor : dst_tensors)
233 {
234 const auto target_tiles = possible_tile_map.at(tensor);
235 _tile_map[tensor] = tensor;
236
237 for(auto target : target_tiles)
238 {
239 const auto num_usage = tile_usages[target];
240
241 if(num_usage <= 1)
242 {
243 // The target tile is consumed by only this operator, so we can reuse it
244 // for the destination tensor data.
245 _tile_map[tensor] = _tile_map.at(target);
246 break;
247 }
248 }
249 }
250 }
251
252 for(auto tensor : output_tensors)
253 {
254 _tile_map[tensor] = tensor;
255 }
256
257 // All intermediate tensors that cannot be shared with any previous tensor
258 // will need to be declared as tile variable.
259 for(auto tensor_tile : _tile_map)
260 {
261 if(tensor_tile.first == tensor_tile.second &&
262 _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
263 {
264 _tiles.push_back(tensor_tile.first);
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000265 }
266 }
267
268 std::set_union(
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000269 _input_tensors.begin(), _input_tensors.end(),
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000270 output_tensors.begin(), output_tensors.end(),
271 std::back_inserter(_argument_tensors));
272 _any_output_tensor = *output_tensors.begin();
SiCong Lif44bbc52022-08-29 18:25:51 +0100273}
274
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000275std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_tiles() const
276{
277 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
278 return _tiles;
279}
280
281const ITensorInfo *GpuKernelComponentGroup::get_tile_for_tensor(const ITensorInfo *tensor) const
282{
283 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
284
285 if(_tile_map.find(tensor) != _tile_map.end())
286 {
287 return _tile_map.at(tensor);
288 }
289
290 return tensor;
291}
292
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000293const ITensorInfo *GpuKernelComponentGroup::get_any_dst_tensor() const
SiCong Lif44bbc52022-08-29 18:25:51 +0100294{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000295 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
296 return _any_output_tensor;
SiCong Lif44bbc52022-08-29 18:25:51 +0100297}
298
299std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors() const
300{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000301 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
302 return _argument_tensors;
SiCong Lif44bbc52022-08-29 18:25:51 +0100303}
304
305GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
306{
307 if(empty())
308 {
309 return nullptr;
310 }
311 return _components[0];
312}
313
SiCong Lif44bbc52022-08-29 18:25:51 +0100314bool GpuKernelComponentGroup::is_intermediate_tensor(const ITensorInfo *tensor) const
315{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000316 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
317 return _interm_tensors.find(tensor) != _interm_tensors.end();
SiCong Lif44bbc52022-08-29 18:25:51 +0100318}
319
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000320bool GpuKernelComponentGroup::is_input_tensor(const ITensorInfo *tensor) const
321{
322 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
323 return _input_tensors.find(tensor) != _input_tensors.end();
324}
325
SiCong Lif44bbc52022-08-29 18:25:51 +0100326size_t GpuKernelComponentGroup::size() const
327{
328 return _components.size();
329}
330bool GpuKernelComponentGroup::empty() const
331{
332 return _components.empty();
333}
334GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index)
335{
336 return _components[index];
337}
338const GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index) const
339{
340 return _components[index];
341}
342typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::begin()
343{
344 return _components.begin();
345}
346typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::end()
347{
348 return _components.end();
349}
350typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::begin() const
351{
352 return _components.cbegin();
353}
354typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::end() const
355{
356 return _components.cend();
357}
358typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cbegin() const
359{
360 return _components.cbegin();
361}
362typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cend() const
363{
364 return _components.cend();
365}
366
SiCong Lif44bbc52022-08-29 18:25:51 +0100367} // namespace dynamic_fusion
368} // namespace experimental
369} // namespace arm_compute