blob: 5a6d125d9632d9a23e8f39fef750aecae0a96ac4 [file] [log] [blame]
SiCong Lif44bbc52022-08-29 18:25:51 +01001/*
2 * Copyright (c) 2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "GpuKernelComponentGroup.h"
25
26#include "arm_compute/core/ITensorInfo.h"
27#include "arm_compute/core/Validate.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010028
SiCong Lif44bbc52022-08-29 18:25:51 +010029#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
30
Viet-Hoa Do04f46202022-12-14 14:49:56 +000031#include <algorithm>
32
SiCong Lif44bbc52022-08-29 18:25:51 +010033namespace arm_compute
34{
35namespace experimental
36{
37namespace dynamic_fusion
38{
39bool GpuKernelComponentGroup::add_component(ComponentPtr component)
40{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010041 ARM_COMPUTE_ERROR_ON_MSG(_finalized, "The component group has been finalized and cannot be altered.");
Viet-Hoa Do04f46202022-12-14 14:49:56 +000042
SiCong Lif44bbc52022-08-29 18:25:51 +010043 // note: Constraint 1 is guaranteed as a precondition
44 // Constraint 2
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010045 if (component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
SiCong Lif44bbc52022-08-29 18:25:51 +010046 {
47 return false;
48 }
49 // Constraint 3.1: Pattern: (Unfusable + Output)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010050 if (!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable &&
51 component->type() != GpuComponentType::Output)
SiCong Lif44bbc52022-08-29 18:25:51 +010052 {
53 return false;
54 }
55 // Constraint 3.2
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010056 if (!_components.empty() &&
57 (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
SiCong Lif44bbc52022-08-29 18:25:51 +010058 {
59 return false;
60 }
SiCong Lif44bbc52022-08-29 18:25:51 +010061 // Constraint 4
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010062 if (component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
SiCong Lif44bbc52022-08-29 18:25:51 +010063 {
64 return false;
65 }
66 // Constraint 5
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010067 if (!_components.empty() && !(get_root_component()->properties() == component->properties()))
SiCong Lif44bbc52022-08-29 18:25:51 +010068 {
69 return false;
70 }
71 // Constraint 7
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010072 if (!_components.empty())
SiCong Lif44bbc52022-08-29 18:25:51 +010073 {
74 const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
75 ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
76 const auto first_dst_tensor = root_dst_tensors[0];
77 const auto dst_tensors = component->tensors().get_const_dst_tensors();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010078 for (const auto &t : root_dst_tensors)
SiCong Lif44bbc52022-08-29 18:25:51 +010079 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010080 if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
SiCong Lif44bbc52022-08-29 18:25:51 +010081 {
82 return false;
83 }
84 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010085 for (const auto &t : dst_tensors)
SiCong Lif44bbc52022-08-29 18:25:51 +010086 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010087 if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
SiCong Lif44bbc52022-08-29 18:25:51 +010088 {
89 return false;
90 }
91 }
92 }
93 // Constraint 8
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010094 if (!_components.empty())
SiCong Lif44bbc52022-08-29 18:25:51 +010095 {
96 const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
97 ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
98 const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
99 const auto dst_tensors = component->tensors().get_const_dst_tensors();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100100 for (const auto &t : root_dst_tensors)
SiCong Lif44bbc52022-08-29 18:25:51 +0100101 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100102 if (t->data_layout() != first_dst_tensor_layout)
SiCong Lif44bbc52022-08-29 18:25:51 +0100103 {
104 return false;
105 }
106 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100107 for (const auto &t : dst_tensors)
SiCong Lif44bbc52022-08-29 18:25:51 +0100108 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100109 if (t->data_layout() != first_dst_tensor_layout)
SiCong Lif44bbc52022-08-29 18:25:51 +0100110 {
111 return false;
112 }
113 }
114 }
115 // Constraint 9
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100116 if (component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
SiCong Lif44bbc52022-08-29 18:25:51 +0100117 {
118 return false;
119 }
120 // Constraint 9 corollary
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100121 if (component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
SiCong Lif44bbc52022-08-29 18:25:51 +0100122 {
123 return false;
124 }
125 _components.push_back(component);
126 return true;
127}
128
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000129void GpuKernelComponentGroup::finalize()
SiCong Lif44bbc52022-08-29 18:25:51 +0100130{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100131 if (_finalized)
SiCong Lif44bbc52022-08-29 18:25:51 +0100132 {
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000133 return;
SiCong Lif44bbc52022-08-29 18:25:51 +0100134 }
135
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000136 _finalized = true;
137
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100138 std::set<const ITensorInfo *> output_tensors;
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000139 std::map<const ITensorInfo *, std::vector<const ITensorInfo *>> possible_tile_map;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100140 std::map<const ITensorInfo *, int32_t> tile_usages;
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000141
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100142 for (auto component : _components)
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000143 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100144 const auto tensors = component->tensors();
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000145 const auto src_tensors = tensors.get_const_src_tensors();
146 const auto dst_tensors = tensors.get_const_dst_tensors();
147
148 // Detect input, output and intermediate tensors.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100149 for (auto tensor : src_tensors)
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000150 {
151 const auto output_tensors_it = output_tensors.find(tensor);
152
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100153 if (output_tensors_it != output_tensors.end())
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000154 {
155 // This tensor is the output of another operator.
156 // It must be marked as intermediate tensor.
157 output_tensors.erase(output_tensors_it);
158 _interm_tensors.insert(tensor);
159 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100160 else if (_interm_tensors.find(tensor) == _interm_tensors.end())
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000161 {
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000162 _input_tensors.insert(tensor);
163
164 tile_usages[tensor] = 0;
165 possible_tile_map.emplace(tensor, std::vector<const ITensorInfo *>());
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000166 }
167 }
168
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100169 for (auto tensor : dst_tensors)
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000170 {
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000171 ARM_COMPUTE_ERROR_ON(_input_tensors.find(tensor) != _input_tensors.end());
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000172 ARM_COMPUTE_ERROR_ON(output_tensors.find(tensor) != output_tensors.end());
173 ARM_COMPUTE_ERROR_ON(_interm_tensors.find(tensor) != _interm_tensors.end());
174 output_tensors.insert(tensor);
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000175
176 tile_usages[tensor] = 0;
177 possible_tile_map.emplace(tensor, std::vector<const ITensorInfo *>());
178 }
179
180 // Check if the output can overwrite the input tile.
181 const auto component_type = component->type();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100182 if (component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000183 {
184 ARM_COMPUTE_ERROR_ON(dst_tensors.size() != 1);
185
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100186 const auto dst_tensor = dst_tensors[0];
187 const auto &dst_shape = dst_tensor->tensor_shape();
188 const auto &dst_type = dst_tensor->data_type();
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000189
190 tile_usages[dst_tensor] = 0;
191
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100192 for (auto src_tensor : src_tensors)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000193 {
194 const auto &src_shape = src_tensor->tensor_shape();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100195 const auto &src_type = src_tensor->data_type();
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000196
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100197 if (src_shape == dst_shape && src_type == dst_type)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000198 {
199 const auto tile_usages_it = tile_usages.find(src_tensor);
200 ARM_COMPUTE_ERROR_ON(tile_usages_it == tile_usages.end());
201
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100202 if (component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000203 {
204 // Increase the number of tile usages unless this component is an output
205 // and the tile has not been shared with any component.
206 // (Reason: output component doesn't change the content of the tile)
207 ++tile_usages_it->second;
208 }
209
210 possible_tile_map[dst_tensor].push_back(src_tensor);
211 }
212 }
213 }
214 else
215 {
216 // Outputs of complex and unfusable components need dedicated tile.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100217 for (auto tensor : dst_tensors)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000218 {
219 tile_usages[tensor] = 0;
220 }
221 }
222 }
223
224 // Find the smallest list of tiles that the intermediate tensors need to write to.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100225 for (auto tensor : _input_tensors)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000226 {
227 _tile_map[tensor] = tensor;
228 }
229
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100230 for (auto component : _components)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000231 {
232 const auto dst_tensors = component->tensors().get_const_dst_tensors();
233
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100234 for (auto tensor : dst_tensors)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000235 {
236 const auto target_tiles = possible_tile_map.at(tensor);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100237 _tile_map[tensor] = tensor;
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000238
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100239 for (auto target : target_tiles)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000240 {
241 const auto num_usage = tile_usages[target];
242
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100243 if (num_usage <= 1)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000244 {
245 // The target tile is consumed by only this operator, so we can reuse it
246 // for the destination tensor data.
247 _tile_map[tensor] = _tile_map.at(target);
248 break;
249 }
250 }
251 }
252 }
253
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100254 for (auto tensor : output_tensors)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000255 {
256 _tile_map[tensor] = tensor;
257 }
258
259 // All intermediate tensors that cannot be shared with any previous tensor
260 // will need to be declared as tile variable.
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100261 for (auto tensor_tile : _tile_map)
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000262 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100263 if (tensor_tile.first == tensor_tile.second && _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000264 {
265 _tiles.push_back(tensor_tile.first);
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000266 }
267 }
268
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100269 std::set_union(_input_tensors.begin(), _input_tensors.end(), output_tensors.begin(), output_tensors.end(),
270 std::back_inserter(_argument_tensors));
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000271 _any_output_tensor = *output_tensors.begin();
SiCong Lif44bbc52022-08-29 18:25:51 +0100272}
273
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000274std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_tiles() const
275{
276 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
277 return _tiles;
278}
279
280const ITensorInfo *GpuKernelComponentGroup::get_tile_for_tensor(const ITensorInfo *tensor) const
281{
282 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
283
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100284 if (_tile_map.find(tensor) != _tile_map.end())
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000285 {
286 return _tile_map.at(tensor);
287 }
288
289 return tensor;
290}
291
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000292const ITensorInfo *GpuKernelComponentGroup::get_any_dst_tensor() const
SiCong Lif44bbc52022-08-29 18:25:51 +0100293{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000294 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
295 return _any_output_tensor;
SiCong Lif44bbc52022-08-29 18:25:51 +0100296}
297
298std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors() const
299{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000300 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
301 return _argument_tensors;
SiCong Lif44bbc52022-08-29 18:25:51 +0100302}
303
304GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
305{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100306 if (empty())
SiCong Lif44bbc52022-08-29 18:25:51 +0100307 {
308 return nullptr;
309 }
310 return _components[0];
311}
312
SiCong Lif44bbc52022-08-29 18:25:51 +0100313bool GpuKernelComponentGroup::is_intermediate_tensor(const ITensorInfo *tensor) const
314{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000315 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
316 return _interm_tensors.find(tensor) != _interm_tensors.end();
SiCong Lif44bbc52022-08-29 18:25:51 +0100317}
318
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000319bool GpuKernelComponentGroup::is_input_tensor(const ITensorInfo *tensor) const
320{
321 ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
322 return _input_tensors.find(tensor) != _input_tensors.end();
323}
324
SiCong Lif44bbc52022-08-29 18:25:51 +0100325size_t GpuKernelComponentGroup::size() const
326{
327 return _components.size();
328}
329bool GpuKernelComponentGroup::empty() const
330{
331 return _components.empty();
332}
333GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index)
334{
335 return _components[index];
336}
337const GpuKernelComponentGroup::ComponentPtr &GpuKernelComponentGroup::operator[](size_t index) const
338{
339 return _components[index];
340}
341typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::begin()
342{
343 return _components.begin();
344}
345typename std::vector<GpuKernelComponentGroup::ComponentPtr>::iterator GpuKernelComponentGroup::end()
346{
347 return _components.end();
348}
349typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::begin() const
350{
351 return _components.cbegin();
352}
353typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::end() const
354{
355 return _components.cend();
356}
357typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cbegin() const
358{
359 return _components.cbegin();
360}
361typename std::vector<GpuKernelComponentGroup::ComponentPtr>::const_iterator GpuKernelComponentGroup::cend() const
362{
363 return _components.cend();
364}
365
SiCong Lif44bbc52022-08-29 18:25:51 +0100366} // namespace dynamic_fusion
367} // namespace experimental
368} // namespace arm_compute