/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
#define SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH

#include "arm_compute/core/Error.h"
#include <algorithm>
#include <cstdint>
#include <deque>
#include <iosfwd> // For the std::ostream declaration used by operator<<
#include <map>
#include <set>
#include <tuple>
#include <vector>

namespace arm_compute
{
namespace experimental
{
namespace dynamic_fusion
{
namespace
{
template <typename T>
bool is_in(const T &v, const std::vector<T> &vec)
{
    return std::find(std::begin(vec), std::end(vec), v) != std::end(vec);
}
} // namespace

/** A multi-input (tensors), multi-output (tensors) directed acyclic graph
 * Represented as a doubly-linked adjacency list, distinguishing between source and destination
 */
class DependencyGraph
{
public:
    using Id         = int32_t;
    using TensorId   = Id;
    using OperatorId = Id;
    /** Adjacency list
     *
     */
    using AdjList = std::map<Id, std::vector<Id>>;

    /** A pack of an operator together with its input and output tensors, used when traversing the graph in topological order
     *
     */
    struct OpPack
    {
        OperatorId            op{};
        std::vector<TensorId> inputs{};
        std::vector<TensorId> outputs{};
        friend bool operator==(const OpPack &opp0, const OpPack &opp1)
        {
            return std::make_tuple(
                       opp0.op, opp0.inputs, opp0.outputs)
                   == std::make_tuple(
                       opp1.op, opp1.inputs, opp1.outputs);
        }
    };

public:
    DependencyGraph() = default;
    friend std::ostream &operator<<(std::ostream &os, const DependencyGraph &);

    /** Check whether an operator can be added while keeping the graph a "linear sequence" / list, without actually adding it
     * @note The list is expected to only grow from head to tail
     *
     * PRECONDITION: The current graph is already linear
     *
     * @return true  If the operator can be added while keeping the graph as a linear sequence
     * @return false Otherwise
     */
    bool try_add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs) const
    {
        ARM_COMPUTE_UNUSED(op, outputs);
        if(all_ops().empty())
        {
            return true;
        }
        // The new op may link to the existing graph through exactly one tensor: an input of the new op that is an output of the tail op
        std::vector<TensorId> common_tensors{};
        auto                  existing_tensors = all_tensors();
        std::sort(existing_tensors.begin(), existing_tensors.end()); // To use std::set_intersection, both input ranges must be sorted
        std::vector<TensorId> sorted_inputs{ inputs };
        std::sort(sorted_inputs.begin(), sorted_inputs.end());
        std::set_intersection(existing_tensors.begin(), existing_tensors.end(),
                              sorted_inputs.begin(), sorted_inputs.end(), std::back_inserter(common_tensors));
        if(common_tensors.size() != 1U)
        {
            return false;
        }
        const auto linked_tensor = common_tensors[0];
        const auto tail_ops      = get_dst_ops();
        ARM_COMPUTE_ERROR_ON(tail_ops.size() != 1U); // PRECONDITION: a linear graph has exactly one tail op
        const auto tail = tail_ops[0];

        if(!is_in(linked_tensor, dst_tensors(tail)))
        {
            return false;
        }
        return true;
    }
    /** Add an operator, while keeping the graph as a "linear sequence"
     *
     * PRECONDITION: The current graph is already linear
     * INVARIANT: The list can only grow from head to tail
     * POSTCONDITION: The graph remains linear
     */
    void add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
    {
        ARM_COMPUTE_ERROR_ON(!try_add_operator_as_linear(op, inputs, outputs));
        auto success = add_operator(op, inputs, outputs);
        ARM_COMPUTE_ERROR_ON(!success);
    }
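    /* Usage sketch (illustrative only; ids are arbitrary): growing a linear graph tail-first. Each new op must
     * consume exactly one existing tensor, and that tensor must be an output of the current tail op:
     *
     *   DependencyGraph g{};
     *   g.add_operator_as_linear(0, { 0 }, { 1 });    // Empty graph: always allowed
     *   g.add_operator_as_linear(1, { 1, 2 }, { 3 }); // OK: consumes tail output t1 (t2 is a new input tensor)
     *   // g.try_add_operator_as_linear(2, { 0 }, { 4 }) would return false: t0 is not an output of the tail op
     */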
    /** Add a new operator
     * Returns false if the operation would violate the DAG invariant (i.e. introduce a cycle)
     * An invalid operation will not change the graph
     *
     * @param[in] op      Operator to add
     * @param[in] inputs  Input tensors to the operator
     * @param[in] outputs Output tensors to the operator
     *
     * @return true  If the operator was added
     * @return false Otherwise
     */
    bool add_operator(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs)
    {
        if(operator_exists(op))
        {
            return false;
        }
        _adj_src_tensors[op] = {};
        _adj_dst_tensors[op] = {};
        for(auto in_tensor : inputs)
        {
            // Linking input tensor to operator node will never create a cycle / loop because we guarantee
            // each op is newly created, so every <input, op> pair / edge is new
            link_input(op, in_tensor);
        }
        for(auto out_tensor : outputs)
        {
            // If there already exists a back path from op's output tensor to op, then linking the two would create a loop / cycle
            if(path_exists_from_tensor_to_op(out_tensor, op))
            {
                remove_operator(op);
                return false;
            }
            else
            {
                link_output(op, out_tensor);
            }
        }

        return true;
    }
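    /* Usage sketch (illustrative only; ids are arbitrary): cycle rejection. Adding an operator whose output
     * tensor can already reach it leaves the graph unchanged:
     *
     *   DependencyGraph g{};
     *   g.add_operator(0, { 0 }, { 1 }); // true: op0 reads t0, writes t1
     *   g.add_operator(1, { 1 }, { 2 }); // true: op1 reads t1, writes t2
     *   g.add_operator(2, { 2 }, { 0 }); // false: writing t0 would close the cycle op0 -> op1 -> op2 -> op0
     */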

    /** Sort the graph in a topological order, using Kahn's algorithm
     *
     * @return std::vector<OpPack> Operators in topological order, each packed with its input and output tensors
     */
    std::vector<OpPack> topological_sort() const
    {
        // In-degree: number of source operators of an op
        std::map<OperatorId, unsigned int> in_degree{};
        std::set<OperatorId>               visited_ops{};
        std::deque<OperatorId>             zero_in_degree_ops{};
        std::vector<OpPack>                sorted_op_packs{};
        for(auto op : all_ops())
        {
            const auto degree = src_ops(op).size();
            in_degree[op]     = degree;
            if(degree == 0)
            {
                zero_in_degree_ops.push_back(op);
                visited_ops.insert(op);
            }
        }

        while(!zero_in_degree_ops.empty())
        {
            const OperatorId op = zero_in_degree_ops.front();
            zero_in_degree_ops.pop_front();
            sorted_op_packs.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });

            for(const auto next_op : dst_ops(op))
            {
                if(in_degree[next_op] > 0)
                {
                    in_degree[next_op]--;
                }
                if(in_degree[next_op] == 0 && visited_ops.find(next_op) == visited_ops.end())
                {
                    zero_in_degree_ops.push_back(next_op);
                    visited_ops.insert(next_op); // Mark next_op (not op) as visited, so it cannot be enqueued a second time
                }
            }
        }

        return sorted_op_packs;
    }
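    /* Usage sketch (illustrative only; ids are arbitrary): sorting a diamond-shaped graph
     * op0: t0 -> (t1, t2);  op1: t1 -> t3;  op2: t2 -> t4;  op3: (t3, t4) -> t5
     *
     *   DependencyGraph g{};
     *   g.add_operator(0, { 0 }, { 1, 2 });
     *   g.add_operator(1, { 1 }, { 3 });
     *   g.add_operator(2, { 2 }, { 4 });
     *   g.add_operator(3, { 3, 4 }, { 5 });
     *   const auto packs = g.topological_sort(); // Op order: 0, 1, 2, 3 - op0 first, op3 last
     */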

    /** Recursive helper for @ref find_independent_paths: grow cur_path from op, ending a path whenever
     * an op with an unresolved dependency (in-degree > 1) or a leaf op is reached
     */
    void find_independent_paths_util(OperatorId op, std::vector<std::vector<OperatorId>> &paths, std::vector<OperatorId> cur_path,
                                     const std::map<OperatorId, unsigned int> &in_degree) const
    {
        // We have found an unresolved dependency: end the path before op
        if(in_degree.at(op) > 1)
        {
            paths.push_back(cur_path);
            return;
        }
        const auto child_ops = dst_ops(op);

        cur_path.push_back(op);
        // Hit a leaf op: the path is complete
        if(child_ops.empty())
        {
            paths.push_back(cur_path);
            return;
        }
        for(const auto child_op : child_ops)
        {
            find_independent_paths_util(child_op, paths, cur_path, in_degree);
        }
    }
    /** Find all independent linear paths starting from op, i.e. paths that do not depend on any other op
     *
     * @return std::vector<std::vector<OperatorId>>
     */
    std::vector<std::vector<OperatorId>> find_independent_paths(OperatorId op,
                                                                const std::map<OperatorId, unsigned int> &in_degree) const
    {
        std::vector<std::vector<OperatorId>> paths;
        std::vector<OperatorId>              cur_path;
        find_independent_paths_util(op, paths, cur_path, in_degree);
        return paths;
    }
    /** Find a longest linear path starting from op that does not depend on any other op
     *
     * @return std::vector<OperatorId>
     */
    std::vector<OperatorId> find_longest_independent_path(OperatorId op,
                                                          const std::map<OperatorId, unsigned int> &in_degree) const
    {
        const auto &paths = find_independent_paths(op, in_degree);
        ARM_COMPUTE_ERROR_ON(paths.empty());
        size_t                         max_len  = 0;
        const std::vector<OperatorId> *max_path = nullptr;

        for(const auto &path : paths)
        {
            if(path.size() >= max_len)
            {
                max_path = &path;
                max_len  = path.size();
            }
        }
        return *max_path;
    }
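    /* Behaviour sketch (illustrative only): in the diamond graph above, the independent paths from op0 are
     * { op0, op1 } and { op0, op2 }. Both end before op3, whose in-degree of 2 means it still depends on an
     * op outside the current path. Ties in length are resolved by the >= comparison: the last equally-long path wins.
     */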
    /** Propose the next partition: remove from candidate_ops the op with the longest independent path, and return that path
     *
     * @return std::vector<OperatorId>
     */
    std::vector<OperatorId> propose_next_path(std::set<OperatorId> &candidate_ops,
                                              const std::map<OperatorId, unsigned int> &in_degree) const
    {
        if(candidate_ops.empty())
        {
            return {};
        }
        size_t                  max_len = 0;
        std::vector<OperatorId> max_path;
        OperatorId              chosen_op{};
        for(auto op : candidate_ops)
        {
            const auto path = find_longest_independent_path(op, in_degree);
            if(path.size() >= max_len)
            {
                chosen_op = op;
                max_path  = path;
                max_len   = path.size();
            }
        }
        candidate_ops.erase(chosen_op);
        return max_path;
    }
    /** Partition the graph into a list of linear sub-"graphs", while preserving the topological order, and trying to minimize
     * the number of partitions
     *
     * @return std::vector<std::vector<OpPack>>
     */
    std::vector<std::vector<OpPack>> topological_partition() const
    {
        // Initialize in-degrees and the set of zero-in-degree candidate ops
        std::map<OperatorId, unsigned int> in_degree{};
        std::set<OperatorId>               candidate_ops{};
        for(auto op : all_ops())
        {
            const auto degree = src_ops(op).size();
            in_degree[op]     = degree;
            if(degree == 0)
            {
                candidate_ops.insert(op);
            }
        }

        std::vector<std::vector<OpPack>> sorted_partitions{};
        while(!candidate_ops.empty())
        {
            // Propose the longest independent path from the remaining candidate ops
            const auto path = propose_next_path(candidate_ops, in_degree);

            // Append to sorted_partitions
            std::vector<OpPack> path_op_pack{};
            for(auto op : path)
            {
                path_op_pack.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) });
            }
            sorted_partitions.push_back(path_op_pack);
            // Remove the whole path from the bookkeeping (update in_degree and candidate_ops)
            for(auto op : path)
            {
                for(const auto next_op_child : dst_ops(op))
                {
                    if(in_degree[next_op_child] > 0)
                    {
                        in_degree[next_op_child]--;
                    }
                    if(in_degree[next_op_child] == 0 && !is_in(next_op_child, path)) // We do not want to put the proposed path back into candidates
                    {
                        candidate_ops.insert(next_op_child);
                    }
                }
            }
        }
        return sorted_partitions;
    }
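    /* Usage sketch (illustrative only): partitioning the diamond graph above. The first proposed path ends
     * at op3 (in-degree 2); a second pass picks up the remaining ops once their dependencies are resolved:
     *
     *   const auto partitions = g.topological_partition();
     *   // One possible result: { { op0, op2 }, { op1, op3 } } - two linear partitions in topological order
     */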

    /** Strict equality comparison (all internal ids and order of insertion matter).
     * In the future this may be replaced with a topological comparison, allowing equivalent graphs with different internal ids to be equal
     *
     * @param[in] g0 LHS graph
     * @param[in] g1 RHS graph
     *
     * @return true  If the same
     * @return false Otherwise
     */
    friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
    {
        // Only the four adjacency lists are compared
        return std::make_tuple(
                   g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops)
               == std::make_tuple(
                   g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
    }
    std::vector<OperatorId> src_ops_from_tensor(TensorId tensor) const
    {
        return _adj_src_ops.at(tensor);
    }
    std::vector<OperatorId> dst_ops_from_tensor(TensorId tensor) const
    {
        return _adj_dst_ops.at(tensor);
    }
    /** Get all tensors
     *
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> all_tensors() const
    {
        std::vector<TensorId> tensors{};
        std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it)
        {
            return it.first;
        });
        return tensors;
    }
    /** Get source tensors of the whole graph
     *
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> global_src_tensors() const
    {
        std::vector<TensorId> tensors;
        for(auto tensor_src_ops : _adj_src_ops)
        {
            if(tensor_src_ops.second.empty())
            {
                tensors.push_back(tensor_src_ops.first);
            }
        }
        return tensors;
    }
    /** Get destination tensors of the whole graph
     *
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> global_dst_tensors() const
    {
        std::vector<TensorId> tensors;
        for(auto tensor_dst_ops : _adj_dst_ops)
        {
            if(tensor_dst_ops.second.empty())
            {
                tensors.push_back(tensor_dst_ops.first);
            }
        }
        return tensors;
    }
    /** Get all root ops. Root ops can also be referred to as "src ops" of the whole graph
     *
     * @return std::vector<OperatorId>
     */
    std::vector<OperatorId> get_root_ops() const
    {
        std::vector<OperatorId> ops{};
        const auto op_list = all_ops();

        for(auto op : op_list)
        {
            if(src_ops(op).empty())
            {
                ops.emplace_back(op);
            }
        }
        return ops;
    }

private:
    void link_input(OperatorId op, TensorId in_tensor)
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        if(!tensor_exists(in_tensor))
        {
            insert_new_tensor(in_tensor);
        }
        ARM_COMPUTE_ERROR_ON(are_connected(op, in_tensor)); // Prevent repetitive linking
        _adj_src_tensors[op].push_back(in_tensor);
        _adj_dst_ops[in_tensor].push_back(op);
    }
    void link_output(OperatorId op, TensorId out_tensor)
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        if(!tensor_exists(out_tensor))
        {
            insert_new_tensor(out_tensor);
        }
        ARM_COMPUTE_ERROR_ON(are_connected(op, out_tensor)); // Prevent repetitive linking
        _adj_dst_tensors[op].push_back(out_tensor);
        _adj_src_ops[out_tensor].push_back(op);
    }

    std::vector<OperatorId> src_ops(OperatorId op) const
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        std::vector<OperatorId> ops{};
        for(TensorId src_tensor : src_tensors(op))
        {
            ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
        }
        return ops;
    }
    std::vector<OperatorId> dst_ops(OperatorId op) const
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        std::vector<OperatorId> ops{};
        for(TensorId dst_tensor : _adj_dst_tensors.at(op))
        {
            ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
        }
        return ops;
    }

    /** Get source tensors of an operator
     *
     * @param[in] op
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> src_tensors(OperatorId op) const
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        return _adj_src_tensors.at(op);
    }
    /** Get destination tensors of an operator
     *
     * @param[in] op
     * @return std::vector<TensorId>
     */
    std::vector<TensorId> dst_tensors(OperatorId op) const
    {
        ARM_COMPUTE_ERROR_ON(!operator_exists(op));
        return _adj_dst_tensors.at(op);
    }
    /** Get all operators
     *
     * @return std::vector<OperatorId>
     */
    std::vector<OperatorId> all_ops() const
    {
        std::vector<OperatorId> ops{};
        std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it)
        {
            return it.first;
        });
        return ops;
    }
    /** Remove an operator from the graph
     *
     * @param[in] op
     */
    void remove_operator(OperatorId op)
    {
        for(auto src_tensor : _adj_src_tensors.at(op))
        {
            auto &dst_ops = _adj_dst_ops.at(src_tensor);
            dst_ops.erase(
                std::remove(std::begin(dst_ops), std::end(dst_ops), op),
                std::end(dst_ops));
        }
        for(auto dst_tensor : _adj_dst_tensors.at(op))
        {
            auto &src_ops = _adj_src_ops.at(dst_tensor);
            src_ops.erase(
                std::remove(std::begin(src_ops), std::end(src_ops), op),
                std::end(src_ops));
        }
        // Remove any isolated tensors
        // An isolated tensor is one where both its _adj_src_ops and _adj_dst_ops are empty
        for(auto t : all_tensors())
        {
            if(_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
            {
                _adj_src_ops.erase(t);
                _adj_dst_ops.erase(t);
            }
        }
        _adj_src_tensors.erase(op);
        _adj_dst_tensors.erase(op);
    }
    void insert_new_tensor(TensorId tensor)
    {
        _adj_src_ops[tensor] = {};
        _adj_dst_ops[tensor] = {};
    }
    bool tensor_exists(TensorId tensor) const
    {
        return _adj_src_ops.find(tensor) != _adj_src_ops.end() && _adj_dst_ops.find(tensor) != _adj_dst_ops.end();
    }
    bool operator_exists(OperatorId op) const
    {
        return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
    }
    bool is_src_tensor_of(OperatorId op, TensorId tensor) const
    {
        if(!operator_exists(op) || !tensor_exists(tensor))
        {
            return false;
        }
        const auto op_inputs = src_tensors(op);
        return std::find(op_inputs.begin(), op_inputs.end(), tensor) != op_inputs.end();
    }
    bool is_dst_tensor_of(OperatorId op, TensorId tensor) const
    {
        if(!operator_exists(op) || !tensor_exists(tensor))
        {
            return false;
        }
        const auto op_outputs = dst_tensors(op);
        return std::find(op_outputs.begin(), op_outputs.end(), tensor) != op_outputs.end();
    }
    bool are_connected(OperatorId op, TensorId tensor) const
    {
        return is_src_tensor_of(op, tensor) || is_dst_tensor_of(op, tensor);
    }
    /** Check if op is a destination / leaf operator of the whole graph
     *
     * @param[in] op
     * @return true  If op has no destination ops
     * @return false Otherwise
     */
    bool is_dst_op(OperatorId op) const
    {
        return dst_ops(op).empty();
    }
    std::vector<OperatorId> get_dst_ops() const
    {
        std::vector<OperatorId> ops{};
        const auto op_list = all_ops();

        for(auto op : op_list)
        {
            if(is_dst_op(op))
            {
                ops.emplace_back(op);
            }
        }
        return ops;
    }
    bool path_exists_from_tensor_to_op(TensorId src_tensor, OperatorId dst_op) const
    {
        if(!tensor_exists(src_tensor) || !operator_exists(dst_op))
        {
            return false;
        }
        for(auto child_op : dst_ops_from_tensor(src_tensor))
        {
            if(path_exists_from_op_to_op(child_op, dst_op))
            {
                return true;
            }
        }
        return false;
    }

    bool path_exists_from_op_to_op(OperatorId src_op, OperatorId dst_op) const
    {
        if(!operator_exists(src_op) || !operator_exists(dst_op))
        {
            return false;
        }
        if(src_op == dst_op)
        {
            return true;
        }
        if(is_in(src_op, get_dst_ops()))
        {
            return false;
        }
        for(auto child_tensor : dst_tensors(src_op))
        {
            if(path_exists_from_tensor_to_op(child_tensor, dst_op))
            {
                return true;
            }
        }
        return false;
    }

private:
    AdjList _adj_src_tensors{};
    AdjList _adj_dst_tensors{};
    AdjList _adj_src_ops{};
    AdjList _adj_dst_ops{};
};

} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
#endif /* SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH */