| /* |
| * Copyright (c) 2022 Arm Limited. |
| * |
| * SPDX-License-Identifier: MIT |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to |
| * deal in the Software without restriction, including without limitation the |
| * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| * sell copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| #ifndef SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH |
| #define SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH |
| |
| #include "arm_compute/core/Error.h" |
#include <algorithm>
#include <cstdint>
#include <deque>
#include <iosfwd>
#include <map>
#include <set>
#include <tuple>
#include <vector>
| |
| namespace arm_compute |
| { |
| namespace experimental |
| { |
| namespace dynamic_fusion |
| { |
| namespace |
| { |
| template <typename T> |
| bool is_in(const T &v, const std::vector<T> &vec) |
| { |
| return std::find(std::begin(vec), std::end(vec), v) != std::end(vec); |
| } |
| } // namespace |
| |
/** A multi-input (tensors), multi-output (tensors) directed acyclic graph (DAG)
 * Represented as doubly-linked adjacency lists, distinguishing between source and destination connections
 */
| class DependencyGraph |
| { |
| public: |
| using Id = int32_t; |
| using TensorId = Id; |
| using OperatorId = Id; |
    /** Adjacency list */
| using AdjList = std::map<Id, std::vector<Id>>; |
| |
    /** A pack of an operator together with its input and output tensors, used when traversing the graph in topological order */
| struct OpPack |
| { |
| OperatorId op{}; |
| std::vector<TensorId> inputs{}; |
| std::vector<TensorId> outputs{}; |
| friend bool operator==(const OpPack &opp0, const OpPack &opp1) |
| { |
| return std::make_tuple( |
| opp0.op, opp0.inputs, opp0.outputs) |
| == std::make_tuple( |
| opp1.op, opp1.inputs, opp1.outputs); |
| } |
| }; |
| |
| public: |
| DependencyGraph() = default; |
| friend std::ostream &operator<<(std::ostream &os, const DependencyGraph &); |
| |
    /** Check whether an operator can be added while keeping the graph a "linear sequence" / list, without actually adding it
| * @note The list is expected to only grow from head to tail |
| * |
| * PRECONDITION: The current graph is already linear |
| * |
| * @return true If the operator can be added while keeping the graph as a linear sequence |
| * @return false Otherwise |
| */ |
| bool try_add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs) const |
| { |
| ARM_COMPUTE_UNUSED(op, outputs); |
| if(all_ops().empty()) |
| { |
| return true; |
| } |
| std::vector<TensorId> common_tensors{}; |
| auto existing_tensors = all_tensors(); |
| std::sort(existing_tensors.begin(), existing_tensors.end()); // To use std::set_intersection, both input sets must be sorted |
| std::vector<TensorId> sorted_inputs{ inputs }; |
| std::sort(sorted_inputs.begin(), sorted_inputs.end()); |
| std::set_intersection(existing_tensors.begin(), existing_tensors.end(), |
| sorted_inputs.begin(), sorted_inputs.end(), std::back_inserter(common_tensors)); |
| if(common_tensors.size() != 1U) |
| { |
| return false; |
| } |
| const auto linked_tensor = common_tensors[0]; |
| const auto tail_ops = get_dst_ops(); |
| ARM_COMPUTE_ERROR_ON(tail_ops.size() != 1U); // PRECONDITION |
| const auto tail = tail_ops[0]; |
| |
        return is_in(linked_tensor, dst_tensors(tail));
| } |
    /** Add an operator, while keeping the graph as a "linear sequence"
     *
     * PRECONDITION: The current graph is already linear
     * INVARIANT: The list can only grow from head to tail
     * POSTCONDITION: The graph remains linear
     */
| void add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs) |
| { |
| ARM_COMPUTE_ERROR_ON(!try_add_operator_as_linear(op, inputs, outputs)); |
| auto success = add_operator(op, inputs, outputs); |
| ARM_COMPUTE_UNUSED(success); |
| ARM_COMPUTE_ERROR_ON(!success); |
| } |
    /** Add a new operator
     * Fails and returns false if adding the operator would violate the DAG invariant;
     * a failed operation leaves the graph unchanged
     *
     * @param[in] op      Operator to add
     * @param[in] inputs  Input tensors to the operator
     * @param[in] outputs Output tensors to the operator
     *
     * @return true if the operator was added, false otherwise
     */
| bool add_operator(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs) |
| { |
| if(operator_exists(op)) |
| { |
| return false; |
| } |
| _adj_src_tensors[op] = {}; |
| _adj_dst_tensors[op] = {}; |
| for(auto in_tensor : inputs) |
| { |
| // Linking input tensor to operator node will never create a cycle / loop because we guarantee |
| // each op is newly created, so every <input, op> pair / edge is new |
| link_input(op, in_tensor); |
| } |
| for(auto out_tensor : outputs) |
| { |
| // If there exists a back path from op's output tensor to op already, then linking the two will create a loop / cycle |
| if(path_exists_from_tensor_to_op(out_tensor, op)) |
| { |
| remove_operator(op); |
| return false; |
| } |
| else |
| { |
| link_output(op, out_tensor); |
| } |
| } |
| |
| return true; |
| } |
| |
    /** Sort the graph in topological order
     *
     * @return std::vector<OpPack> Operator packs in topological order
     */
| std::vector<OpPack> topological_sort() const |
| { |
        // In-degree (number of source operators feeding an op)
| std::map<OperatorId, unsigned int> in_degree{}; |
| std::set<OperatorId> visited_ops{}; |
| std::deque<OperatorId> zero_in_degree_ops{}; |
| std::vector<OpPack> sorted_op_packs{}; |
| for(auto op : all_ops()) |
| { |
| const auto degree = src_ops(op).size(); |
| in_degree[op] = degree; |
| if(degree == 0) |
| { |
| zero_in_degree_ops.push_back(op); |
| visited_ops.insert(op); |
| } |
| } |
| |
| while(!zero_in_degree_ops.empty()) |
| { |
| const OperatorId op = zero_in_degree_ops.front(); |
| zero_in_degree_ops.pop_front(); |
| sorted_op_packs.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) }); |
| |
| for(const auto next_op : dst_ops(op)) |
| { |
| if(in_degree[next_op] > 0) |
| { |
| in_degree[next_op]--; |
| } |
| if(in_degree[next_op] == 0 && visited_ops.find(next_op) == visited_ops.end()) |
| { |
| zero_in_degree_ops.push_back(next_op); |
                    visited_ops.insert(next_op);
| } |
| } |
| } |
| |
| return sorted_op_packs; |
| } |
| |
    /** Recursive helper of @ref find_independent_paths: extend @p cur_path from @p op until reaching a leaf
     * or an operator whose in-degree is greater than 1 (an unresolved dependency)
     */
    void find_independent_paths_util(OperatorId op, std::vector<std::vector<OperatorId>> &paths, std::vector<OperatorId> cur_path,
                                     const std::map<OperatorId, unsigned int> &in_degree) const
| { |
| // We have found an unresolved dependency |
| if(in_degree.at(op) > 1) |
| { |
| paths.push_back(cur_path); |
| return; |
| } |
| const auto child_ops = dst_ops(op); |
| |
| cur_path.push_back(op); |
| // Hit the leaf op |
| if(child_ops.empty()) |
| { |
| paths.push_back(cur_path); |
| return; |
| } |
| for(const auto child_op : child_ops) |
| { |
| find_independent_paths_util(child_op, paths, cur_path, in_degree); |
| } |
| } |
    /** Find all independent linear paths from @p op, i.e. paths that do not depend on any operator outside of them
     *
     * @return std::vector<std::vector<OperatorId>> All independent linear paths
     */
| std::vector<std::vector<OperatorId>> find_independent_paths(OperatorId op, |
| const std::map<OperatorId, unsigned int> &in_degree) const |
| { |
| std::vector<std::vector<OperatorId>> paths; |
| std::vector<OperatorId> cur_path; |
| find_independent_paths_util(op, paths, cur_path, in_degree); |
| return paths; |
| } |
    /** Find a longest linear path from @p op that does not depend on any operator outside of it
     *
     * @return std::vector<OperatorId> A longest independent linear path
     */
| std::vector<OperatorId> find_longest_independent_path(OperatorId op, |
| const std::map<OperatorId, unsigned int> &in_degree) const |
| { |
| const auto &paths = find_independent_paths(op, in_degree); |
| ARM_COMPUTE_ERROR_ON(paths.empty()); |
| size_t max_len = 0; |
| const std::vector<OperatorId> *max_path = nullptr; |
| |
| for(const auto &path : paths) |
| { |
| if(path.size() >= max_len) |
| { |
| max_path = &path; |
| max_len = path.size(); |
| } |
| } |
| return *max_path; |
| } |
    /** Propose the next linear path to extract: choose, among @p candidate_ops, the op with the longest
     * independent path and remove it from the candidates
     *
     * @return std::vector<OperatorId> The proposed path (empty if there are no candidates)
     */
    std::vector<OperatorId> propose_next_path(std::set<OperatorId> &candidate_ops,
                                              const std::map<OperatorId, unsigned int> &in_degree) const
| { |
| if(candidate_ops.empty()) |
| { |
| return {}; |
| } |
| size_t max_len = 0; |
| std::vector<OperatorId> max_path; |
| OperatorId chosen_op{}; |
| for(auto op : candidate_ops) |
| { |
| const auto path = find_longest_independent_path(op, in_degree); |
| if(path.size() >= max_len) |
| { |
| chosen_op = op; |
| max_path = path; |
| max_len = path.size(); |
| } |
| } |
| candidate_ops.erase(chosen_op); |
| return max_path; |
| } |
    /** Partition the graph into a list of linear sub-"graphs", preserving the topological order and trying
     * to minimize the number of partitions
     */
| std::vector<std::vector<OpPack>> topological_partition() const |
| { |
        // Initialize in-degrees and collect ops with zero in-degree as the initial candidates
| std::map<OperatorId, unsigned int> in_degree{}; |
| std::set<OperatorId> candidate_ops{}; |
| for(auto op : all_ops()) |
| { |
| const auto degree = src_ops(op).size(); |
| in_degree[op] = degree; |
| if(degree == 0) |
| { |
| candidate_ops.insert(op); |
| } |
| } |
| |
| std::vector<std::vector<OpPack>> sorted_partitions{}; |
| while(!candidate_ops.empty()) |
| { |
            // Propose the longest linear path that can be extracted from the current candidates
| const auto path = propose_next_path(candidate_ops, in_degree); |
| |
| // Append to sorted_partitions |
| std::vector<OpPack> path_op_pack{}; |
| for(auto op : path) |
| { |
| path_op_pack.push_back(OpPack{ op, src_tensors(op), dst_tensors(op) }); |
| } |
| sorted_partitions.push_back(path_op_pack); |
            // Remove the whole path from the bookkeeping (update in_degree and candidate_ops)
| for(auto op : path) |
| { |
| for(const auto next_op_child : dst_ops(op)) |
| { |
| if(in_degree[next_op_child] > 0) |
| { |
| in_degree[next_op_child]--; |
| } |
| if(in_degree[next_op_child] == 0 && !is_in(next_op_child, path)) // We do not want to put the proposed path back into candidates |
| { |
| candidate_ops.insert(next_op_child); |
| } |
| } |
| } |
| } |
| return sorted_partitions; |
| } |
| |
    /** Strict equality comparison (all internal ids and the order of insertion matter).
     * In the future this may be replaced with a topological comparison, allowing equivalent graphs with different internal ids to be equal
     *
     * @param[in] g0 First graph to compare
     * @param[in] g1 Second graph to compare
     *
     * @return true If the two graphs are identical
     * @return false Otherwise
     */
| friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1) |
| { |
| // Do not compare id allocators |
| return std::make_tuple( |
| g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops) |
| == std::make_tuple( |
| g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops); |
| } |
    /** Get the operators that produce (write to) @p tensor */
    std::vector<OperatorId> src_ops_from_tensor(TensorId tensor) const
| { |
| return _adj_src_ops.at(tensor); |
| } |
    /** Get the operators that consume (read from) @p tensor */
    std::vector<OperatorId> dst_ops_from_tensor(TensorId tensor) const
| { |
| return _adj_dst_ops.at(tensor); |
| } |
| /** Get all tensors |
| * |
| * @return std::vector<TensorId> |
| */ |
| std::vector<TensorId> all_tensors() const |
| { |
| std::vector<TensorId> tensors{}; |
| std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it) |
| { |
| return it.first; |
| }); |
| return tensors; |
| } |
| /** Get source tensors of the whole graph |
| * |
| * @return std::vector<TensorId> |
| */ |
| std::vector<TensorId> global_src_tensors() const |
| { |
| std::vector<TensorId> tensors; |
| for(auto tensor_src_ops : _adj_src_ops) |
| { |
| if(tensor_src_ops.second.empty()) |
| { |
| tensors.push_back(tensor_src_ops.first); |
| } |
| } |
| return tensors; |
| } |
| /** Get destination tensors of the whole graph |
| * |
| * @return std::vector<TensorId> |
| */ |
| std::vector<TensorId> global_dst_tensors() const |
| { |
| std::vector<TensorId> tensors; |
| for(auto tensor_dst_ops : _adj_dst_ops) |
| { |
| if(tensor_dst_ops.second.empty()) |
| { |
| tensors.push_back(tensor_dst_ops.first); |
| } |
| } |
| return tensors; |
| } |
| /** Get intermediate tensors of the whole graph. |
| * |
| * @return std::vector<TensorId> |
| */ |
| std::vector<TensorId> intermediate_tensors() const |
| { |
| std::vector<TensorId> tensors; |
| |
        // A tensor is intermediate if it connects the output of one operator to the input of another,
        // i.e. it has both producers and consumers within the graph. Such a tensor is not allocated in
        // memory; it exists as a temporary only.
        for(const auto &tensor_producers : _adj_src_ops)
        {
            if(!tensor_producers.second.empty())
            {
                const auto tensor_consumers = _adj_dst_ops.find(tensor_producers.first);
                if(tensor_consumers != _adj_dst_ops.end() && !tensor_consumers->second.empty())
                {
                    tensors.push_back(tensor_producers.first);
                }
            }
        }
| |
| return tensors; |
| } |
| /** Get all root ops. Root ops can also be referred to as "src ops" of the whole graph |
| * |
| * @return std::vector<OperatorId> |
| */ |
| std::vector<OperatorId> get_root_ops() const |
| { |
| std::vector<OperatorId> ops{}; |
| const auto op_list = all_ops(); |
| |
| for(auto op : op_list) |
| { |
| if(src_ops(op).empty()) |
| { |
| ops.emplace_back(op); |
| } |
| } |
| return ops; |
| } |
| |
| private: |
| void link_input(OperatorId op, TensorId in_tensor) |
| { |
| ARM_COMPUTE_ERROR_ON(!operator_exists(op)); |
| if(!tensor_exists(in_tensor)) |
| { |
| insert_new_tensor(in_tensor); |
| } |
| ARM_COMPUTE_ERROR_ON(are_connected(op, in_tensor)); // Prevent repetitive linking |
| _adj_src_tensors[op].push_back(in_tensor); |
| _adj_dst_ops[in_tensor].push_back(op); |
| } |
| void link_output(OperatorId op, TensorId out_tensor) |
| { |
| ARM_COMPUTE_ERROR_ON(!operator_exists(op)); |
| if(!tensor_exists(out_tensor)) |
| { |
| insert_new_tensor(out_tensor); |
| } |
| ARM_COMPUTE_ERROR_ON(are_connected(op, out_tensor)); // Prevent repetitive linking |
| _adj_dst_tensors[op].push_back(out_tensor); |
| _adj_src_ops[out_tensor].push_back(op); |
| } |
| |
| std::vector<OperatorId> src_ops(OperatorId op) const |
| { |
| ARM_COMPUTE_ERROR_ON(!operator_exists(op)); |
| std::vector<OperatorId> ops{}; |
| for(TensorId src_tensor : src_tensors(op)) |
| { |
| ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor))); |
| } |
| return ops; |
| } |
| std::vector<OperatorId> dst_ops(OperatorId op) const |
| { |
| ARM_COMPUTE_ERROR_ON(!operator_exists(op)); |
| std::vector<OperatorId> ops{}; |
| for(TensorId dst_tensor : _adj_dst_tensors.at(op)) |
| { |
| ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor))); |
| } |
| return ops; |
| } |
| |
    /** Get the source (input) tensors of an operator
     *
     * @param[in] op Operator to query
     * @return std::vector<TensorId>
     */
| std::vector<TensorId> src_tensors(OperatorId op) const |
| { |
| ARM_COMPUTE_ERROR_ON(!operator_exists(op)); |
| return _adj_src_tensors.at(op); |
| } |
    /** Get the destination (output) tensors of an operator
     *
     * @param[in] op Operator to query
     * @return std::vector<TensorId>
     */
| std::vector<TensorId> dst_tensors(OperatorId op) const |
| { |
| ARM_COMPUTE_ERROR_ON(!operator_exists(op)); |
| return _adj_dst_tensors.at(op); |
| } |
| /** Get all operators |
| * |
| * @return std::vector<OperatorId> |
| */ |
| std::vector<OperatorId> all_ops() const |
| { |
| std::vector<OperatorId> ops{}; |
| std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it) |
| { |
| return it.first; |
| }); |
| return ops; |
| } |
    /** Remove an operator from the graph, unlinking it from its tensors and erasing any tensors left isolated
     *
     * @param[in] op Operator to remove
     */
| void remove_operator(OperatorId op) |
| { |
| for(auto src_tensor : _adj_src_tensors.at(op)) |
| { |
| auto &dst_ops = _adj_dst_ops.at(src_tensor); |
| dst_ops.erase( |
| std::remove(std::begin(dst_ops), std::end(dst_ops), op), |
| std::end(dst_ops)); |
| } |
| for(auto dst_tensor : _adj_dst_tensors.at(op)) |
| { |
| auto &src_ops = _adj_src_ops.at(dst_tensor); |
| src_ops.erase( |
| std::remove(std::begin(src_ops), std::end(src_ops), op), |
| std::end(src_ops)); |
| } |
| // Remove any isolated tensors |
| // An isolated tensor is one where both its _adj_src_ops and _adj_dst_ops are empty |
| for(auto t : all_tensors()) |
| { |
| if(_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty()) |
| { |
| _adj_src_ops.erase(t); |
| _adj_dst_ops.erase(t); |
| } |
| } |
| _adj_src_tensors.erase(op); |
| _adj_dst_tensors.erase(op); |
| } |
| void insert_new_tensor(TensorId tensor) |
| { |
| _adj_src_ops[tensor] = {}; |
| _adj_dst_ops[tensor] = {}; |
| } |
| bool tensor_exists(TensorId tensor) const |
| { |
| return _adj_src_ops.find(tensor) != _adj_src_ops.end() && _adj_dst_ops.find(tensor) != _adj_dst_ops.end(); |
| } |
| bool operator_exists(OperatorId op) const |
| { |
| return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end(); |
| } |
| bool is_src_tensor_of(OperatorId op, TensorId tensor) const |
| { |
| if(!operator_exists(op) || !tensor_exists(tensor)) |
| { |
| return false; |
| } |
| const auto op_inputs = src_tensors(op); |
| return std::find(op_inputs.begin(), op_inputs.end(), tensor) != op_inputs.end(); |
| } |
| bool is_dst_tensor_of(OperatorId op, TensorId tensor) const |
| { |
| if(!operator_exists(op) || !tensor_exists(tensor)) |
| { |
| return false; |
| } |
| const auto op_outputs = dst_tensors(op); |
| return std::find(op_outputs.begin(), op_outputs.end(), tensor) != op_outputs.end(); |
| } |
| bool are_connected(OperatorId op, TensorId tensor) const |
| { |
| return is_src_tensor_of(op, tensor) || is_dst_tensor_of(op, tensor); |
| } |
    /** Check if @p op is a destination / leaf operator of the whole graph
     *
     * @param[in] op Operator to check
     * @return true If the operator has no consumers
     * @return false Otherwise
     */
| bool is_dst_op(OperatorId op) const |
| { |
| return dst_ops(op).empty(); |
| } |
| std::vector<OperatorId> get_dst_ops() const |
| { |
| std::vector<OperatorId> ops{}; |
| const auto op_list = all_ops(); |
| |
| for(auto op : op_list) |
| { |
| if(is_dst_op(op)) |
| { |
| ops.emplace_back(op); |
| } |
| } |
| return ops; |
| } |
| bool path_exists_from_tensor_to_op(TensorId src_tensor, OperatorId dst_op) const |
| { |
| if(!tensor_exists(src_tensor) || !operator_exists(dst_op)) |
| { |
| return false; |
| } |
| for(auto child_op : dst_ops_from_tensor(src_tensor)) |
| { |
| if(path_exists_from_op_to_op(child_op, dst_op)) |
| { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool path_exists_from_op_to_op(OperatorId src_op, OperatorId dst_op) const |
| { |
| if(!operator_exists(src_op) || !operator_exists(dst_op)) |
| { |
| return false; |
| } |
| if(src_op == dst_op) |
| { |
| return true; |
| } |
| if(is_in(src_op, get_dst_ops())) |
| { |
| return false; |
| } |
| for(auto child_tensor : dst_tensors(src_op)) |
| { |
| if(path_exists_from_tensor_to_op(child_tensor, dst_op)) |
| { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private: |
| AdjList _adj_src_tensors{}; |
| AdjList _adj_dst_tensors{}; |
| AdjList _adj_src_ops{}; |
| AdjList _adj_dst_ops{}; |
| }; |
| |
| } // namespace dynamic_fusion |
| } // namespace experimental |
| } // namespace arm_compute |
| #endif /* SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH */ |