Add Auxiliary tensors

The assign_memory_descriptors method could not automatically identify
Auxiliary tensors. Therefore, changes are made to allow developers to
explicitly mark auxiliary tensors.

However, to avoid ambiguity between auxiliary and "intermediate"
tensors, we solidify the definitions of both:

"Intermediate tensor" is a strictly topological term. Intermediate
tensors are the "inner" tensors within a workload, hidden from the
user, as opposed to the input and output tensors exposed to users.

Auxiliary tensors are a subcategory of Intermediate tensors that also
carry memory-allocation semantics: they are intermediate tensors that
need real memory backing.
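
To make the classification concrete, a self-contained toy model
(illustration only; the classify helper is hypothetical, not library
API):

    #include <cassert>

    enum class MemoryType { User, Auxiliary, Virtual };

    // Topology: argument ("outer") tensor vs intermediate ("inner") tensor.
    // Allocation: whether the tensor needs real backing memory.
    MemoryType classify(bool is_argument, bool needs_backing_memory)
    {
        if(is_argument)
        {
            // Argument tensors always need real backing memory
            assert(needs_backing_memory);
            return MemoryType::User;
        }
        return needs_backing_memory ? MemoryType::Auxiliary : MemoryType::Virtual;
    }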

For more details, please see the documentation of the MemoryType enum.
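
For example, with this change an operator marks its inner tensors
explicitly rather than relying on automatic assignment (a minimal
sketch based on the GpuConv2d and GpuWorkloadSketchImpl hunks below;
the sketch object and aux_info are assumed):

    // Virtual: links operators at sketch time; no backing memory at runtime
    ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
    // Auxiliary: holds temporary results between kernels; real backing memory
    ITensorInfo *aux = sketch.implementation().create_auxiliary_tensor(aux_info);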

Rename MemoryType::NoAlloc to MemoryType::Virtual
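
At call sites this is a mechanical substitution, e.g. (illustrative):

    // Before
    mem_map[t_id] = MemoryDescriptor{ MemoryType::NoAlloc };
    // After
    mem_map[t_id] = MemoryDescriptor{ MemoryType::Virtual };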

Partially resolves: COMPMID-5523

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: Ibde44c2ec1570be9423e0fb38b53bb136ffc36dd
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8940
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
diff --git a/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h b/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
index 25023ff..3daedd4 100644
--- a/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
+++ b/arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,12 +32,31 @@
 {
 namespace dynamic_fusion
 {
-/** Type of memory used by a workload tensor */
+/** Type of memory used by a workload tensor
+ *
+ * We can classify tensors along two dimensions: Topology (where they are in a workload) and Memory allocation:
+ * Topology:
+ *      Argument tensors: "Outer" tensors exposed to the users as inputs and outputs (arguments)
+ *      Intermediate tensors: "Inner" tensors hidden from the users as links between operators
+ * Memory allocation:
+ *      Alloc: Tensors that need to be allocated real backing memory
+ *      No-Alloc: Tensors that don't need to be allocated real backing memory
+ *
+ * We end up with 3 MemoryTypes based on the product of these two classifications:
+ *          |    Argument    |   Intermediate    |
+ * ---------*----------------*-------------------*
+ * Alloc    |     User       |   Auxiliary       |
+ * ---------*----------------*-------------------*
+ * No-Alloc |     N/A        |    Virtual        |
+ * ---------*----------------*-------------------*
+ */
 enum class MemoryType
 {
+    /** Both User and Auxiliary types are Alloc types, since they require memory allocation */
     User      = 0, /**< Memory coming directly from users, e.g. for argument tensors */
-    Auxiliary = 1, /**< Additional memory required by the workload tensor, e.g. for temporary tensors */
-    NoAlloc   = 2, /**< Temporary tile which is not allocated as a whole tensor in the memory */
+    Auxiliary = 1, /**< Additional memory required by the workload tensor, e.g. for tensors holding temporary results between kernels */
+    /** The Virtual type is a No-Alloc type, since it doesn't require memory allocation */
+    Virtual = 2, /**< Temporary tile which is not allocated as a whole tensor in memory. It is mainly used at sketch time to link operators; there should be no Virtual tensors at runtime */
 };
 
 /** Memory information for tensors with @ref MemoryType::Auxiliary.
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
index f19ad6d..422edb3 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h
@@ -70,18 +70,9 @@
     TensorInfo create_tensor_info(Args &&... args)
     {
         auto tensor_info = TensorInfo(std::forward<Args>(args)...);
-        tensor_info.set_id(allocate_new_tensor_id());
+        register_new_tensor(tensor_info);
         return tensor_info;
     }
-    /** Create a @ref TensorInfo associated with the workload sketch by copying from an existing tensor info
-     * @note The newly copied tensor will have a different identity within the workload than the one copied from
-     *       To copy the identity of @p tensor_info as well, use @ref TensorInfo 's copy constructors instead
-     *
-     * @param[in] tensor_info @ref ITensorInfo to copy from
-     *
-     * @return TensorInfo   Newly created tensor info
-     */
-    TensorInfo create_tensor_info(const ITensorInfo &tensor_info);
     /** Create a default @ref TensorInfo associated with the workload sketch
      * It is usually used by user input or output tensors
      *
@@ -90,7 +81,11 @@
     TensorInfo create_tensor_info();
 
 private:
-    ITensorInfo::Id                 allocate_new_tensor_id();
+    /** Register a new tensor by setting a new id to it and register its memory descriptor in the sketch
+     *
+     * @param[in,out] tensor_info @ref ITensorInfo that will be registered
+     */
+    void register_new_tensor(ITensorInfo &tensor_info);
     std::unique_ptr<Implementation> _impl; /**< Internal opaque implementation*/
 };
 
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 954c6c5..5905ba5 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -56,7 +56,7 @@
     _quantization_info             = info.quantization_info();
     _data_layout                   = info.data_layout();
     _are_values_constant           = info.are_values_constant();
-    _id                            = invalid_tensor_id; // Tensor Id has to be explicitly set, instead of being copied
+    _id                            = info.id();
     _lock_paddings                 = info.lock_paddings();
 }
 
@@ -77,7 +77,7 @@
     _quantization_info             = info.quantization_info();
     _data_layout                   = info.data_layout();
     _are_values_constant           = info.are_values_constant();
-    _id                            = invalid_tensor_id; // Tensor Id has to be explicitly set, instead of being copied
+    _id                            = info.id();
     _lock_paddings                 = false;
 }
 TensorInfo::TensorInfo(Format format)
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
index 669913c..4cf7a7f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,46 +31,6 @@
 {
 namespace dynamic_fusion
 {
-namespace
-{
-/** Automatically create memory descriptors for all tensors in the graph
- *
- * @param[in] tensors @ref ITensorInfo map
- * @param[in] graph   @ref DependencyGraph of which the @p tensors are a part
- *
- * @return MemoryDescriptorMap  An assignment map of @ref MemoryDescriptors for each ITensorInfo in the graph
- */
-MemoryDescriptorMap assign_memory_descriptors(const std::map<ITensorInfo::Id, const ITensorInfo *> tensors, const DependencyGraph &graph)
-{
-    const auto all_tensors = graph.all_tensors();
-    const auto src_tensors = graph.global_src_tensors();
-    const auto dst_tensors = graph.global_dst_tensors();
-    const auto interm_tensors = graph.intermediate_tensors();
-
-    MemoryDescriptorMap mem_map{};
-    for(auto t_id : all_tensors)
-    {
-        const auto &tensor = tensors.at(t_id);
-        // Only global src and dst tensors to the entire component graph are "User" tensors, which are user-specified memories
-        if(is_in(t_id, src_tensors) || is_in(t_id, dst_tensors))
-        {
-            mem_map[t_id] = MemoryDescriptor{ MemoryType::User };
-        }
-        else if(is_in(t_id, interm_tensors))
-        {
-            mem_map[t_id] = MemoryDescriptor { MemoryType::NoAlloc };
-        }
-        else
-        {
-            AuxMemoryInfo aux_mem_info{ tensor->total_size() };
-            mem_map[t_id] = MemoryDescriptor{ MemoryType::Auxiliary, aux_mem_info };
-        }
-    }
-    return mem_map;
-}
-
-} // namespace
-
 std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
 {
     std::vector<DependencyGraph::TensorId> tensor_ids{};
@@ -89,19 +49,16 @@
 {
 }
 
-GpuKernelComponentStream GpuKernelComponentGraph::fuse() const
+GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const
 {
-    // Obtain memory descriptor map
-    const auto mem_map = assign_memory_descriptors(_tensors, _dependency_graph);
-
     GpuKernelComponentStream stream{ _services, mem_map };
-    const auto op_seq = _dependency_graph.build_operators_sequence();
+    const auto               op_seq = _dependency_graph.build_operators_sequence();
 
     stream.new_component_group();
     for(auto op : op_seq)
     {
         const auto component = _components.at(op.op).get();
-        const auto success = stream.add_component(component);
+        const auto success   = stream.add_component(component);
         ARM_COMPUTE_ERROR_ON(!success);
         ARM_COMPUTE_UNUSED(success);
     }
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
index e4f498b..8314ea0 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -89,8 +89,12 @@
         }
     }
     /** Perform component fusion and serialize the graph into a stream of component groups
+     *
+     * @param[in] mem_map MemoryDescriptorMap for all the tensors in the component graph
+     *
+     * @return GpuKernelComponentStream
      */
-    GpuKernelComponentStream fuse() const;
+    GpuKernelComponentStream fuse(const MemoryDescriptorMap &mem_map) const;
 
 private:
     static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
index ce7cf1e..33f6720 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,23 +43,18 @@
     return _impl->context();
 }
 
-TensorInfo GpuWorkloadSketch::create_tensor_info(const ITensorInfo &tensor_info)
+void GpuWorkloadSketch::register_new_tensor(ITensorInfo &tensor_info)
 {
-    TensorInfo tensor{ tensor_info };
-    tensor.set_id(allocate_new_tensor_id());
-    return tensor;
+    tensor_info.set_id(_impl->allocate_new_tensor_id());
+    // All input/output tensors are User tensors that need real backing memory
+    _impl->register_memory_descriptor(tensor_info, MemoryDescriptor{ MemoryType::User });
 }
 
 TensorInfo GpuWorkloadSketch::create_tensor_info()
 {
-    TensorInfo tensor{};
-    tensor.set_id(allocate_new_tensor_id());
-    return tensor;
-}
-
-ITensorInfo::Id GpuWorkloadSketch::allocate_new_tensor_id()
-{
-    return _impl->allocate_new_tensor_id();
+    TensorInfo tensor_info{};
+    register_new_tensor(tensor_info);
+    return tensor_info;
 }
 
 GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation()
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index 08796b6..d5075d5 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -24,6 +24,7 @@
 #ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
 #define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
 
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
 #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
@@ -52,7 +53,8 @@
           _comp_services{},
           _component_graph{ &_comp_services },
           _operator_group{},
-          _interm_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() }
+          _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() },
+          _mem_map{}
     {
     }
     /** Prevent instances of this class from being copy constructed */
@@ -99,18 +101,42 @@
      */
     GpuWorkloadSourceCode generate_source_code() const
     {
-        return component_graph().fuse().write_workload_code();
+        return component_graph().fuse(_mem_map).write_workload_code();
     }
-    /** Create an intermediate tensor info and save it
+    /** Create a virtual (see @ref MemoryType) tensor info and save it
      *
-     * @return ITensorInfo  The created intermediate tensor info object pointer
+     * @return ITensorInfo*  The created virtual tensor info object pointer
      */
-    ITensorInfo *create_intermediate_tensor()
+    ITensorInfo *create_virtual_tensor()
     {
         auto uptr = std::make_unique<TensorInfo>();
-        uptr->set_id(-allocate_new_tensor_id()); // intermediate tensors must have negative id
-        _interm_tensor_info_list.emplace_back(std::move(uptr));
-        return _interm_tensor_info_list.back().get();
+        uptr->set_id(-allocate_new_tensor_id()); // virtual tensors must have negative id
+        register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Virtual });
+        _managed_tensor_info_list.emplace_back(std::move(uptr));
+        return _managed_tensor_info_list.back().get();
+    }
+    /** Create an auxiliary (see @ref MemoryType) tensor info and save it
+     *
+     * @param[in] tensor_info @ref ITensorInfo to copy from
+     *
+     * @return ITensorInfo*  The created auxiliary tensor info object pointer
+     */
+    ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info)
+    {
+        auto uptr = std::make_unique<TensorInfo>(tensor_info);
+        uptr->set_id(allocate_new_tensor_id());
+        register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ uptr->total_size() } });
+        _managed_tensor_info_list.emplace_back(std::move(uptr));
+        return _managed_tensor_info_list.back().get();
+    }
+    /** Register memory descriptor of a tensor info
+     *
+     * @param[in] info     @ref ITensorInfo to be registered
+     * @param[in] mem_desc @ref MemoryDescriptor to be registered with @p info
+     */
+    void register_memory_descriptor(const ITensorInfo &info, const MemoryDescriptor &mem_desc)
+    {
+        _mem_map[info.id()] = mem_desc;
     }
 
 private:
@@ -119,7 +145,8 @@
     GpuKernelComponentGraph                  _component_graph;
     GpuOperatorGroup                         _operator_group;
     ITensorInfo::Id                          _next_id{ ITensorInfo::invalid_tensor_id };
-    std::vector<std::unique_ptr<TensorInfo>> _interm_tensor_info_list;
+    std::vector<std::unique_ptr<TensorInfo>> _managed_tensor_info_list;
+    MemoryDescriptorMap                      _mem_map;
 };
 } // namespace dynamic_fusion
 } // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index 00fbb73..7a8b979 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -239,7 +239,7 @@
     // Initialize the direct convolution descriptor
     const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info);
 
-    ITensorInfo *dst = sketch.implementation().create_intermediate_tensor();
+    ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
 
     // Assert validation
     ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, attributes));
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
index cd5487c..c906da8 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
@@ -66,7 +66,7 @@
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
     ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
-    ARM_COMPUTE_RETURN_ERROR_ON(!is_user_tensor(dst));
+    ARM_COMPUTE_RETURN_ERROR_ON(!is_alloc_tensor(dst));
 
     // Initialize the destination tensor info.
     TensorInfo dst_to_validate = *dst;
diff --git a/src/dynamic_fusion/utils/Utils.h b/src/dynamic_fusion/utils/Utils.h
index d317ec7..c9fc2c6 100644
--- a/src/dynamic_fusion/utils/Utils.h
+++ b/src/dynamic_fusion/utils/Utils.h
@@ -33,21 +33,29 @@
 {
 namespace dynamic_fusion
 {
-inline bool is_user_tensor(const ITensorInfo *tensor_info)
+/** Check if the tensor should have real backing memory (Alloc type; see @ref MemoryType)
+ */
+inline bool is_alloc_tensor(const ITensorInfo *tensor_info)
 {
     return tensor_info->id() > ITensorInfo::invalid_tensor_id;
 }
 
-inline bool is_intermediate_tensor(const ITensorInfo *tensor_info)
+/** Check if the tensor should not have real backing memory (No-Alloc type; see @ref MemoryType)
+ */
+inline bool is_noalloc_tensor(const ITensorInfo *tensor_info)
 {
     return tensor_info->id() < ITensorInfo::invalid_tensor_id;
 }
 
+/** Check if the @ref ITensorInfo has a valid id
+ */
 inline bool is_valid_tensor(const ITensorInfo *tensor_info)
 {
     return tensor_info->has_valid_id();
 }
 
+/** Check if the @ref ITensorInfo has an invalid id
+ */
 inline bool is_invalid_tensor(const ITensorInfo *tensor_info)
 {
     return !is_valid_tensor(tensor_info);