Integrate Dynamic Fusion patches * Add public interfaces: * OperatorGraph: Describe a workload that could contain fused kernels * IWorkload: Generic interface for workloads built from OperatorGraph * ClWorkload: OpenCL workloads built from OperatorGraph * ClCompositeOperator: Runtime async operator to execute a ClWorkload * DependencyGraph (will likely be deprecated in later iterations) * Add example * cl_fused_conv2d_elementwise_add.cpp to explain how to use the new interfaces * Add internal translation layer * Refactor ClKernelBuildingAPI * Remove non-tile based gemm native kernel component * Minor interface changes * Add integration tests Resolves COMPMID-5161 Signed-off-by: SiCong Li <sicong.li@arm.com> Change-Id: Ib987ed79289ab0bcbd3130d54f5793408d9f1240 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7510 Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>

commit: b63b1196adea8b07dd8db77c2492a212650deba0 [log] [tgz]
author: SiCong Li <sicong.li@arm.com> Fri Jan 28 18:24:39 2022 +0000
committer: SiCong Li <sicong.li@arm.com> Fri May 06 15:01:45 2022 +0000
tree: b264035197873f56c69784bec68cad7041b5d423
parent: 3bb72b69566f18ad5c9446d318d2fc2b5f6dba42 [diff] [blame]
diff --git a/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h
new file mode 100644
index 0000000..c33e189
--- /dev/null
+++ b/src/core/experimental/dynamic_fusion/WorkloadImpl/OperatorGraphImpl.h

@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
+#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION"
+#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
+#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
+#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
+
+#include "arm_compute/core/experimental/ClWorkload.h"
+#include "src/core/experimental/dynamic_fusion/WorkloadImpl/ITensorDescPack.h"
+
+#include "support/Cast.h"
+#include "support/DeepCopy.h"
+
+#include <map>
+#include <tuple>
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+enum class OperatorComplexity
+{
+    Complex = 0,
+    Simple
+};
+
+struct ClKernelGraph;
+struct OpTensorContent
+{
+public:
+    using Id          = DependencyGraph::Id;
+    OpTensorContent() = default;
+    OpTensorContent(Id id)
+        : id{ id }, desc{}
+    {
+    }
+    OpTensorContent(Id id, ITensorInfo *desc)
+        : id{ id }, desc{ desc }
+    {
+    }
+    ~OpTensorContent()                       = default;
+    OpTensorContent(const OpTensorContent &) = default;
+    OpTensorContent &operator=(const OpTensorContent &) = default;
+    OpTensorContent(OpTensorContent &&)                 = default;
+    OpTensorContent &operator=(OpTensorContent &&) = default;
+    bool operator==(const OpTensorContent &other) const
+    {
+        return desc == other.desc;
+    }
+
+    const ITensorInfo *get_tensor_info() const
+    {
+        return desc;
+    }
+    ITensorInfo *get_tensor_info()
+    {
+        return desc;
+    }
+
+    Id           id{};
+    ITensorInfo *desc{};
+};
+
+struct OperatorContent
+{
+public:
+    using Id          = DependencyGraph::Id;
+    OperatorContent() = default;
+    OperatorContent(const OperatorGraph::Implementation *graph, Id id, const ITensorDescPack<OpTensorContent> &tensors)
+        : _graph{ graph }, _id{ id }, _tensors{ tensors }
+    {
+    }
+    OperatorContent(const OperatorContent &op) = default;
+    OperatorContent &operator=(const OperatorContent &op) = default;
+    OperatorContent(OperatorContent &&op)                 = default;
+    OperatorContent &operator=(OperatorContent &&op)            = default;
+    virtual ~OperatorContent()                                  = default;
+    virtual OperatorComplexity complexity() const               = 0;
+    virtual bool operator==(const OperatorContent &other) const = 0;
+    virtual Status translate(ClKernelGraph &kernel_graph) const = 0;
+
+protected:
+    const OperatorGraph::Implementation *_graph {};
+    Id                                   _id{};
+    ITensorDescPack<OpTensorContent>     _tensors{};
+};
+
+struct Conv2dContent : public OperatorContent
+{
+public:
+    Conv2dContent() = default;
+    Conv2dContent(const OperatorGraph::Implementation *graph, Id id, const Conv2dDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+        : OperatorContent(graph, id, tensors), desc(desc), forced_method(), forced_method_enabled(false)
+    {
+    }
+    // Temporary. Do not need to pass ConvolutionMethod
+    Conv2dContent(const OperatorGraph::Implementation *graph, Id id, const Conv2dDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors, ConvolutionMethod method)
+        : OperatorContent(graph, id, tensors), desc(desc), forced_method(method), forced_method_enabled(true)
+    {
+    }
+    ~Conv2dContent()                     = default;
+    Conv2dContent(const Conv2dContent &) = default;
+    Conv2dContent &operator=(const Conv2dContent &) = default;
+    Conv2dContent(Conv2dContent &&)                 = default;
+    Conv2dContent &operator=(Conv2dContent &&) = default;
+    bool operator==(const OperatorContent &other) const override;
+    OperatorComplexity complexity() const override
+    {
+        return OperatorComplexity::Complex;
+    }
+    void set_method(ConvolutionMethod method)
+    {
+        forced_method_enabled = true;
+        forced_method         = method;
+    }
+
+    Status translate(ClKernelGraph &kernel_graph) const override;
+    /** Replicate heuristics of @ref ClConv2d::get_convolution_method(), except that non-supported data types and data layouts are removed from the heuristics
+     *
+     * @param src
+     * @param weights
+     * @param dst
+     * @param conv2d_desc
+     * @param gpu_target
+     * @return ConvolutionMethod
+     */
+    static ConvolutionMethod select_conv_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dDescriptor &conv2d_desc, const GPUTarget gpu_target);
+
+    Conv2dDescriptor  desc{};
+    ConvolutionMethod forced_method{ ConvolutionMethod::GEMM_CONV2D };
+    bool              forced_method_enabled{ false };
+
+private:
+    Status translate_direct_conv2d(ClKernelGraph &kernel_graph) const;
+};
+
+class AddContent : public OperatorContent
+{
+public:
+    AddContent() = default;
+    AddContent(const OperatorGraph::Implementation *graph, Id id, const AddDescriptor &desc, const ITensorDescPack<OpTensorContent> &tensors)
+        : OperatorContent(graph, id, tensors), desc(desc)
+    {
+    }
+    ~AddContent()                  = default;
+    AddContent(const AddContent &) = default;
+    AddContent &operator=(const AddContent &) = default;
+    AddContent(AddContent &&)                 = default;
+    AddContent &operator=(AddContent &&) = default;
+    bool operator==(const OperatorContent &other) const override;
+    OperatorComplexity complexity() const override
+    {
+        return OperatorComplexity::Simple;
+    }
+    Status translate(ClKernelGraph &kernel_graph) const override;
+
+private:
+    AddDescriptor desc{};
+};
+
+struct OperatorGraph::Implementation
+{
+public:
+    template <typename ContentT, typename... Args>
+    void add_node(Operator::Id id, Args &&... args)
+    {
+        operators[id] = utils::memory::make_deep_unique<OperatorContent, ContentT>(this, id, std::forward<Args>(args)...);
+    }
+
+    template <typename... Args>
+    void add_tensor(OpTensor::Id id, Args &&... args)
+    {
+        tensors[id] = utils::memory::make_deep_unique<OpTensorContent, OpTensorContent>(id, std::forward<Args>(args)...);
+    }
+
+    using Dependency  = DependencyGraph;
+    using OperatorMap = std::map<Operator::Id, utils::memory::deep_unique_ptr<OperatorContent>>;
+    using OpTensorMap = std::map<OpTensor::Id, utils::memory::deep_unique_ptr<OpTensorContent>>;
+
+    Implementation()  = default;
+    ~Implementation() = default;
+
+    friend bool operator==(const OperatorGraph::Implementation &graph0, const OperatorGraph::Implementation &graph1)
+    {
+        return graph0.graph == graph1.graph && graph0.operators == graph1.operators && graph0.tensors == graph1.tensors;
+    }
+
+    Dependency  graph{};
+    OperatorMap operators{};
+    OpTensorMap tensors{};
+    Status      status{};
+};
+
+std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph);
+
+std::vector<OperatorContent *> traverse(OperatorGraph::Implementation &graph);
+
+Status translate(ClKernelGraph &kernel_graph, const OperatorGraph::Implementation &op_graph);
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_OPERATORGRAPHIMPL
\ No newline at end of file
commit	b63b1196adea8b07dd8db77c2492a212650deba0	[log] [tgz]
author	SiCong Li <sicong.li@arm.com>	Fri Jan 28 18:24:39 2022 +0000
committer	SiCong Li <sicong.li@arm.com>	Fri May 06 15:01:45 2022 +0000
tree	b264035197873f56c69784bec68cad7041b5d423
parent	3bb72b69566f18ad5c9446d318d2fc2b5f6dba42 [diff] [blame]