Implement dynamic fusion softmax operator

 - Return the aux TensorInfo from get_auxiliary_tensors() at runtime so that
   the aux tensor is initialized with the right size.
 - Keep softmax unfusable for this commit
 - Hence, added Tensor3D to the template writer arguments declaration, for the
   sake of keeping the dynamic fusion softmax components' kernels matching
   their CL counterparts.

Resolves: COMPMID-5523
Change-Id: I667f39545db925f667036ef448302c79a0330373
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/483924
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8986
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h
index 235c860..b15de71 100644
--- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h
@@ -145,10 +145,11 @@
         // (Important) Allocate auxiliary tensor memory if there are any
         for(auto &data : runtime.get_auxiliary_tensors())
         {
-            auto       tensor      = data.first;
-            const auto aux_mem_req = data.second;
-            tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
-            tensor->allocator()->allocate();
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
         }
 
         // Construct user tensors
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h
index e0aecf5..d9ce4df 100644
--- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h
@@ -133,9 +133,10 @@
         // (Important) Allocate auxiliary tensor memory if there are any
         for(auto &data : runtime.get_auxiliary_tensors())
         {
-            auto       tensor      = data.first;
-            const auto aux_mem_req = data.second;
-            tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
             tensor->allocator()->allocate(); // Use ACL allocated memory
         }
         // Construct user tensors
@@ -273,10 +274,11 @@
 
         for(auto &data : runtime.get_auxiliary_tensors())
         {
-            auto       tensor      = data.first;
-            const auto aux_mem_req = data.second;
-            tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
-            tensor->allocator()->allocate();
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
         }
         // Construct user tensors
         TensorType t_input{};
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
index e2722a1..faed610 100644
--- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
@@ -131,10 +131,11 @@
         // (Important) Allocate auxiliary tensor memory if there are any
         for(auto &data : runtime.get_auxiliary_tensors())
         {
-            TensorType   *tensor      = data.first;
-            AuxMemoryInfo aux_mem_req = data.second;
-            tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
-            tensor->allocator()->allocate();
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
         }
 
         // Construct user tensors
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h
index efb67f8..efb5cf1 100644
--- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h
@@ -109,9 +109,10 @@
         // (Important) Allocate auxiliary tensor memory if there are any
         for(auto &data : runtime.get_auxiliary_tensors())
         {
-            auto       tensor      = data.first;
-            const auto aux_mem_req = data.second;
-            tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
             tensor->allocator()->allocate(); // Use ACL allocated memory
         }
         // Construct user tensors
@@ -142,8 +143,8 @@
         return reference::pooling_layer<T>(src, pool_info, QuantizationInfo(), nullptr, DataLayout::NCHW);
     }
 
-    TensorType       _target{};
-    SimpleTensor<T>  _reference{};
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h
index bd99902..cd39ec0 100644
--- a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h
+++ b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h
@@ -132,10 +132,11 @@
         // (Important) Allocate auxiliary tensor memory if there are any
         for(auto &data : runtime.get_auxiliary_tensors())
         {
-            auto       tensor      = data.first;
-            const auto aux_mem_req = data.second;
-            tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
-            tensor->allocator()->allocate();
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
         }
 
         // Construct user tensors
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h
index 0d3b1f0..e0b62d0 100644
--- a/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h
@@ -90,9 +90,10 @@
         // (Important) Allocate auxiliary tensor memory if there are any
         for(auto &data : runtime.get_auxiliary_tensors())
         {
-            auto       tensor      = data.first;
-            const auto aux_mem_req = data.second;
-            tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
             tensor->allocator()->allocate(); // Use ACL allocated memory
         }
 
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h
index 7eb820e..581a3e8 100644
--- a/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h
@@ -158,10 +158,11 @@
         // (Important) Allocate auxiliary tensor memory if there are any
         for(auto &data : runtime.get_auxiliary_tensors())
         {
-            auto       tensor      = data.first;
-            const auto aux_mem_req = data.second;
-            tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment);
-            tensor->allocator()->allocate();
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
         }
 
         // Construct user tensors
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h
new file mode 100644
index 0000000..3817711
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE
+#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE
+
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
+#include "tests/SimpleTensor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/SoftmaxLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionSoftmaxValidationGenericFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log)
+    {
+        _reference = compute_reference(shape, data_type, beta, axis, is_log);
+        _target    = compute_target(shape, data_type, beta, axis, is_log);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        if(tensor.data_type() == DataType::F32)
+        {
+            std::uniform_real_distribution<float> distribution(-10.0f, 10.0f);
+            library->fill(tensor, distribution, 0);
+        }
+        else if(tensor.data_type() == DataType::F16)
+        {
+            arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -10.0f, 10.0f };
+            library->fill(tensor, distribution, 0);
+        }
+        else if(!is_data_type_quantized(tensor.data_type()))
+        {
+            std::uniform_int_distribution<> distribution(0, 100);
+            library->fill(tensor, distribution, 0);
+        }
+        else
+        {
+            library->fill_tensor_uniform(tensor, 0);
+        }
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log)
+    {
+        // Create a new workload sketch
+        CLCompileContext   cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext gpu_ctx        = GpuWorkloadContext{ &cl_compile_ctx };
+        GpuWorkloadSketch  sketch{ &gpu_ctx };
+
+        SoftmaxAttributes softmax_attr{};
+        softmax_attr.axis(axis).beta(beta).is_log_softmax(is_log);
+        TensorInfo src_info = sketch.create_tensor_info(shape, 1, data_type);
+        TensorInfo dst_info = sketch.create_tensor_info(shape, 1, data_type);
+        FunctionType::create_op(sketch, &src_info, &dst_info, softmax_attr);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // (Important) Allocate auxiliary tensor memory if there are any
+        // Instead of using ACL allocated memory, the user can choose to import memory into the tensors
+        for(auto &data : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+        // Construct user tensors
+        TensorType src{};
+        TensorType dst{};
+
+        // Initialize user tensors
+        src.allocator()->init(src_info);
+        dst.allocator()->init(dst_info);
+
+        // Allocate and fill user tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+        fill(AccessorType(src));
+
+        // Run runtime
+        runtime.run({ &src, &dst });
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log)
+    {
+        // Create reference
+        SimpleTensor<T> src{ shape, data_type, 1 };
+
+        // Fill reference
+        fill(src);
+
+        return reference::softmax_layer<T>(src, beta, axis, is_log);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionSoftmaxValidationFixture : public DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log)
+    {
+        DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+                                                                                                       data_type,
+                                                                                                       beta,
+                                                                                                       axis,
+                                                                                                       is_log);
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE */