IVGCVSW-7624 GpuFsa Op: Add Softmax operator

* Added softmax operator support
* Added test cases

Signed-off-by: John Mcloughlin <john.mcloughlin@arm.com>
Change-Id: I51d530b110c4cb812f5aab31ad1ee4022d81d19e
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 264381d..7de150d 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
 
@@ -57,6 +57,7 @@
     ResizeEndToEndTestImpl.hpp
     RuntimeTestImpl.hpp
     SliceEndToEndTestImpl.hpp
+    SoftmaxEndToEndTestImpl.hpp
     SpaceToDepthEndToEndTestImpl.cpp
     SpaceToDepthEndToEndTestImpl.hpp
     SplitterEndToEndTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/SoftmaxEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/SoftmaxEndToEndTestImpl.hpp
new file mode 100644
index 0000000..f3c71f0
--- /dev/null
+++ b/src/backends/backendsCommon/test/SoftmaxEndToEndTestImpl.hpp
@@ -0,0 +1,80 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/INetwork.hpp>
+
+#include <CommonTestUtils.hpp>
+#include <ResolveType.hpp>
+
+#include <doctest/doctest.h>
+
+namespace {
+
+template <typename armnn::DataType DataType>
+armnn::INetworkPtr CreateSoftmaxNetwork(const armnn::TensorShape& inputShape,
+                                        const armnn::TensorShape& outputShape,
+                                        const armnn::SoftmaxDescriptor& descriptor,
+                                        const float qScale = 1.0f,
+                                        const int32_t qOffset = 0)
+{
+    using namespace armnn;
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    TensorInfo inputTensorInfo(inputShape, DataType, qScale, qOffset, true);
+
+    IConnectableLayer* Softmax = net->AddSoftmaxLayer(descriptor, "Softmax");
+    IConnectableLayer* input = net->AddInputLayer(0, "input");
+    Connect(input, Softmax, inputTensorInfo, 0, 0);
+
+    TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset);
+    IConnectableLayer* output = net->AddOutputLayer(0, "output");
+    Connect(Softmax, output, outputTensorInfo, 0, 0);
+
+    return net;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void SoftmaxEndToEnd(const std::vector<armnn::BackendId>& backends)
+{
+    using namespace armnn;
+
+    const TensorShape& inputShape  = { 2, 2 };
+    const TensorShape& outputShape = { 2, 2 };
+
+    SoftmaxDescriptor softmaxDesc;
+    softmaxDesc.m_Beta = 1.0f;
+    softmaxDesc.m_Axis = 1;
+
+    // Builds up the structure of the network
+    INetworkPtr net = CreateSoftmaxNetwork<ArmnnType>(inputShape,
+                                                      outputShape,
+                                                      softmaxDesc);
+
+    CHECK(net);
+
+    std::vector<T> inputData
+    {
+            17.0f, 16.0f, 5.0f, 14.0f
+    };
+
+    std::vector<T> expectedOutputData
+    {
+            0.731059f, 0.268941f, 0.000123f, 0.999877f
+    };
+
+    std::map<int, std::vector<T>> inputTensorData = { {0, inputData} };
+    std::map<int, std::vector<T>> expectedOutputTensorData = { {0, expectedOutputData} };
+
+    EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(net),
+                                                inputTensorData,
+                                                expectedOutputTensorData,
+                                                backends);
+}
+
+} // anonymous namespace
diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp
index 72f8af7..ec82f3d 100644
--- a/src/backends/gpuFsa/GpuFsaBackend.cpp
+++ b/src/backends/gpuFsa/GpuFsaBackend.cpp
@@ -27,6 +27,7 @@
 #include "layers/GpuFsaElementwiseBinary.hpp"
 #include "layers/GpuFsaPooling2d.hpp"
 #include "layers/GpuFsaResize.hpp"
+#include "layers/GpuFsaSoftmax.hpp"
 
 namespace armnn
 {
@@ -336,6 +337,18 @@
                 GpuFsaResizeCreateOp(preCompiledBlobPtr, input, *desc);
                 break;
             }
+            case (LayerType::Softmax):
+            {
+                auto input = base.GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+                auto output = base.GetOutputSlot(0).GetTensorInfo();
+
+                auto desc = PolymorphicDowncast<const SoftmaxDescriptor*>(&base.GetParameters());
+                GpuFsaSoftmaxCreateOp(preCompiledBlobPtr,
+                                      input,
+                                      output,
+                                      *desc);
+                break;
+            }
             default:
                 // unsupported layer for GpuFsa backend
                 continue;
diff --git a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
index 85fb03a..98fb430 100644
--- a/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
+++ b/src/backends/gpuFsa/GpuFsaLayerSupport.cpp
@@ -17,6 +17,7 @@
 #include "layers/GpuFsaElementwiseBinary.hpp"
 #include "layers/GpuFsaPooling2d.hpp"
 #include "layers/GpuFsaResize.hpp"
+#include "layers/GpuFsaSoftmax.hpp"
 #endif
 
 #include <vector>
@@ -206,6 +207,21 @@
                                         infos[0],
                                         *desc);
         }
+        case LayerType::Softmax:
+        {
+            if (infos.size() != 2)
+            {
+                throw InvalidArgumentException("Invalid number of Softmax TensorInfos. "
+                                               "TensorInfos should be of format: {input, output}.");
+            }
+
+            auto desc = PolymorphicDowncast<const SoftmaxDescriptor*>(&descriptor);
+            FORWARD_LAYER_VALIDATE_FUNC(GpuFsaSoftmaxValidate,
+                                        reasonIfUnsupported,
+                                        infos[0],
+                                        infos[1],
+                                        *desc);
+        }
         case LayerType::Constant:
         case LayerType::Input:
         case LayerType::Output:
diff --git a/src/backends/gpuFsa/layers/CMakeLists.txt b/src/backends/gpuFsa/layers/CMakeLists.txt
index 37d52e4..c7b83ae 100644
--- a/src/backends/gpuFsa/layers/CMakeLists.txt
+++ b/src/backends/gpuFsa/layers/CMakeLists.txt
@@ -18,6 +18,8 @@
         GpuFsaPooling2d.hpp
         GpuFsaResize.cpp
         GpuFsaResize.hpp
+        GpuFsaSoftmax.cpp
+        GpuFsaSoftmax.hpp
         UtilsGpuFsa.cpp
         UtilsGpuFsa.hpp
     )
diff --git a/src/backends/gpuFsa/layers/GpuFsaSoftmax.cpp b/src/backends/gpuFsa/layers/GpuFsaSoftmax.cpp
new file mode 100644
index 0000000..6e5aa26
--- /dev/null
+++ b/src/backends/gpuFsa/layers/GpuFsaSoftmax.cpp
@@ -0,0 +1,106 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "GpuFsaSoftmax.hpp"
+
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h>
+#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h>
+#include <iostream>
+using namespace arm_compute::experimental::dynamic_fusion;
+using namespace armnn::armcomputetensorutils;
+
+namespace armnn
+{
+
+arm_compute::Status GpuFsaSoftmaxValidate(const TensorInfo& input,
+                                          const TensorInfo& output,
+                                          const SoftmaxDescriptor& descriptor)
+{
+    // Create a new workload sketch, for validation purposes
+    auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
+    auto workloadContext = GpuWorkloadContext(&compileCtx);
+    GpuWorkloadSketch sketch{ &workloadContext };
+
+    // Build and create tensor infos using the sketch
+    arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
+    arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, output.GetNumDimensions());
+    aclInputInfo.set_are_values_constant(input.IsConstant());
+    aclOutputInfo.set_are_values_constant(output.IsConstant());
+    arm_compute::ITensorInfo*  inputInfo = workloadContext.create_tensor_info(aclInputInfo);
+    arm_compute::ITensorInfo*  outputInfo = workloadContext.create_tensor_info(aclOutputInfo);
+
+    // Set Softmax attributes using descriptor
+    SoftmaxAttributes softmaxAttributes{};
+    softmaxAttributes.beta(descriptor.m_Beta);
+    softmaxAttributes.is_log_softmax(false); // Use Softmax not LogSoftmax
+    int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
+    softmaxAttributes.axis(aclAxis);
+
+    // Validate operator, check status and update reasonIfUnsupported
+    arm_compute::Status aclStatus = GpuSoftmax::validate_op(sketch,
+                                                           inputInfo,
+                                                           outputInfo,
+                                                           softmaxAttributes);
+
+#ifndef NDEBUG
+    const bool validated = aclStatus.error_code() == arm_compute::ErrorCode::OK;
+    if (!validated)
+    {
+        std::cout << "GpuFsaSoftmaxValidate failed: " << aclStatus.error_description() << std::endl;
+    }
+#endif
+
+    return aclStatus;
+}
+
+void GpuFsaSoftmaxCreateOp(GpuFsaPreCompiledBlob* blob,
+                           const TensorInfo& input,
+                           const TensorInfo& output,
+                           const SoftmaxDescriptor& descriptor)
+{
+    GpuWorkloadSketch* sketch           = blob->sketch.get();
+    GpuWorkloadContext* workloadContext = blob->workloadContext.get();
+    std::vector<arm_compute::ITensorInfo*> inputTensorInfos  = {};
+    std::vector<arm_compute::ITensorInfo*> outputTensorInfos  = {};
+
+    arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, input.GetNumDimensions());
+    arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, output.GetNumDimensions());
+    aclInputInfo.set_are_values_constant(input.IsConstant());
+    aclOutputInfo.set_are_values_constant(output.IsConstant());
+
+    inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
+    outputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclOutputInfo));
+
+    // Set Softmax attributes using descriptor
+    SoftmaxAttributes softmaxAttributes{};
+    softmaxAttributes.beta(descriptor.m_Beta); // Scaling factor for the exponent, applies to Softmax and LogSoftmax
+    softmaxAttributes.is_log_softmax(false); // Use Softmax not LogSoftmax
+    int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
+    softmaxAttributes.axis(aclAxis);
+
+    // Validate operator before creating it; throw if the configuration is unsupported
+    arm_compute::Status aclStatus = GpuSoftmax::validate_op(*sketch,
+                                                            inputTensorInfos[0],
+                                                            outputTensorInfos[0],
+                                                            softmaxAttributes);
+    const bool supported = aclStatus.error_code() == arm_compute::ErrorCode::OK;
+    if (!supported)
+    {
+        throw BackendCapabilityException("\"GpuFsa\" backend failed during softmax validation");
+    }
+
+    GpuSoftmax::create_op(*sketch, inputTensorInfos[0], outputTensorInfos[0], softmaxAttributes);
+
+    // Store the TensorInfos within the blob as unique_ptrs to be used later
+    blob->inputTensorInfos  = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
+    blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
+}
+
+} // namespace armnn
diff --git a/src/backends/gpuFsa/layers/GpuFsaSoftmax.hpp b/src/backends/gpuFsa/layers/GpuFsaSoftmax.hpp
new file mode 100644
index 0000000..cf078fc
--- /dev/null
+++ b/src/backends/gpuFsa/layers/GpuFsaSoftmax.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+
+#include <gpuFsa/GpuFsaBackend.hpp>
+
+namespace armnn
+{
+
+arm_compute::Status GpuFsaSoftmaxValidate(const TensorInfo& input,
+                                          const TensorInfo& output,
+                                          const SoftmaxDescriptor& descriptor);
+
+void GpuFsaSoftmaxCreateOp(GpuFsaPreCompiledBlob* blob,
+                           const TensorInfo& input,
+                           const TensorInfo& output,
+                           const SoftmaxDescriptor& descriptor);
+
+} // namespace armnn
diff --git a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp
index 7503c46..da6431f 100644
--- a/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp
+++ b/src/backends/gpuFsa/test/GpuFsaEndToEndTests.cpp
@@ -12,6 +12,7 @@
 #include "backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp"
 #include "backendsCommon/test/Pooling2dEndToEndTestImpl.hpp"
 #include "backendsCommon/test/ResizeEndToEndTestImpl.hpp"
+#include "backendsCommon/test/SoftmaxEndToEndTestImpl.hpp"
 
 #include <doctest/doctest.h>
 
@@ -167,8 +168,21 @@
 
 TEST_CASE("GpuFsaResizeNearestNeighborEndToEndFloatHalfPixelNhwcTest")
 {
-    ResizeNearestNeighborEndToEnd<armnn::DataType::Float32>(gpuFsaDefaultBackends, armnn::DataLayout::NHWC, 
+    ResizeNearestNeighborEndToEnd<armnn::DataType::Float32>(gpuFsaDefaultBackends, armnn::DataLayout::NHWC,
                                                             false, true);
 }
 
+TEST_CASE("UNSUPPORTED_GpuFsaSoftmaxTestFloat32")
+{
+    try
+    {
+        SoftmaxEndToEnd<armnn::DataType::Float32>(gpuFsaDefaultBackends);
+        FAIL("An exception should have been thrown");
+    }
+    catch (const armnn::InvalidArgumentException& e)
+    {
+        CHECK(strcmp(e.what(), "Failed to assign a backend to each layer") == 0);
+    }
+}
+
 }
diff --git a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp
index b6f7f32..cb1ddd8 100644
--- a/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp
+++ b/src/backends/gpuFsa/test/GpuFsaLayerSupportTests.cpp
@@ -181,4 +181,25 @@
     CHECK(supported);
 }
 
+TEST_CASE("UNSUPPORTED_IsLayerSupportedGpuFsaSoftmax")
+{
+    TensorInfo inputInfo({ 2, 2 }, DataType::Float32);
+    TensorInfo outputInfo({ 2, 2 }, DataType::Float32);
+
+    SoftmaxDescriptor desc;
+    desc.m_Axis = 1;
+    desc.m_Beta = 1.0f;
+
+    GpuFsaLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(LayerType::Softmax,
+                                                     {inputInfo, outputInfo},
+                                                     desc,
+                                                     EmptyOptional(),
+                                                     EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(!supported);
+}
+
 }
\ No newline at end of file
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 2c5dc37..866cff8 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -38,6 +38,7 @@
 #include <backendsCommon/test/ResizeEndToEndTestImpl.hpp>
 #include <backendsCommon/test/ReverseV2EndToEndTestImpl.hpp>
 #include <backendsCommon/test/SliceEndToEndTestImpl.hpp>
+#include <backendsCommon/test/SoftmaxEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
 #include <backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp>
@@ -1242,6 +1243,12 @@
     LogSoftmaxEndToEndTest(defaultBackends);
 }
 
+// Softmax
+TEST_CASE("RefSoftmaxEndToEndTestFloat32")
+{
+    SoftmaxEndToEnd<armnn::DataType::Float32>(defaultBackends);
+}
+
 // Prelu
 TEST_CASE("RefPreluEndToEndTestFloat32")
 {