IVGCVSW-7835 Add ReverseV2 CL and Neon Workloads

 * Added ReverseV2 to CL and Neon backends
 * Added CL and Neon ReverseV2 layer unit tests

Signed-off-by: Tianle Cheng <tianle.cheng@arm.com>
Change-Id: I646275c629caf17dac1950b0cd7083f23f87f387
diff --git a/delegate/test/ReverseV2Test.cpp b/delegate/test/ReverseV2Test.cpp
index b261474..430cf2c 100644
--- a/delegate/test/ReverseV2Test.cpp
+++ b/delegate/test/ReverseV2Test.cpp
@@ -19,165 +19,159 @@
 namespace armnnDelegate
 {
 
-    void ReverseV2Float32Test(std::vector<armnn::BackendId>& backends)
+void ReverseV2Float32Test(std::vector<armnn::BackendId>& backends)
+{
+    // Set input data
+    std::vector<float> inputValues =
     {
-        // Set input data
-        std::vector<float> inputValues =
-        {
-            1.0f, 2.0f, 3.0f,
-            4.0f, 5.0f, 6.0f,
-            7.0f, 8.0f, 9.0f,
+        1.0f, 2.0f, 3.0f,
+        4.0f, 5.0f, 6.0f,
+        7.0f, 8.0f, 9.0f,
 
-            11.0f, 12.0f, 13.0f,
-            14.0f, 15.0f, 16.0f,
-            17.0f, 18.0f, 19.0f,
+        11.0f, 12.0f, 13.0f,
+        14.0f, 15.0f, 16.0f,
+        17.0f, 18.0f, 19.0f,
 
-            21.0f, 22.0f, 23.0f,
-            24.0f, 25.0f, 26.0f,
-            27.0f, 28.0f, 29.0f
-        };
+        21.0f, 22.0f, 23.0f,
+        24.0f, 25.0f, 26.0f,
+        27.0f, 28.0f, 29.0f
+    };
 
-        // The output data
-        std::vector<float> expectedOutputValues =
-        {
-            3.0f, 2.0f, 1.0f,
-            6.0f, 5.0f, 4.0f,
-            9.0f, 8.0f, 7.0f,
+    // The output data
+    std::vector<float> expectedOutputValues =
+    {
+        3.0f, 2.0f, 1.0f,
+        6.0f, 5.0f, 4.0f,
+        9.0f, 8.0f, 7.0f,
 
-            13.0f, 12.0f, 11.0f,
-            16.0f, 15.0f, 14.0f,
-            19.0f, 18.0f, 17.0f,
+        13.0f, 12.0f, 11.0f,
+        16.0f, 15.0f, 14.0f,
+        19.0f, 18.0f, 17.0f,
 
-            23.0f, 22.0f, 21.0f,
-            26.0f, 25.0f, 24.0f,
-            29.0f, 28.0f, 27.0f
-        };
+        23.0f, 22.0f, 21.0f,
+        26.0f, 25.0f, 24.0f,
+        29.0f, 28.0f, 27.0f
+    };
 
-        // The axis to reverse
-        const std::vector<int32_t> axisValues = {2};
+    // The axis to reverse
+    const std::vector<int32_t> axisValues = {2};
 
-        // Shapes
-        const std::vector<int32_t> inputShape = {3, 3, 3};
-        const std::vector<int32_t> axisShapeDims = {1};
-        const std::vector<int32_t> expectedOutputShape = {3, 3, 3};
+    // Shapes
+    const std::vector<int32_t> inputShape = {3, 3, 3};
+    const std::vector<int32_t> axisShapeDims = {1};
+    const std::vector<int32_t> expectedOutputShape = {3, 3, 3};
 
-        ReverseV2FP32TestImpl(tflite::BuiltinOperator_REVERSE_V2,
-                              backends,
-                              inputValues,
-                              inputShape,
-                              axisValues,
-                              axisShapeDims,
-                              expectedOutputValues,
-                              expectedOutputShape);
+    ReverseV2FP32TestImpl(tflite::BuiltinOperator_REVERSE_V2,
+                          backends,
+                          inputValues,
+                          inputShape,
+                          axisValues,
+                          axisShapeDims,
+                          expectedOutputValues,
+                          expectedOutputShape);
+}
+
+void ReverseV2NegativeAxisFloat32Test(std::vector<armnn::BackendId>& backends)
+{
+    // Set input data
+    std::vector<float> inputValues =
+    {
+        1.0f, 2.0f, 3.0f,
+        4.0f, 5.0f, 6.0f,
+        7.0f, 8.0f, 9.0f,
+
+        11.0f, 12.0f, 13.0f,
+        14.0f, 15.0f, 16.0f,
+        17.0f, 18.0f, 19.0f,
+
+        21.0f, 22.0f, 23.0f,
+        24.0f, 25.0f, 26.0f,
+        27.0f, 28.0f, 29.0f
+    };
+
+    // The output data
+    std::vector<float> expectedOutputValues =
+    {
+        7.0f, 8.0f, 9.0f,
+        4.0f, 5.0f, 6.0f,
+        1.0f, 2.0f, 3.0f,
+
+        17.0f, 18.0f, 19.0f,
+        14.0f, 15.0f, 16.0f,
+        11.0f, 12.0f, 13.0f,
+
+        27.0f, 28.0f, 29.0f,
+        24.0f, 25.0f, 26.0f,
+        21.0f, 22.0f, 23.0f
+    };
+
+    // The axis to reverse
+    const std::vector<int32_t> axisValues = {-2};
+
+    // Shapes
+    const std::vector<int32_t> inputShape = {3, 3, 3};
+    const std::vector<int32_t> axisShapeDims = {1};
+    const std::vector<int32_t> expectedOutputShape = {3, 3, 3};
+
+    ReverseV2FP32TestImpl(tflite::BuiltinOperator_REVERSE_V2,
+                          backends,
+                          inputValues,
+                          inputShape,
+                          axisValues,
+                          axisShapeDims,
+                          expectedOutputValues,
+                          expectedOutputShape);
+}
+
+TEST_SUITE("ReverseV2Tests_GpuAccTests")
+{
+
+    TEST_CASE ("ReverseV2_Float32_GpuAcc_Test")
+    {
+        std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
+        ReverseV2Float32Test(backends);
     }
 
-    void ReverseV2NegativeAxisFloat32Test(std::vector<armnn::BackendId>& backends)
+    TEST_CASE ("ReverseV2_NegativeAxis_Float32_GpuAcc_Test")
     {
-        // Set input data
-        std::vector<float> inputValues =
-        {
-            1.0f, 2.0f, 3.0f,
-            4.0f, 5.0f, 6.0f,
-            7.0f, 8.0f, 9.0f,
-
-            11.0f, 12.0f, 13.0f,
-            14.0f, 15.0f, 16.0f,
-            17.0f, 18.0f, 19.0f,
-
-            21.0f, 22.0f, 23.0f,
-            24.0f, 25.0f, 26.0f,
-            27.0f, 28.0f, 29.0f
-        };
-
-        // The output data
-        std::vector<float> expectedOutputValues =
-        {
-            7.0f, 8.0f, 9.0f,
-            4.0f, 5.0f, 6.0f,
-            1.0f, 2.0f, 3.0f,
-
-            17.0f, 18.0f, 19.0f,
-            14.0f, 15.0f, 16.0f,
-            11.0f, 12.0f, 13.0f,
-
-            27.0f, 28.0f, 29.0f,
-            24.0f, 25.0f, 26.0f,
-            21.0f, 22.0f, 23.0f
-        };
-
-        // The axis to reverse
-        const std::vector<int32_t> axisValues = {-2};
-
-        // Shapes
-        const std::vector<int32_t> inputShape = {3, 3, 3};
-        const std::vector<int32_t> axisShapeDims = {1};
-        const std::vector<int32_t> expectedOutputShape = {3, 3, 3};
-
-        ReverseV2FP32TestImpl(tflite::BuiltinOperator_REVERSE_V2,
-                              backends,
-                              inputValues,
-                              inputShape,
-                              axisValues,
-                              axisShapeDims,
-                              expectedOutputValues,
-                              expectedOutputShape);
+        std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
+        ReverseV2NegativeAxisFloat32Test(backends);
     }
 
-#if defined(REVERSEV2_GPUACC)
-    TEST_SUITE("ReverseV2Tests_GpuAccTests")
+} // TEST_SUITE("ReverseV2Tests_GpuAccTests")
+
+TEST_SUITE("ReverseV2Tests_CpuAccTests")
+{
+
+    TEST_CASE ("ReverseV2_Float32_CpuAcc_Test")
     {
+        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+        ReverseV2Float32Test(backends);
+    }
 
-        TEST_CASE ("ReverseV2_Float32_GpuAcc_Test")
-        {
-            std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
-            ReverseV2Float32Test(backends);
-        }
-
-        TEST_CASE ("ReverseV2_NegativeAxis_Float32_GpuAcc_Test")
-        {
-            std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
-            ReverseV2NegativeAxisFloat32Test(backends);
-        }
-
-    } // TEST_SUITE("ReverseV2Tests_GpuAccTests")
-#endif
-
-
-#if defined(REVERSEV2_CPUACC)
-    TEST_SUITE("ReverseV2Tests_CpuAccTests")
+    TEST_CASE ("ReverseV2_NegativeAxis_Float32_CpuAcc_Test")
     {
+        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+        ReverseV2NegativeAxisFloat32Test(backends);
+    }
 
-        TEST_CASE ("ReverseV2_Float32_CpuAcc_Test")
-        {
-            std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
-            ReverseV2Float32Test(backends);
-        }
+} // TEST_SUITE("ReverseV2Tests_CpuAccTests")
 
-        TEST_CASE ("ReverseV2_NegativeAxis_Float32_CpuAcc_Test")
-        {
-            std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
-            ReverseV2NegativeAxisFloat32Test(backends);
-        }
+TEST_SUITE("ReverseV2Tests_CpuRefTests")
+{
 
-    } // TEST_SUITE("ReverseV2Tests_CpuAccTests")
-#endif
-
-
-    TEST_SUITE("ReverseV2Tests_CpuRefTests")
+    TEST_CASE ("ReverseV2_Float32_CpuRef_Test")
     {
+        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+        ReverseV2Float32Test(backends);
+    }
 
-        TEST_CASE ("ReverseV2_Float32_CpuRef_Test")
-        {
-            std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
-            ReverseV2Float32Test(backends);
-        }
+    TEST_CASE ("ReverseV2_NegativeAxis_Float32_CpuRef_Test")
+    {
+        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+        ReverseV2NegativeAxisFloat32Test(backends);
+    }
 
-        TEST_CASE ("ReverseV2_NegativeAxis_Float32_CpuRef_Test")
-        {
-            std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
-            ReverseV2NegativeAxisFloat32Test(backends);
-        }
-
-    } // TEST_SUITE("ReverseV2Tests_CpuRefTests")
+} // TEST_SUITE("ReverseV2Tests_CpuRefTests")
 
 } // namespace armnnDelegate
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
index 5d424af..9a30a74 100644
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -284,20 +284,6 @@
     return aclAxis;
 }
 
-/// Function to convert axis to its positive equivalent value.
-/// [-rank, rank) --> [0, rank)
-inline unsigned int ComputePositiveAxis(const int& axis, const armnn::TensorInfo& tensor)
-{
-    int rank = static_cast<int>(tensor.GetNumDimensions());
-
-    ARMNN_ASSERT(rank != 0);
-    ARMNN_ASSERT((-1 * rank) <= axis);
-    ARMNN_ASSERT(axis < rank);
-
-    int positiveAxis = (axis < 0) ? rank + axis : axis;
-    return static_cast<unsigned int>(positiveAxis);
-}
-
 /// Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor.
 inline arm_compute::Conv3dInfo ComputeConv3DInfo(const armnn::Convolution3dDescriptor descriptor,
                                                  bool isFastMathEnabled,
diff --git a/src/backends/backendsCommon/WorkloadUtils.cpp b/src/backends/backendsCommon/WorkloadUtils.cpp
index 28d01ec..e36c4b2 100644
--- a/src/backends/backendsCommon/WorkloadUtils.cpp
+++ b/src/backends/backendsCommon/WorkloadUtils.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
diff --git a/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp
index f277403..144bf9e 100644
--- a/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp
@@ -18,73 +18,74 @@
 
 namespace
 {
-    template<armnn::DataType ArmnnType, typename T, std::size_t NumDims>
-    LayerTestResult<T, NumDims> ReverseV2TestImpl(
-        armnn::IWorkloadFactory& workloadFactory,
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-        const armnn::ITensorHandleFactory& tensorHandleFactory,
-        const std::vector<T>& input,
-        const std::vector<int>& axis,
-        const std::vector<T>& outputExpected,
-        const armnn::TensorInfo& inputInfo,
-        const armnn::TensorInfo& axisInfo,
-        const armnn::TensorInfo& outputInfo)
+template<armnn::DataType ArmnnType, typename T, std::size_t NumDims>
+LayerTestResult<T, NumDims> ReverseV2TestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const std::vector<T>& input,
+    const std::vector<int>& axis,
+    const std::vector<T>& outputExpected,
+    const armnn::TensorInfo& inputInfo,
+    const armnn::TensorInfo& axisInfo,
+    const armnn::TensorInfo& outputInfo)
+{
+    LayerTestResult<T, NumDims> result(outputInfo);
+    std::vector<T> outputActual(outputInfo.GetNumElements());
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<armnn::ITensorHandle> axisHandle = tensorHandleFactory.CreateTensorHandle(axisInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
+
+    armnn::ReverseV2QueueDescriptor queueDescriptor;
+    armnn::WorkloadInfo workloadInfo;
+
+    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
+    AddInputToWorkload(queueDescriptor, workloadInfo, axisInfo, axisHandle.get());
+    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+
+    // Don't execute if ReverseV2 is not supported, as an exception will be raised.
+    const armnn::BackendId& backend = workloadFactory.GetBackendId();
+    std::string reasonIfUnsupported;
+    armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId(backend);
+    result.m_Supported = handle.IsReverseV2Supported(inputInfo,
+                                                     axisInfo,
+                                                     outputInfo,
+                                                     reasonIfUnsupported);
+    if (!result.m_Supported)
     {
-        LayerTestResult<T, NumDims> result(outputInfo);
-        std::vector<T> outputActual(outputInfo.GetNumElements());
-
-        std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
-        std::unique_ptr<armnn::ITensorHandle> axisHandle = tensorHandleFactory.CreateTensorHandle(axisInfo);
-        std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
-
-        armnn::ReverseV2QueueDescriptor queueDescriptor;
-        armnn::WorkloadInfo workloadInfo;
-
-        AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
-        AddInputToWorkload(queueDescriptor, workloadInfo, axisInfo, axisHandle.get());
-        AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
-
-        // Don't execute if ReverseV2 is not supported, as an exception will be raised.
-        const armnn::BackendId& backend = workloadFactory.GetBackendId();
-        std::string reasonIfUnsupported;
-        armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId(backend);
-        result.m_Supported = handle.IsReverseV2Supported(inputInfo,
-                                                         axisInfo,
-                                                         outputInfo,
-                                                         reasonIfUnsupported);
-        if (!result.m_Supported)
-        {
-            return result;
-        }
-
-        auto workload = workloadFactory.CreateWorkload(armnn::LayerType::ReverseV2, queueDescriptor, workloadInfo);
-
-        inputHandle->Allocate();
-        axisHandle->Allocate();
-        outputHandle->Allocate();
-
-        if (input.data() != nullptr)
-        {
-            CopyDataToITensorHandle(inputHandle.get(), input.data());
-        }
-        if (axis.data() != nullptr)
-        {
-            CopyDataToITensorHandle(axisHandle.get(), axis.data());
-        }
-
-        workload->PostAllocationConfigure();
-        ExecuteWorkload(*workload, memoryManager);
-
-        if (outputActual.data() != nullptr)
-        {
-            CopyDataFromITensorHandle(outputActual.data(), outputHandle.get());
-        }
-
-        return LayerTestResult<T, NumDims>(outputActual,
-                                           outputExpected,
-                                           outputHandle->GetShape(),
-                                           outputInfo.GetShape());
+        return result;
     }
+
+    auto workload = workloadFactory.CreateWorkload(armnn::LayerType::ReverseV2, queueDescriptor, workloadInfo);
+
+    inputHandle->Allocate();
+    axisHandle->Allocate();
+    outputHandle->Allocate();
+
+    if (input.data() != nullptr)
+    {
+        CopyDataToITensorHandle(inputHandle.get(), input.data());
+    }
+    if (axis.data() != nullptr)
+    {
+        CopyDataToITensorHandle(axisHandle.get(), axis.data());
+    }
+
+    workload->PostAllocationConfigure();
+    ExecuteWorkload(*workload, memoryManager);
+
+    if (outputActual.data() != nullptr)
+    {
+        CopyDataFromITensorHandle(outputActual.data(), outputHandle.get());
+    }
+
+    return LayerTestResult<T, NumDims>(outputActual,
+                                       outputExpected,
+                                       outputHandle->GetShape(),
+                                       outputInfo.GetShape());
+
+}
 }
 
 template<armnn::DataType ArmnnType, typename T>
@@ -107,7 +108,7 @@
         3, 4
     }, qScale, qOffset);
 
-    std::vector<int> axis = armnnUtils::QuantizedVector<int>({1, 1}, qScale, qOffset);
+    std::vector<int> axis = armnnUtils::QuantizedVector<int>({}, qScale, qOffset);
 
     std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>({
         1, 2,
@@ -115,14 +116,14 @@
     }, qScale, qOffset);
 
     return ReverseV2TestImpl<ArmnnType, T, 2>(workloadFactory,
-                                                memoryManager,
-                                                tensorHandleFactory,
-                                                input,
-                                                axis,
-                                                outputExpected,
-                                                inputInfo,
-                                                axisInfo,
-                                                outputInfo);
+                                              memoryManager,
+                                              tensorHandleFactory,
+                                              input,
+                                              axis,
+                                              outputExpected,
+                                              inputInfo,
+                                              axisInfo,
+                                              outputInfo);
 }
 
 template<armnn::DataType ArmnnType, typename T>
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 1acaba0..bfe4f6e 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -70,6 +70,7 @@
 #include "workloads/ClReduceWorkload.hpp"
 #include "workloads/ClReshapeWorkload.hpp"
 #include "workloads/ClResizeWorkload.hpp"
+#include "workloads/ClReverseV2Workload.hpp"
 #include "workloads/ClRsqrtWorkload.hpp"
 #include "workloads/ClSinWorkload.hpp"
 #include "workloads/ClSliceWorkload.hpp"
@@ -571,6 +572,11 @@
                                      infos[1],
                                      *(PolymorphicDowncast<const ResizeDescriptor*>(&descriptor)),
                                      reasonIfUnsupported);
+        case LayerType::ReverseV2:
+            return IsReverseV2Supported(infos[0],
+                                        infos[1],
+                                        infos[2],
+                                        reasonIfUnsupported);
         case LayerType::Shape:
             return LayerSupportBase::IsShapeSupported(infos[0],
                                                       infos[1],
@@ -1423,6 +1429,18 @@
     FORWARD_WORKLOAD_VALIDATE_FUNC(ClResizeWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
 }
 
+bool ClLayerSupport::IsReverseV2Supported(const TensorInfo& input,
+                                          const TensorInfo& axis,
+                                          const TensorInfo& output,
+                                          Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(ClReverseV2WorkloadValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   axis,
+                                   output);
+}
+
 bool ClLayerSupport::IsSliceSupported(const TensorInfo& input,
                                       const TensorInfo& output,
                                       const SliceDescriptor& descriptor,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index 9a8f896..907db01 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -295,6 +295,11 @@
                            const ResizeDescriptor& descriptor,
                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
 
+    bool IsReverseV2Supported(const TensorInfo& input,
+                              const TensorInfo& axis,
+                              const TensorInfo& output,
+                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+
     bool IsSliceSupported(const TensorInfo& input,
                           const TensorInfo& output,
                           const SliceDescriptor& descriptor,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index da05f67..6fe4264 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -711,6 +711,11 @@
             auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
             return MakeWorkload<ClResizeWorkload>(*resizeQueueDescriptor, info, m_CLCompileContext);
         }
+        case LayerType::ReverseV2:
+        {
+            auto reverseV2QueueDescriptor = PolymorphicDowncast<const ReverseV2QueueDescriptor*>(&descriptor);
+            return MakeWorkload<ClReverseV2Workload>(*reverseV2QueueDescriptor, info, m_CLCompileContext);
+        }
         case LayerType::Slice :
         {
             auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 3499305..2143c30 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -79,6 +79,7 @@
         workloads/ClReduceWorkload.cpp \
         workloads/ClReshapeWorkload.cpp \
         workloads/ClResizeWorkload.cpp \
+        workloads/ClReverseV2Workload.cpp \
         workloads/ClRsqrtWorkload.cpp \
         workloads/ClSinWorkload.cpp \
         workloads/ClSliceWorkload.cpp \
diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp
index 50ba1db..2436a82 100644
--- a/src/backends/cl/test/ClEndToEndTests.cpp
+++ b/src/backends/cl/test/ClEndToEndTests.cpp
@@ -22,6 +22,7 @@
 #include <backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp>
 #include <backendsCommon/test/ReduceEndToEndTestImpl.hpp>
 #include <backendsCommon/test/ReshapeEndToEndTestImpl.hpp>
+#include <backendsCommon/test/ReverseV2EndToEndTestImpl.hpp>
 #include <backendsCommon/test/SliceEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
@@ -345,6 +346,12 @@
     ReshapeEndToEndFloat16<armnn::DataType::Float16>(clDefaultBackends);
 }
 
+// ReverseV2
+TEST_CASE("ClReverseV2EndToEndTest")
+{
+    ReverseV2EndToEnd<armnn::DataType::Float32>(clDefaultBackends);
+}
+
 // Space to depth
 TEST_CASE("ClSpaceToDepthNhwcEndToEndTest1")
 {
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index a596a01..da2b967 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -2015,6 +2015,35 @@
                                  AlignCornersResizeNearestNeighbourTest<DataType::QAsymmU8>,
                                  DataLayout::NHWC)
 
+// ReverseV2
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple1DimFloat32, ReverseV2SimpleTest1Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim1AxisFloat32, ReverseV2SimpleTest2Dim1Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisFloat32, ReverseV2SimpleTest2Dim2Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple3Dim1AxisFloat32, ReverseV2SimpleTest3Dim1Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple3Dim2AxisFloat32, ReverseV2SimpleTest3Dim2Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple3Dim3AxisFloat32, ReverseV2SimpleTest3Dim3Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple4Dim1AxisFloat32, ReverseV2SimpleTest4Dim1Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple4Dim2AxisFloat32, ReverseV2SimpleTest4Dim2Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple4Dim3AxisFloat32, ReverseV2SimpleTest4Dim3Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple4Dim4AxisFloat32, ReverseV2SimpleTest4Dim4Axis<DataType::Float32>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2EvenRowOddCol2DimFloat32, ReverseV2EvenRowOddColTest2Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2EvenRowOddCol3DimFloat32, ReverseV2EvenRowOddColTest3Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2EvenRowEvenCol2DimFloat32, ReverseV2EvenRowEvenColTest2Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2EvenRowEvenCol3DimFloat32, ReverseV2EvenRowEvenColTest3Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2OddRowOddCol2DimFloat32, ReverseV2OddRowOddColTest2Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2OddRowOddCol3DimFloat32, ReverseV2OddRowOddColTest3Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2OddRowEvenCol2DimFloat32, ReverseV2OddRowEvenColTest2Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2OddRowEvenCol3DimFloat32, ReverseV2OddRowEvenColTest3Dim<DataType::Float32>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2NegAxis2Dim1AxisFloat32, ReverseV2NegAxisTest2Dim1Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2NegAxis3Dim2AxisFloat32, ReverseV2NegAxisTest3Dim2Axis<DataType::Float32>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisFloat16, ReverseV2SimpleTest2Dim2Axis<DataType::Float16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisQAsymmS8, ReverseV2SimpleTest2Dim2Axis<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisQAsymmU8, ReverseV2SimpleTest2Dim2Axis<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisQSymmS16, ReverseV2SimpleTest2Dim2Axis<DataType::QSymmS16>)
+
 // Rsqrt
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Rsqrt2d, ClContextControlFixture, Rsqrt2dTest<DataType::Float32>)
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Rsqrt3d, ClContextControlFixture, Rsqrt3dTest<DataType::Float32>)
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index cb16ab1..f38366f 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -109,6 +109,8 @@
     ClReshapeWorkload.hpp
     ClResizeWorkload.cpp
     ClResizeWorkload.hpp
+    ClReverseV2Workload.cpp
+    ClReverseV2Workload.hpp
     ClRsqrtWorkload.cpp
     ClRsqrtWorkload.hpp
     ClSinWorkload.cpp
diff --git a/src/backends/cl/workloads/ClReverseV2Workload.cpp b/src/backends/cl/workloads/ClReverseV2Workload.cpp
new file mode 100644
index 0000000..8802d33
--- /dev/null
+++ b/src/backends/cl/workloads/ClReverseV2Workload.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClReverseV2Workload.hpp"
+#include "ClWorkloadUtils.hpp"
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <cl/ClTensorHandle.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
+
+using namespace armnn::armcomputetensorutils;
+
+namespace armnn
+{
+arm_compute::Status ClReverseV2WorkloadValidate(const TensorInfo& input,
+                                                const TensorInfo& axis,
+                                                const TensorInfo& output)
+{
+    const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclAxis = BuildArmComputeTensorInfo(axis);
+    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+
+    return arm_compute::CLReverse::validate(&aclInput, &aclOutput, &aclAxis, true);
+}
+
+ClReverseV2Workload::ClReverseV2Workload(const armnn::ReverseV2QueueDescriptor &descriptor,
+                                         const armnn::WorkloadInfo &info,
+                                         const arm_compute::CLCompileContext& clCompileContext)
+        : BaseWorkload<ReverseV2QueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClReverseV2Workload", 2, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& axis = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClReverseV2Workload_configure");
+        m_Layer.configure(clCompileContext, &input, &output, &axis, true);
+    }
+}
+
+void ClReverseV2Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClReverseV2Workload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClReverseV2Workload.hpp b/src/backends/cl/workloads/ClReverseV2Workload.hpp
new file mode 100644
index 0000000..60d4a50
--- /dev/null
+++ b/src/backends/cl/workloads/ClReverseV2Workload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseWorkload.hpp"
+
+#include <arm_compute/runtime/CL/functions/CLReverse.h>
+#include <arm_compute/runtime/Tensor.h>
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace armnn
+{
+arm_compute::Status ClReverseV2WorkloadValidate(const TensorInfo& input,
+                                                const TensorInfo& axis,
+                                                const TensorInfo& output);
+
+class ClReverseV2Workload : public BaseWorkload<ReverseV2QueueDescriptor>
+{
+public:
+    ClReverseV2Workload(const ReverseV2QueueDescriptor &descriptor,
+                        const WorkloadInfo &info,
+                        const arm_compute::CLCompileContext& clCompileContext);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLReverse m_Layer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index cec8706..40b3e99 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -55,6 +55,7 @@
 #include "ClReduceWorkload.hpp"
 #include "ClReshapeWorkload.hpp"
 #include "ClResizeWorkload.hpp"
+#include "ClReverseV2Workload.hpp"
 #include "ClRsqrtWorkload.hpp"
 #include "ClSinWorkload.hpp"
 #include "ClSliceWorkload.hpp"
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index ef1d218..4be5b7c 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -69,6 +69,7 @@
 #include "workloads/NeonReduceWorkload.hpp"
 #include "workloads/NeonReshapeWorkload.hpp"
 #include "workloads/NeonResizeWorkload.hpp"
+#include "workloads/NeonReverseV2Workload.hpp"
 #include "workloads/NeonRsqrtWorkload.hpp"
 #include "workloads/NeonSinWorkload.hpp"
 #include "workloads/NeonSliceWorkload.hpp"
@@ -582,6 +583,11 @@
                                              infos[1],
                                              *(PolymorphicDowncast<const ReduceDescriptor*>(&descriptor)),
                                              reasonIfUnsupported);
+        case LayerType::ReverseV2:
+            return support.IsReverseV2Supported(infos[0],
+                                                infos[1],
+                                                infos[2],
+                                                reasonIfUnsupported);
         case LayerType::Shape:
             return support.IsShapeSupported(infos[0],
                                             infos[1],
@@ -1515,6 +1521,18 @@
                                    descriptor);
 }
 
+bool NeonLayerSupport::IsReverseV2Supported(const TensorInfo& input,
+                                            const TensorInfo& axis,
+                                            const TensorInfo& output,
+                                            Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonReverseV2WorkloadValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   axis,
+                                   output);
+}
+
 bool NeonLayerSupport::IsSliceSupported(const TensorInfo& input,
                                         const TensorInfo& output,
                                         const SliceDescriptor& descriptor,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index 0295c2b..8372df3 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -295,6 +295,11 @@
                            const ResizeDescriptor& descriptor,
                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
 
+    bool IsReverseV2Supported(const TensorInfo& input,
+                              const TensorInfo& axis,
+                              const TensorInfo& output,
+                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+
     bool IsSliceSupported(const TensorInfo& input,
                           const TensorInfo& output,
                           const SliceDescriptor& descriptor,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 4f131ac..9b74f75 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -572,6 +572,11 @@
             auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
             return std::make_unique<NeonResizeWorkload>(*resizeQueueDescriptor, info);
         }
+        case LayerType::ReverseV2 :
+        {
+            auto reverseV2QueueDescriptor = PolymorphicDowncast<const ReverseV2QueueDescriptor*>(&descriptor);
+            return std::make_unique<NeonReverseV2Workload>(*reverseV2QueueDescriptor, info);
+        }
         case LayerType::Slice :
         {
             auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index 3961ed1..3ab07a9 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -77,6 +77,7 @@
         workloads/NeonReduceWorkload.cpp \
         workloads/NeonReshapeWorkload.cpp \
         workloads/NeonResizeWorkload.cpp \
+        workloads/NeonReverseV2Workload.cpp \
         workloads/NeonRsqrtWorkload.cpp \
         workloads/NeonSinWorkload.cpp \
         workloads/NeonSliceWorkload.cpp \
diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp
index e582fad..1e2636b 100644
--- a/src/backends/neon/test/NeonEndToEndTests.cpp
+++ b/src/backends/neon/test/NeonEndToEndTests.cpp
@@ -23,6 +23,7 @@
 #include <backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp>
 #include <backendsCommon/test/ReduceEndToEndTestImpl.hpp>
 #include <backendsCommon/test/ReshapeEndToEndTestImpl.hpp>
+#include <backendsCommon/test/ReverseV2EndToEndTestImpl.hpp>
 #include <backendsCommon/test/SliceEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
@@ -671,6 +672,12 @@
     ReshapeEndToEndFloat16<armnn::DataType::Float16>(neonDefaultBackends);
 }
 
+// ReverseV2
+TEST_CASE("NeonReverseV2EndToEndTest")
+{
+    ReverseV2EndToEnd<armnn::DataType::Float32>(neonDefaultBackends);
+}
+
 // Slice
 TEST_CASE("NeonSliceEndtoEndTestFloat32")
 {
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index a938ceb..9db6d2c 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -1352,6 +1352,36 @@
                               AlignCornersResizeNearestNeighbourTest<DataType::QAsymmU8>,
                               DataLayout::NHWC)
 
+// ReverseV2
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2SimpleEmptyAxisFloat32, ReverseV2SimpleTestEmptyAxis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple1DimFloat32, ReverseV2SimpleTest1Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim1AxisFloat32, ReverseV2SimpleTest2Dim1Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisFloat32, ReverseV2SimpleTest2Dim2Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple3Dim1AxisFloat32, ReverseV2SimpleTest3Dim1Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple3Dim2AxisFloat32, ReverseV2SimpleTest3Dim2Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple3Dim3AxisFloat32, ReverseV2SimpleTest3Dim3Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple4Dim1AxisFloat32, ReverseV2SimpleTest4Dim1Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple4Dim2AxisFloat32, ReverseV2SimpleTest4Dim2Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple4Dim3AxisFloat32, ReverseV2SimpleTest4Dim3Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple4Dim4AxisFloat32, ReverseV2SimpleTest4Dim4Axis<DataType::Float32>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2EvenRowOddCol2DimFloat32, ReverseV2EvenRowOddColTest2Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2EvenRowOddCol3DimFloat32, ReverseV2EvenRowOddColTest3Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2EvenRowEvenCol2DimFloat32, ReverseV2EvenRowEvenColTest2Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2EvenRowEvenCol3DimFloat32, ReverseV2EvenRowEvenColTest3Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2OddRowOddCol2DimFloat32, ReverseV2OddRowOddColTest2Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2OddRowOddCol3DimFloat32, ReverseV2OddRowOddColTest3Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2OddRowEvenCol2DimFloat32, ReverseV2OddRowEvenColTest2Dim<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2OddRowEvenCol3DimFloat32, ReverseV2OddRowEvenColTest3Dim<DataType::Float32>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2NegAxis2Dim1AxisFloat32, ReverseV2NegAxisTest2Dim1Axis<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2NegAxis3Dim2AxisFloat32, ReverseV2NegAxisTest3Dim2Axis<DataType::Float32>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisFloat16, ReverseV2SimpleTest2Dim2Axis<DataType::Float16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisQAsymmS8, ReverseV2SimpleTest2Dim2Axis<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisQAsymmU8, ReverseV2SimpleTest2Dim2Axis<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReverseV2Simple2Dim2AxisQSymmS16, ReverseV2SimpleTest2Dim2Axis<DataType::QSymmS16>)
+
 // Slice
 ARMNN_AUTO_TEST_CASE(Slice4dFloat32, Slice4dFloat32Test)
 ARMNN_AUTO_TEST_CASE(Slice3dFloat32, Slice3dFloat32Test)
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index f4438e4..357fdcd 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -113,6 +113,8 @@
     NeonReshapeWorkload.hpp
     NeonResizeWorkload.cpp
     NeonResizeWorkload.hpp
+    NeonReverseV2Workload.cpp
+    NeonReverseV2Workload.hpp
     NeonRsqrtWorkload.cpp
     NeonRsqrtWorkload.hpp
     NeonSinWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonReverseV2Workload.cpp b/src/backends/neon/workloads/NeonReverseV2Workload.cpp
new file mode 100644
index 0000000..c7d9fa1
--- /dev/null
+++ b/src/backends/neon/workloads/NeonReverseV2Workload.cpp
@@ -0,0 +1,44 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonReverseV2Workload.hpp"
+#include "NeonWorkloadUtils.hpp"
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <backendsCommon/WorkloadUtils.hpp>
+
+namespace armnn
+{
+arm_compute::Status NeonReverseV2WorkloadValidate(const TensorInfo& input,
+                                                  const TensorInfo& axis,
+                                                  const TensorInfo& output)
+{
+    const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclAxis = BuildArmComputeTensorInfo(axis);
+    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+
+    return arm_compute::NEReverse::validate(&aclInput, &aclOutput, &aclAxis, true);
+}
+
+NeonReverseV2Workload::NeonReverseV2Workload(const ReverseV2QueueDescriptor& descriptor,
+                                             const WorkloadInfo& info)
+        : BaseWorkload<ReverseV2QueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonReverseV2Workload", 2, 1);
+
+    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& axis = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output, &axis, true);
+}
+
+void NeonReverseV2Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonReverseV2Workload_Execute");
+    m_Layer.run();
+}
+
+} // namespace armnn
diff --git a/src/backends/neon/workloads/NeonReverseV2Workload.hpp b/src/backends/neon/workloads/NeonReverseV2Workload.hpp
new file mode 100644
index 0000000..fa6a71d
--- /dev/null
+++ b/src/backends/neon/workloads/NeonReverseV2Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "NeonBaseWorkload.hpp"
+
+#include <arm_compute/runtime/NEON/functions/NEReverse.h>
+#include <arm_compute/runtime/Tensor.h>
+
+namespace armnn
+{
+arm_compute::Status NeonReverseV2WorkloadValidate(const TensorInfo& input,
+                                                  const TensorInfo& axis,
+                                                  const TensorInfo& output);
+
+class NeonReverseV2Workload : public BaseWorkload<ReverseV2QueueDescriptor>
+{
+public:
+    NeonReverseV2Workload(const ReverseV2QueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::NEReverse m_Layer;
+};
+
+} // namespace armnn
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index 615e5d8..c14b9b4 100644
--- a/src/backends/neon/workloads/NeonWorkloads.hpp
+++ b/src/backends/neon/workloads/NeonWorkloads.hpp
@@ -59,6 +59,7 @@
 #include "NeonReduceWorkload.hpp"
 #include "NeonReshapeWorkload.hpp"
 #include "NeonResizeWorkload.hpp"
+#include "NeonReverseV2Workload.hpp"
 #include "NeonRsqrtWorkload.hpp"
 #include "NeonSinWorkload.hpp"
 #include "NeonSliceWorkload.hpp"
diff --git a/src/backends/reference/workloads/RefReverseV2Workload.cpp b/src/backends/reference/workloads/RefReverseV2Workload.cpp
index b0d2f44..192de7e 100644
--- a/src/backends/reference/workloads/RefReverseV2Workload.cpp
+++ b/src/backends/reference/workloads/RefReverseV2Workload.cpp
@@ -38,7 +38,7 @@
                                                                           inputs[0]->Map());
 
         std::unique_ptr<Decoder<int>> axisDecoder = MakeDecoder<int>(GetTensorInfo(inputs[1]),
-                                                                          inputs[1]->Map());
+                                                                     inputs[1]->Map());
 
         std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]),
                                                                            outputs[0]->Map());