MLCE-418 Reduce layer does not support multiple axes

 * Added backend-specific optimization to chain new reduce layers
   for each axis to simulate behaviour of a layer with multiple axes.
 * Added function to calculate reduced output shape.
 * Added unit tests.

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I180b0b111b7bcf3d0c283f1db0b82d5f17757682
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp
index 0e1b46a..6125f36 100644
--- a/src/backends/neon/workloads/NeonReduceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp
@@ -21,22 +21,52 @@
                                                const ReduceDescriptor& desc)
 {
     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
-    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
-    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
-    {
-        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
-                                   "NeonReduceWorkload: Reduction is supported only on 1 axis.");
-    }
 
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                           input.GetNumDimensions(),
                                                                           desc.m_vAxis);
 
-    return arm_compute::NEReductionOperation::validate(&aclInputInfo,
-                                                       &aclOutputInfo,
-                                                       static_cast<unsigned int>(coords[0]),
-                                                       ConvertReductionOperationToAcl(desc),
-                                                       desc.m_KeepDims);
+    // As ACL only support one axis, validate the layer for each axis if more than one is present.
+    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
+    {
+        arm_compute::Status status;
+
+        for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
+        {
+            TensorInfo inputToModify = input;
+            std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i]);
+
+            // Calculate the output shape using the input shape for a single axis.
+            // Currently the output TensorInfo inferred will be reduced upon multiple axis
+            // which will fail validation as only one axis is supported.
+            const TensorShape& reducedShape = ComputeReductionTensorShape(inputToModify, singleAxis, desc.m_KeepDims);
+            inputToModify.SetShape(reducedShape);
+
+            const arm_compute::TensorInfo aclOutputInfoModified =
+                    armcomputetensorutils::BuildArmComputeTensorInfo(inputToModify);
+
+            status = arm_compute::NEReductionOperation::validate(&aclInputInfo,
+                                                                 &aclOutputInfoModified,
+                                                                 static_cast<unsigned int>(coords[i]),
+                                                                 ConvertReductionOperationToAcl(desc),
+                                                                 desc.m_KeepDims);
+            if (!status)
+            {
+                break;
+            }
+        }
+        return status;
+    }
+    else
+    {
+        const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+        return arm_compute::NEReductionOperation::validate(&aclInputInfo,
+                                                           &aclOutputInfo,
+                                                           static_cast<unsigned int>(coords[0]),
+                                                           ConvertReductionOperationToAcl(desc),
+                                                           desc.m_KeepDims);
+    }
 }
 
 NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
@@ -50,6 +80,7 @@
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
                                                                           info.m_InputTensorInfos[0].GetNumDimensions(),
                                                                           m_Data.m_Parameters.m_vAxis);
+
     m_Layer.configure(&input,
                       &output,
                       static_cast<unsigned int>(coords[0]),