Revert "Revert "MLCE-1093 Reshape and concat invalid results""

This reverts commit 008270f8c1359a7d62c2f881326b4d3f0d8b7b56.

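Re-applying the original change: the split-axis computation in
ArmComputeUtils.hpp now honours an explicitly set axis
(desc.HasAxis()/desc.GetAxis()) instead of always inferring the axis
from the view sizes, the CL and Neon tensor handle factories report
SupportsSubTensors() as false, the splitter workload tests populate
the SplitterDescriptor view sizes and origin coordinates explicitly,
and the Neon tests that asserted concat/splitter inputs are backed by
sub-tensors are removed.

For reference, a minimal sketch of how a two-way split is now
described with an explicit axis; the shapes and values below are
illustrative only and are not taken from the tests:

    #include <armnn/Descriptors.hpp>

    armnn::SplitterDescriptor MakeExampleSplitDesc()
    {
        // Two views over a 3-dimensional input of shape [2, 4, 5],
        // split in half along dimension 0.
        armnn::SplitterDescriptor desc(2, 3);
        desc.SetAxis(0); // used when HasAxis() is true

        for (unsigned int view = 0; view < 2; ++view)
        {
            desc.SetViewSize(view, 0, 1);
            desc.SetViewSize(view, 1, 4);
            desc.SetViewSize(view, 2, 5);

            // Views start at coordinate 0 and 1 along the split axis.
            desc.SetViewOriginCoord(view, 0, view);
            desc.SetViewOriginCoord(view, 1, 0);
            desc.SetViewOriginCoord(view, 2, 0);
        }
        return desc;
    }
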
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: If8f5151aa349ff3834f03391e813669e5c51ed66
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
index f466ab1..5d424af 100644
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeUtils.hpp
@@ -9,6 +9,7 @@
 #include <armnn/utility/Assert.hpp>
 #include <armnn/utility/NumericCast.hpp>
 #include <armnn/backends/WorkloadData.hpp>
+#include <armnnUtils/TensorUtils.hpp>
 
 #include <arm_compute/runtime/FunctionDescriptors.h>
 #include <arm_compute/function_info/FullyConnectedLayerInfo.h>
@@ -248,13 +249,20 @@
     unsigned int numDimensions = desc.GetNumDimensions();
     std::set<unsigned int> splitAxis;
 
-    for (unsigned int i = 0; i < numSplit; ++i)
+    if (desc.HasAxis())
     {
-        for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
+        splitAxis.insert(armnnUtils::GetUnsignedAxis(desc.GetNumDimensions(), desc.GetAxis()));
+    }
+    else
+    {
+        for (unsigned int i = 0; i < numSplit; ++i)
         {
-            if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
+            for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
             {
-                splitAxis.insert(dimIdx);
+                if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
+                {
+                    splitAxis.insert(dimIdx);
+                }
             }
         }
     }
diff --git a/src/backends/backendsCommon/test/layerTests/SplitterTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/SplitterTestImpl.cpp
index 9e3d83c..13483e5 100644
--- a/src/backends/backendsCommon/test/layerTests/SplitterTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/SplitterTestImpl.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2019-2020,2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -199,11 +199,29 @@
 
     // Do the first split
     armnn::SplitterQueueDescriptor data;
+    data.m_Parameters = armnn::SplitterDescriptor(2, 3);
+
     armnn::WorkloadInfo info;
     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
     AddOutputToWorkload(data, info, outputTensorInfo1, outputHandle1.get());
     AddOutputToWorkload(data, info, outputTensorInfo2, outputHandle2.get());
 
+    data.m_Parameters.SetViewSize(0, 0, outputChannels1);
+    data.m_Parameters.SetViewSize(0, 1, outputHeight1);
+    data.m_Parameters.SetViewSize(0, 2, outputWidth1);
+
+    data.m_Parameters.SetViewSize(1, 0, outputChannels2);
+    data.m_Parameters.SetViewSize(1, 1, outputHeight2);
+    data.m_Parameters.SetViewSize(1, 2, outputWidth2);
+
+    data.m_Parameters.SetViewOriginCoord(0, 0, 0);
+    data.m_Parameters.SetViewOriginCoord(0, 1, 0);
+    data.m_Parameters.SetViewOriginCoord(0, 2, 0);
+
+    data.m_Parameters.SetViewOriginCoord(1, 0, 1);
+    data.m_Parameters.SetViewOriginCoord(1, 1, 0);
+    data.m_Parameters.SetViewOriginCoord(1, 2, 0);
+
     data.m_ViewOrigins.push_back(window1);
     data.m_ViewOrigins.push_back(window2);
 
@@ -224,11 +242,29 @@
 
     // Do the second split.
     armnn::SplitterQueueDescriptor data2;
+    data2.m_Parameters = armnn::SplitterDescriptor(2, 3);
+
     armnn::WorkloadInfo info2;
     AddInputToWorkload(data2, info2, outputTensorInfo2, outputHandle2.get());
     AddOutputToWorkload(data2, info2, outputTensorInfo3, outputHandle3.get());
     AddOutputToWorkload(data2, info2, outputTensorInfo4, outputHandle4.get());
 
+    data2.m_Parameters.SetViewSize(0, 0, outputChannels1);
+    data2.m_Parameters.SetViewSize(0, 1, outputHeight1);
+    data2.m_Parameters.SetViewSize(0, 2, outputWidth1);
+
+    data2.m_Parameters.SetViewSize(1, 0, outputChannels2);
+    data2.m_Parameters.SetViewSize(1, 1, outputHeight2);
+    data2.m_Parameters.SetViewSize(1, 2, outputWidth1);
+
+    data2.m_Parameters.SetViewOriginCoord(0, 0, 0);
+    data2.m_Parameters.SetViewOriginCoord(0, 1, 0);
+    data2.m_Parameters.SetViewOriginCoord(0, 2, 0);
+
+    data2.m_Parameters.SetViewOriginCoord(1, 0, 1);
+    data2.m_Parameters.SetViewOriginCoord(1, 1, 0);
+    data2.m_Parameters.SetViewOriginCoord(1, 2, 0);
+
     data2.m_ViewOrigins.push_back(window3);
     data2.m_ViewOrigins.push_back(window4);
 
@@ -307,6 +343,17 @@
     AddInputToWorkload(data, info, tensorInfo, inputHandle.get());
     AddOutputToWorkload(data, info, tensorInfo, outputHandle.get());
 
+    data.m_Parameters = armnn::SplitterDescriptor(1, 3);
+    data.m_Parameters.SetAxis(0);
+
+    data.m_Parameters.SetViewSize(0, 0, 3);
+    data.m_Parameters.SetViewSize(0, 1, 6);
+    data.m_Parameters.SetViewSize(0, 2, 5);
+
+    data.m_Parameters.SetViewOriginCoord(0, 0, 0);
+    data.m_Parameters.SetViewOriginCoord(0, 1, 0);
+    data.m_Parameters.SetViewOriginCoord(0, 2, 0);
+
     data.m_ViewOrigins.push_back(window);
 
     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Splitter,
diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp
index 82e41d3..be3ca5e 100644
--- a/src/backends/cl/ClTensorHandleFactory.cpp
+++ b/src/backends/cl/ClTensorHandleFactory.cpp
@@ -103,7 +103,7 @@
 
 bool ClTensorHandleFactory::SupportsSubTensors() const
 {
-    return true;
+    return false;
 }
 
 MemorySourceFlags ClTensorHandleFactory::GetExportFlags() const
diff --git a/src/backends/neon/NeonTensorHandleFactory.cpp b/src/backends/neon/NeonTensorHandleFactory.cpp
index ce3ce5c..2597b5f 100644
--- a/src/backends/neon/NeonTensorHandleFactory.cpp
+++ b/src/backends/neon/NeonTensorHandleFactory.cpp
@@ -104,7 +104,7 @@
 
 bool NeonTensorHandleFactory::SupportsSubTensors() const
 {
-    return true;
+    return false;
 }
 
 MemorySourceFlags NeonTensorHandleFactory::GetExportFlags() const
diff --git a/src/backends/neon/test/NeonTensorHandleTests.cpp b/src/backends/neon/test/NeonTensorHandleTests.cpp
index c8e781b..a94e4dd 100644
--- a/src/backends/neon/test/NeonTensorHandleTests.cpp
+++ b/src/backends/neon/test/NeonTensorHandleTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2021,2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include <Graph.hpp>
@@ -89,81 +89,6 @@
     CHECK(capabilities[0].m_Value);
 }
 
-TEST_CASE("ConcatOnXorYSubTensorsNoPaddingRequiredTest")
-{
-    armnn::INetworkPtr net(armnn::INetwork::Create());
-
-    // Set up tensor infos
-    const armnn::TensorInfo inputInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
-    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({2, 3, 2, 2}, armnn::DataType::Float32);
-    const armnn::TensorInfo outputInfo = armnn::TensorInfo({2, 3, 4, 2}, armnn::DataType::Float32);
-
-    armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
-
-    // Create the network
-    armnn::IConnectableLayer* const input0Layer = net->AddInputLayer(0, "input_0");
-    input0Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
-    armnn::IConnectableLayer* elementwiseUnaryLayer0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_0");
-    elementwiseUnaryLayer0->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
-    input0Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer0->GetInputSlot(0));
-
-    armnn::IConnectableLayer* const input1Layer = net->AddInputLayer(1, "input_1");
-    input1Layer->GetOutputSlot(0).SetTensorInfo(inputInfo);
-    armnn::IConnectableLayer* elementwiseUnaryLayer1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseUnary_1");
-    elementwiseUnaryLayer1->GetOutputSlot(0).SetTensorInfo(intermediateInfo);
-    input1Layer->GetOutputSlot(0).Connect(elementwiseUnaryLayer1->GetInputSlot(0));
-
-    std::array<armnn::TensorShape, 2> concatInputShapes = { intermediateInfo.GetShape(), intermediateInfo.GetShape() };
-    armnn::IConnectableLayer* const concatLayer = net->AddConcatLayer(armnn::CreateDescriptorForConcatenation(
-        concatInputShapes.begin(), concatInputShapes.end(), 2), "concatenation");
-    concatLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
-    elementwiseUnaryLayer0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
-    elementwiseUnaryLayer1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
-
-    armnn::IConnectableLayer* const outputLayer = net->AddOutputLayer(0, "output");
-    concatLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
-    armnn::IRuntime::CreationOptions options;
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
-
-    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
-    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
-
-    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
-
-    // Load graph into runtime
-    armnn::NetworkId networkIdentifier;
-    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
-
-    // now check the concat how many sub-tensors it is using..
-    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
-    {
-        if (subTensorHandle && subTensorHandle->GetParent())
-        {
-            return true;
-        }
-        return false;
-    };
-
-    for (auto&& layer : theGraph)
-    {
-        if(layer->GetType() == armnn::LayerType::Concat)
-        {
-            unsigned int numberOfSubTensors = 0;
-            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
-            {
-                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
-                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
-                {
-                    ++numberOfSubTensors;
-                }
-            }
-            // sub-tensors should be supported in this configuration
-            ARMNN_ASSERT(numberOfSubTensors > 0);
-        }
-    }
-}
-
 TEST_CASE("ConcatonXorYPaddingRequiredTest")
 {
     armnn::INetworkPtr net(armnn::INetwork::Create());
@@ -247,212 +172,6 @@
     ARMNN_ASSERT(numberOfSubTensors == 0);
 }
 
-TEST_CASE("SplitteronXorYNoPaddingRequiredTest")
-{
-    using namespace armnn;
-
-    unsigned int splitAxis = 2;
-    unsigned int numSplit = 2;
-
-    const TensorShape& inputShape = { 2, 3, 4, 2 };
-    const armnn::TensorInfo intermediateInfo = armnn::TensorInfo({ 2, 3, 2, 2 }, armnn::DataType::Float32);
-    const std::vector<TensorShape> outputShapes{{ 2, 3, 2, 2 },
-                                                { 2, 3, 2, 2 }};
-    const float qScale = 1.0f;
-    const int32_t qOffset = 0;
-
-    // Creates structures for input & output.
-    std::vector<float> inputData{
-            1, 2,
-            3, 4,
-            5, 6,
-            7, 8,
-            9, 10,
-            11, 12,
-            13, 14,
-            15, 16,
-            17, 18,
-            19, 20,
-            21, 22,
-            23, 24,
-            25, 26,
-            27, 28,
-            29, 30,
-            31, 32,
-            33, 34,
-            35, 36,
-            37, 38,
-            39, 40,
-            41, 42,
-            43, 44,
-            45, 46,
-            47, 48
-    };
-
-    std::vector<float> expectedOutput0{
-            1, 2,
-            3, 4,
-            9, 10,
-            11, 12,
-            17, 18,
-            19, 20,
-            25, 26,
-            27, 28,
-            33, 34,
-            35, 36,
-            41, 42,
-            43, 44
-    };
-
-    std::vector<float> expectedOutput1{
-            5, 6,
-            7, 8,
-            13, 14,
-            15, 16,
-            21, 22,
-            23, 24,
-            29, 30,
-            31, 32,
-            37, 38,
-            39, 40,
-            45, 46,
-            47, 48
-    };
-
-    // Builds up the structure of the network.
-    INetworkPtr net(INetwork::Create());
-
-    TensorInfo inputTensorInfo(inputShape, armnn::DataType::Float32, qScale, qOffset);
-
-    armnn::ElementwiseUnaryDescriptor descriptor(armnn::UnaryOperation::Abs);
-
-    // Splitter
-    std::vector<unsigned int> splitterDimSizes(inputShape.GetNumDimensions());
-
-    // Add current input shape to splitterDimSizes
-    for (unsigned int i = 0; i < inputShape.GetNumDimensions(); ++i)
-    {
-        splitterDimSizes[i] = inputTensorInfo.GetShape()[i];
-    }
-
-    if (splitterDimSizes[splitAxis] % numSplit != 0)
-    {
-        throw ParseException("Number of splits must evenly divide the dimension");
-    }
-
-    splitterDimSizes[splitAxis] /= numSplit;
-
-    SplitterDescriptor splitDesc(numSplit, inputShape.GetNumDimensions());
-
-    for (unsigned int g = 0; g < numSplit; ++g)
-    {
-        // Set the size of the views.
-        for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx)
-        {
-            splitDesc.SetViewSize(g, dimIdx, splitterDimSizes[dimIdx]);
-        }
-        splitDesc.SetViewOriginCoord(g, splitAxis, splitterDimSizes[splitAxis] * g);
-    }
-    IConnectableLayer* input = net->AddInputLayer(0, "input");
-    IConnectableLayer* elementWiseUnary0 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_0");
-    IConnectableLayer* elementWiseUnary1 = net->AddElementwiseUnaryLayer(descriptor, "elementwiseunary_0");
-    IConnectableLayer* splitter = net->AddSplitterLayer(splitDesc, "splitter");
-
-    // Connections
-    Connect(input, splitter, inputTensorInfo, 0, 0);
-    Connect(splitter, elementWiseUnary0, intermediateInfo, 0, 0);
-    Connect(splitter, elementWiseUnary1, intermediateInfo, 1, 0);
-
-    std::vector<IConnectableLayer*> pooling2dLayers{elementWiseUnary0, elementWiseUnary1};
-
-    for (unsigned int i = 0; i < outputShapes.size(); ++i)
-    {
-        TensorInfo outputTensorInfo(outputShapes[i], armnn::DataType::Float32, qScale, qOffset);
-        IConnectableLayer* output = net->AddOutputLayer(armnn::numeric_cast<LayerBindingId>(i));
-        Connect(pooling2dLayers[i], output, outputTensorInfo, 0, 0);
-    }
-
-    std::map<int, std::vector<float>> inputTensorData = {{ 0,inputData }};
-    std::map<int, std::vector<float>> expectedOutputData = {{ 0, expectedOutput0 }, { 1, expectedOutput1 }};
-
-    armnn::IRuntime::CreationOptions options;
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
-
-    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
-    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
-
-    const armnn::Graph& theGraph = GetGraphForTesting(optimizedNet.get());
-
-    // Load graph into runtime
-    armnn::NetworkId networkIdentifier;
-    runtime->LoadNetwork(networkIdentifier, std::move(optimizedNet));
-
-    // now check the concat how many sub-tensors it is using..
-    auto TraceSubTensorHandleAncestry = [](armnn::ITensorHandle* const subTensorHandle)
-    {
-        if (subTensorHandle && subTensorHandle->GetParent())
-        {
-            return true;
-        }
-        return false;
-    };
-
-    for (auto&& layer : theGraph)
-    {
-        if(layer->GetType() == armnn::LayerType::ElementwiseUnary)
-        {
-            unsigned int numberOfSubTensors = 0;
-            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
-            {
-                const armnn::OutputSlot* slot = layer->GetInputSlot(i).GetConnectedOutputSlot();
-                if (TraceSubTensorHandleAncestry(slot->GetOutputHandler().GetData()))
-                {
-                    ++numberOfSubTensors;
-                }
-            }
-            // sub-tensors should be supported in this configuration
-            ARMNN_ASSERT(numberOfSubTensors > 0);
-        }
-    }
-
-    InputTensors inputTensors;
-    inputTensors.reserve(inputTensorData.size());
-    for (auto&& it : inputTensorData)
-    {
-        TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, it.first);
-        inputTensorInfo.SetConstant(true);
-        inputTensors.push_back({it.first,
-                                ConstTensor(inputTensorInfo, it.second.data())});
-    }
-    OutputTensors outputTensors;
-    outputTensors.reserve(expectedOutputData.size());
-    std::map<int, std::vector<float>> outputStorage;
-    for (auto&& it : expectedOutputData)
-    {
-        std::vector<float> out(it.second.size());
-        outputStorage.emplace(it.first, out);
-        outputTensors.push_back({it.first,
-                                 Tensor(runtime->GetOutputTensorInfo(networkIdentifier, it.first),
-                                               outputStorage.at(it.first).data())});
-    }
-
-    // Does the inference.
-    runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
-
-    // Checks the results.
-    float tolerance = 0.000001f;
-    for (auto&& it : expectedOutputData)
-    {
-        std::vector<float> out = outputStorage.at(it.first);
-        for (unsigned int i = 0; i < out.size(); ++i)
-        {
-            CHECK_MESSAGE(Compare<armnn::DataType::Float32>(it.second[i], out[i], tolerance) == true,
-                    "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
-
-        }
-    }
-}
-
 TEST_CASE("SplitteronXorYPaddingRequiredTest")
 {
     using namespace armnn;