MLCE-1165 Model failing to load when pad is folded into Conv2d

  * Skip the optimization that folds Pad and Conv2d together
  in one specific case: a 1x1 filter whose total padding
  size is >= the filter size
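
  As an illustration (not part of the patch), a minimal standalone
  sketch of the condition that now blocks the fold - the names here
  are hypothetical; the real check lives in FoldPadIntoLayer2dImpl():

    #include <cstdio>

    // True when folding must be skipped: a 1x1 filter whose total
    // padding on the same axis is >= the filter size triggers the
    // compute library issue.
    static bool SkipPadFold(unsigned filterW, unsigned filterH,
                            unsigned padLeft, unsigned padRight,
                            unsigned padTop, unsigned padBottom)
    {
        const unsigned horizontal = padLeft + padRight;
        const unsigned vertical   = padTop + padBottom;
        return (filterW == 1 && horizontal >= filterW) ||
               (filterH == 1 && vertical >= filterH);
    }

    int main()
    {
        // Padding from the new unit test: {2,1} wide and {1,1} high
        // around a 1x1 filter.
        std::printf("skip fold: %s\n",
                    SkipPadFold(1, 1, 2, 1, 1, 1) ? "yes" : "no");
        return 0; // prints "skip fold: yes"; the Pad layer is kept
    }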

Signed-off-by: Tracy Narine <tracy.narine@arm.com>
Change-Id: I46944e9f736df1ff60469b2d2852e1bba01ab8cd
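
Note (illustrative, not part of the patch): with the unit test's NHWC
input of 1x18x18x512 and pad list {{0,0},{1,1},{2,1},{0,0}}, the padded
shape works out to 1x(18+1+1)x(18+2+1)x512 = 1x20x21x512, and a 1x1
stride-1 conv2d preserves the spatial dims, so the conv2d output shape
is also 1x20x21x512.
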
diff --git a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
index 2f70e63..5592491 100644
--- a/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
+++ b/src/armnn/optimizations/FoldPadIntoLayer2d.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2021-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -154,6 +154,29 @@
         return nullptr;
     }
 
+    // Workaround for an issue in the compute library: the conv2d algorithm
+    // the compute library selects does not handle the 1x1 filter case when
+    // the total padding size is >= the filter size
+    if constexpr (std::is_same<Layer2dT, armnn::Convolution2dLayer>::value)
+    {
+        // Get filter width and height
+        armnnUtils::DataLayoutIndexed dataLayoutIndex(newLayer2dDescriptor.m_DataLayout);
+        const TensorShape& filterShape = layer2d.GetInputSlot(1).GetTensorInfo().GetShape();
+        unsigned int filterWidth       = filterShape[dataLayoutIndex.GetWidthIndex()];
+        unsigned int filterHeight      = filterShape[dataLayoutIndex.GetHeightIndex()];
+        // Calculate total padding and check conditions
+        auto horizontalPadding = newLayer2dDescriptor.m_PadLeft + newLayer2dDescriptor.m_PadRight;
+        auto verticalPadding   = newLayer2dDescriptor.m_PadTop  + newLayer2dDescriptor.m_PadBottom;
+        if ((filterWidth == 1) && (horizontalPadding >= filterWidth))
+        {
+            return nullptr;
+        }
+        else if ((filterHeight == 1) && (verticalPadding >= filterHeight))
+        {
+            return nullptr;
+        }
+    }
+
     // Save original parent output slot of the pad layer
     OutputSlot& parentSlot = *padLayer.GetInputSlot(0).GetConnectedOutputSlot();
 
diff --git a/src/armnn/test/optimizations/FoldPadTests.cpp b/src/armnn/test/optimizations/FoldPadTests.cpp
index b2672ea..5c6d1b6 100644
--- a/src/armnn/test/optimizations/FoldPadTests.cpp
+++ b/src/armnn/test/optimizations/FoldPadTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022, 2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -97,6 +97,73 @@
                                                       &IsLayerOfType<OutputLayer>));
 }
 
+TEST_CASE("RejectFoldPadLayerIntoConvolution2dLayerWith1x1Filter")
+{
+    // Once the compute library issue is addressed and the 1x1 restriction
+    // in FoldPadIntoLayer2dImpl() is removed, this test can be updated to
+    // check that the folding does happen
+
+    Graph graph;
+    const unsigned int inputShape[]   = {1, 18, 18, 512};
+    const unsigned int paddedShape[]  = {1, 20, 21, 512};
+    const unsigned int weightsShape[] = {512, 1, 1, 512};
+    const unsigned int outputShape[]  = {1, 20, 21, 512};
+
+    TensorInfo inputInfo(4, inputShape, DataType::Float32);
+    TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
+    TensorInfo weightsInfo(4, weightsShape, DataType::Float32, 1.0f, 0, true);
+    TensorInfo outputInfo(4, outputShape, DataType::Float32);
+
+    Layer* input = graph.AddLayer<InputLayer>(0, "input");
+    input->GetOutputSlot().SetTensorInfo(inputInfo);
+
+    PadDescriptor padDescriptor({{0, 0},
+                                 {1, 1},
+                                 {2, 1},
+                                 {0, 0}});
+
+    PadLayer* padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
+    padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);
+
+    Convolution2dDescriptor convolution2dDescriptor;
+    convolution2dDescriptor.m_BiasEnabled = false;
+    convolution2dDescriptor.m_StrideX     = 1;
+    convolution2dDescriptor.m_StrideY     = 1;
+    convolution2dDescriptor.m_DataLayout  = DataLayout::NHWC;
+
+    std::vector<float> weightsVector(512 * 512);
+    ConstTensor        weights(weightsInfo, weightsVector);
+
+    ConstantLayer* weightsLayer = graph.AddLayer<ConstantLayer>("Weights");
+    weightsLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(weights);
+    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+    Convolution2dLayer* conv2dLayer = graph.AddLayer<Convolution2dLayer>(convolution2dDescriptor, "conv2d");
+    conv2dLayer->GetOutputSlot().SetTensorInfo(outputInfo);
+
+    Layer* output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // Connect up layers - input -> pad -> conv2d -> output
+    input->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
+    padLayer->GetOutputSlot().Connect(conv2dLayer->GetInputSlot(0));
+    weightsLayer->GetOutputSlot().Connect(conv2dLayer->GetInputSlot(1));
+    conv2dLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>,
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<PadLayer>,
+                        &IsLayerOfType<Convolution2dLayer>,
+                        &IsLayerOfType<OutputLayer>));
+
+    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(FoldPadIntoConvolution2d()));
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<InputLayer>,
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<PadLayer>,
+                        &IsLayerOfType<Convolution2dLayer>,
+                        &IsLayerOfType<OutputLayer>));
+}
+
 TEST_CASE("FoldPadLayerIntoDepthwiseConvolution2dLayer")
 {
     Graph              graph;