IVGCVSW-2266 Remove the input swizzling from ParseDepthwiseConv2D

Change-Id: I72d94fff4cdad2c62dff98c8fd52eba78a1908f0
diff --git a/src/armnnTfParser/TfParser.cpp b/src/armnnTfParser/TfParser.cpp
index 210b825..3c7d16e 100644
--- a/src/armnnTfParser/TfParser.cpp
+++ b/src/armnnTfParser/TfParser.cpp
@@ -48,37 +48,6 @@
 const PermutationVector NHWCToArmNN = { 0, 2, 3, 1 };
 const PermutationVector ArmNNToNHWC = { 0, 3, 1, 2 };
 
-IConnectableLayer* AddSwizzleLayer(INetwork& network, IOutputSlot& input, const PermutationVector& mapping,
-    const std::string& name)
-{
-    // Adds swizzle layer.
-    IConnectableLayer* const layer = network.AddPermuteLayer(mapping, name.c_str());
-
-    // Connects intput to swizzle layer.
-    input.Connect(layer->GetInputSlot(0));
-
-    // Sets up swizzled output.
-    const TensorInfo outInfo = armnnUtils::Permuted(input.GetTensorInfo(), mapping);
-    layer->GetOutputSlot(0).SetTensorInfo(outInfo);
-
-    return layer;
-}
-
-IConnectableLayer* SwizzleInDeswizzleOut(INetwork& network, IOutputSlot& input, IConnectableLayer& layer,
-    const std::string& name)
-{
-    // Adds swizzle layer.
-    IConnectableLayer* const swizzleLayer = AddSwizzleLayer(network, input, NHWCToArmNN, "swizzle_for-" + name);
-
-    // Connects swizzledInput to layer.
-    swizzleLayer->GetOutputSlot(0).Connect(layer.GetInputSlot(0));
-
-    // Adds deswizzle layer.
-    IConnectableLayer* const deswizzleLayer = AddSwizzleLayer(network, layer.GetOutputSlot(0), ArmNNToNHWC,
-        "deswizzle_for-" + name);
-
-    return deswizzleLayer;
-}
 
 template <typename Callable>
 void ReadMandatoryNodeAttributeImpl(const tensorflow::NodeDef& nodeDef,
@@ -1181,10 +1150,10 @@
                     % nodeDef.name()
                     % CHECK_LOCATION().AsString()));
     }
+
     ParsedConstTfOperation<float>* weightNode =
         boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
 
-
     std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
     std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
     std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");
@@ -1194,59 +1163,85 @@
 
     CHECK_DATA_FORMAT(nodeDef, dataFormat, "DepthwiseConv2dNative");
 
-    if (dataFormat == "NHWC")
-    {
-        desc.m_StrideX = strides[2];
-        desc.m_StrideY = strides[1];
-        // Swizzles input to supported memory layout.
-        inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
-    }
-    else if (dataFormat == "NCHW")
-    {
-        desc.m_StrideX = strides[3];
-        desc.m_StrideY = strides[2];
-    }
+    DataLayout dataLayout = dataFormat == "NHWC" ? DataLayout::NHWC : DataLayout::NCHW;
 
-    uint32_t inputHeight = inputTensorInfo.GetShape()[2];
-    uint32_t inputWidth = inputTensorInfo.GetShape()[3];
+    desc.m_DataLayout = dataLayout;
 
-    std::vector<float> outputTensorData;
+    DataLayoutIndexed dataLayoutIndexed(dataLayout);
 
-    ConstTensor weightTensor = weightNode->GetConstTensor(true, outputTensorData);
+    desc.m_StrideX = strides[dataLayoutIndexed.GetWidthIndex()];
+    desc.m_StrideY = strides[dataLayoutIndexed.GetHeightIndex()];
 
-    uint32_t weightHeight = weightTensor.GetShape()[2];
-    uint32_t weightWidth = weightTensor.GetShape()[3];
+    uint32_t inputHeight = inputTensorInfo.GetShape()[dataLayoutIndexed.GetHeightIndex()];
+    uint32_t inputWidth  = inputTensorInfo.GetShape()[dataLayoutIndexed.GetWidthIndex()];
+
+    // Mappings from TensorFlow filter tensors to the ArmNN filter tensors.
+    // TensorFlow depthwise weights are [H, W, In, Out], where Out is the per-input-channel multiplier.
+    // ArmNN weights have to be [Out, H, W, In] when the data layout is NHWC,
+    // and [Out, In, H, W] when the data layout is NCHW.
+    PermutationVector permutationVector =
+            dataLayout == DataLayout::NHWC ?
+            std::initializer_list<unsigned int>{ 1, 2, 3, 0 } : // NHWC: [H, W, In, Out] -> [Out, H, W, In]
+            std::initializer_list<unsigned int>{ 2, 3, 1, 0 };  // NCHW: [H, W, In, Out] -> [Out, In, H, W]
+
+    // Compute the swizzled weight tensor info using the given permutation vector.
+    const TensorInfo& weightTensorInfo = weightNode->GetTensorInfo();
+    const TensorInfo weightTensorSwizzledInfo = armnnUtils::Permuted(weightTensorInfo, permutationVector);
+
+    // Swizzles the contents of the tensor's permanent storage into local storage.
+    std::vector<float> weightTensorSwizzledData(weightTensorInfo.GetNumElements());
+    armnnUtils::Permute(weightTensorSwizzledInfo.GetShape(), permutationVector,
+                        weightNode->GetStorage(), weightTensorSwizzledData.data());
+
+    // Create a weight tensor with the newly swizzled data.
+    ConstTensor weightTensor(weightTensorSwizzledInfo, weightTensorSwizzledData);
+
+    uint32_t weightHeight = weightTensor.GetShape()[dataLayoutIndexed.GetHeightIndex()];
+    uint32_t weightWidth  = weightTensor.GetShape()[dataLayoutIndexed.GetWidthIndex()];
 
     bool padding = false;
     TensorInfo outputInfo;
+    unsigned int outputHeight = 0;
+    unsigned int outputWidth = 0;
 
     CHECK_PADDING_TYPE(nodeDef, paddingString);
 
     if (paddingString == "SAME")
     {
         padding = true;
-        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
-                                weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
-                                static_cast<uint32_t>(ceil(
-                                    static_cast<float>(inputHeight) /
-                                    static_cast<float>(desc.m_StrideY))),
-                                static_cast<uint32_t>(ceil(
-                                    static_cast<float>(inputWidth) /
-                                    static_cast<float>(desc.m_StrideX)))
-                                }, DataType::Float32);
+
+        outputHeight = static_cast<uint32_t>(ceil(static_cast<float>(inputHeight) /
+                                                  static_cast<float>(desc.m_StrideY)));
+        outputWidth  = static_cast<uint32_t>(ceil(static_cast<float>(inputWidth) /
+                                                  static_cast<float>(desc.m_StrideX)));
     }
     else if (paddingString == "VALID")
     {
         padding = false;
-        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
-                                weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
-                                static_cast<uint32_t>(ceil(
-                                    static_cast<float>(inputHeight - weightHeight + 1) /
-                                    static_cast<float>(desc.m_StrideY))),
-                                static_cast<uint32_t>(ceil(
-                                    static_cast<float>(inputWidth - weightWidth + 1) /
-                                    static_cast<float>(desc.m_StrideX)))
-                                }, DataType::Float32);
+
+        outputHeight = static_cast<uint32_t>(ceil(static_cast<float>(inputHeight - weightHeight + 1) /
+                                                  static_cast<float>(desc.m_StrideY)));
+        outputWidth  = static_cast<uint32_t>(ceil(static_cast<float>(inputWidth - weightWidth + 1) /
+                                                  static_cast<float>(desc.m_StrideX)));
+    }
+
+    switch (dataLayout)
+    {
+        case DataLayout::NHWC:
+            outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
+                                      outputHeight,
+                                      outputWidth,
+                                      weightTensor.GetShape()[0] * weightTensor.GetShape()[3]},
+                                    DataType::Float32);
+            break;
+        case DataLayout::NCHW:
+        default:
+            outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
+                                      weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
+                                      outputHeight,
+                                      outputWidth },
+                                    DataType::Float32);
+            break;
     }
 
     CalcPadding(inputHeight, weightHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, padding);
@@ -1254,15 +1249,7 @@
 
     IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(desc, weightTensor, nodeDef.name().c_str());
     layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
-
-    if (dataFormat == "NHWC")
-    {
-        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
-    }
-    else
-    {
-        inputSlot.Connect(layer->GetInputSlot(0));
-    }
+    inputSlot.Connect(layer->GetInputSlot(0));
 
     return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
 }
diff --git a/src/armnnTfParser/test/DepthwiseConvolution2d.cpp b/src/armnnTfParser/test/DepthwiseConvolution2d.cpp
index 5a8105c..464e62f 100644
--- a/src/armnnTfParser/test/DepthwiseConvolution2d.cpp
+++ b/src/armnnTfParser/test/DepthwiseConvolution2d.cpp
@@ -9,11 +9,15 @@
 #include <string>
 #include <iostream>
 
+#include <Permute.hpp>
+using namespace armnnUtils;
+using namespace armnn;
+
 BOOST_AUTO_TEST_SUITE(TensorflowParser)
 
 struct DepthwiseConvolution2dFixture : public armnnUtils::ParserPrototxtFixture<armnnTfParser::ITfParser>
 {
-    explicit DepthwiseConvolution2dFixture(const char* paddingType)
+    explicit DepthwiseConvolution2dFixture(const std::string& dataLayout, const char* paddingType)
     {
         m_Prototext = "node { \n"
                       "    name: \"graphInput\" \n"
@@ -25,26 +29,9 @@
                       "      } \n"
                       "    } \n"
                       "    attr { \n"
-                      "      key: \"value\" \n"
+                      "      key: \"shape\" \n"
                       "      value { \n"
-                      "        tensor { \n"
-                      "          dtype: DT_FLOAT \n"
-                      "          tensor_shape { \n"
-                      "            dim { \n"
-                      "              size: 1 \n"
-                      "            } \n"
-                      "            dim { \n"
-                      "              size: 1 \n"
-                      "            } \n"
-                      "            dim { \n"
-                      "              size: 3 \n"
-                      "            } \n"
-                      "            dim { \n"
-                      "              size: 3 \n"
-                      "            } \n"
-                      "          } \n"
-                      "          tensor_content: \"\\000\\000\\200?\\000\\000\\000@\\000\\000@@\\000\\000\\200@"
-                      "\\000\\000\\240@\\000\\000\\300@\\000\\000\\340@\\000\\000\\000A\\000\\000\\020A\" \n"
+                      "        shape { \n"
                       "        } \n"
                       "      } \n"
                       "    } \n"
@@ -104,13 +91,15 @@
                       "  attr { \n"
                       "    key: \"data_format\" \n"
                       "    value { \n"
-                      "      s: \"NHWC\" \n"
+                      "      s: \"";
+        m_Prototext.append(dataLayout);
+        m_Prototext.append("\"\n"
                       "    } \n"
                       "  } \n"
                       "  attr { \n"
                       "    key: \"padding\" \n"
                       "    value { \n"
-                      "      s: \"";
+                      "      s: \"");
         m_Prototext.append(paddingType);
         m_Prototext.append("\"\n"
                       "    } \n"
@@ -134,32 +123,63 @@
                       "  } \n"
                       "} \n");
 
-        SetupSingleInputSingleOutput({ 1, 1, 3, 3 }, "graphInput", "potato");
+        if(dataLayout == "NHWC")
+        {
+            SetupSingleInputSingleOutput({ 1u, 1u, 3u, 3u }, "graphInput", "potato");
+        }
+        else
+        {
+            SetupSingleInputSingleOutput({ 1u, 3u, 1u, 3u }, "graphInput", "potato");
+        }
     }
 };
 
-struct DepthwiseConvolution2dSameFixture : DepthwiseConvolution2dFixture
+struct DepthwiseConvolution2dNhwcSameFixture : DepthwiseConvolution2dFixture
 {
-    DepthwiseConvolution2dSameFixture() : DepthwiseConvolution2dFixture("SAME") { }
+    DepthwiseConvolution2dNhwcSameFixture() : DepthwiseConvolution2dFixture("NHWC", "SAME") { }
 };
 
-BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DSame, DepthwiseConvolution2dSameFixture)
+BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DNhwcSame, DepthwiseConvolution2dNhwcSameFixture)
 {
     RunTest<4>({ 1, 2, 3, 4, 5, 6, 7, 8, 9 },
                { 2.5f, 5.f,  2.5f, 3.5f, 7.f,  3.5f, 4.5f, 9.f,  4.5f,
                  6.f,  12.f, 6.f,  7.5f, 15.f, 7.5f, 9.f,  18.f, 9.f,
-                 5.5f, 11.f, 5.5f, 6.5f, 13.f, 6.5f, 7.5f, 15.f, 7.5f});
+                 5.5f, 11.f, 5.5f, 6.5f, 13.f, 6.5f, 7.5f, 15.f, 7.5f });
 }
 
-struct DepthwiseConvolution2dValidFixture : DepthwiseConvolution2dFixture
+struct DepthwiseConvolution2dNchwSameFixture : DepthwiseConvolution2dFixture
 {
-    DepthwiseConvolution2dValidFixture() : DepthwiseConvolution2dFixture("VALID") { }
+    DepthwiseConvolution2dNchwSameFixture() : DepthwiseConvolution2dFixture("NCHW", "SAME") { }
 };
 
-BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DValid, DepthwiseConvolution2dValidFixture)
+BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DNchwSame, DepthwiseConvolution2dNchwSameFixture)
+{
+    RunTest<4>({ 1, 4, 7, 2, 5, 8, 3, 6, 9 },
+               { 2.5f, 6.f, 5.5f, 5.f, 12.f, 11.f, 2.5f, 6.f, 5.5f,
+                 3.5f, 7.5f, 6.5f, 7.f, 15.f, 13.f, 3.5f, 7.5f, 6.5f,
+                 4.5f, 9.f, 7.5f, 9.f, 18.f, 15.f, 4.5f, 9.f, 7.5f });
+}
+
+struct DepthwiseConvolution2dNhwcValidFixture : DepthwiseConvolution2dFixture
+{
+    DepthwiseConvolution2dNhwcValidFixture() : DepthwiseConvolution2dFixture("NHWC", "VALID") { }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DNhwcValid, DepthwiseConvolution2dNhwcValidFixture)
 {
     RunTest<4>({ 1, 2, 3, 4, 5, 6, 7, 8, 9 }, // input data
-               { 6.f,  12.f, 6.f,  7.5f, 15.f, 7.5f, 9.f,  18.f, 9.f });  // output expected data
+               { 6.f, 12.f, 6.f, 7.5f, 15.f, 7.5f, 9.f, 18.f, 9.f });  // output expected data
+}
+
+struct DepthwiseConvolution2dNchwValidFixture : DepthwiseConvolution2dFixture
+{
+    DepthwiseConvolution2dNchwValidFixture() : DepthwiseConvolution2dFixture("NCHW", "VALID") { }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DNchwValid, DepthwiseConvolution2dNchwValidFixture)
+{
+     RunTest<4>({ 1, 4, 7, 2, 5, 8, 3, 6, 9 },
+                { 6.f, 12.f, 6.f, 7.5f, 15.f, 7.5f, 9.f, 18.f, 9.f });
 }