IVGCVSW-6165 Add Support for Conv3d to TfLiteParser

 * Added CONV_3D operator support.
 * Fixed TfLiteParser test issue where QAsymmU8 tests were incorrectly
   using the Boolean comparison function.

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I9db03b54b6de63f918717af80c5ac5310fe26183
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 68dbbd1..104a55e 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -642,6 +642,7 @@
     m_ParserFunctions[tflite::BuiltinOperator_CAST]                    = &TfLiteParserImpl::ParseCast;
     m_ParserFunctions[tflite::BuiltinOperator_CONCATENATION]           = &TfLiteParserImpl::ParseConcatenation;
     m_ParserFunctions[tflite::BuiltinOperator_CONV_2D]                 = &TfLiteParserImpl::ParseConv2D;
+    m_ParserFunctions[tflite::BuiltinOperator_CONV_3D]                 = &TfLiteParserImpl::ParseConv3D;
     m_ParserFunctions[tflite::BuiltinOperator_CUSTOM]                  = &TfLiteParserImpl::ParseCustomOperator;
     m_ParserFunctions[tflite::BuiltinOperator_DEPTH_TO_SPACE]          = &TfLiteParserImpl::ParseDepthToSpace;
     m_ParserFunctions[tflite::BuiltinOperator_DEPTHWISE_CONV_2D]       = &TfLiteParserImpl::ParseDepthwiseConv2D;
@@ -1048,6 +1049,90 @@
     RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]});
 }
 
+void TfLiteParserImpl::ParseConv3D(size_t subgraphIndex, size_t operatorIndex)
+{
+    CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
+
+    const auto& operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
+    const auto* options = operatorPtr->builtin_options.AsConv3DOptions();
+
+    CHECK_SUPPORTED_FUSED_ACTIVATION(options, subgraphIndex, operatorIndex);
+
+    Convolution3dDescriptor desc;
+    desc.m_BiasEnabled = false;
+    desc.m_DataLayout = armnn::DataLayout::NDHWC;
+    desc.m_StrideX = CHECKED_NON_NEGATIVE(options->stride_w);
+    desc.m_StrideY = CHECKED_NON_NEGATIVE(options->stride_h);
+    desc.m_StrideZ = CHECKED_NON_NEGATIVE(options->stride_d);
+    desc.m_DilationX = CHECKED_NON_NEGATIVE(options->dilation_w_factor);
+    desc.m_DilationY = CHECKED_NON_NEGATIVE(options->dilation_h_factor);
+    desc.m_DilationZ = CHECKED_NON_NEGATIVE(options->dilation_d_factor);
+
+    auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
+    CHECK_VALID_SIZE(inputs.size(), 2, 3);
+
+    auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
+    CHECK_VALID_SIZE(outputs.size(), 1);
+
+    armnn::TensorInfo inputTensorInfo  = ToTensorInfo(inputs[0]);
+    armnn::TensorInfo filterTensorInfo = ToTensorInfo(inputs[1]);
+
+    // Assuming input is NDHWC
+    unsigned int inputDepth  = inputTensorInfo.GetShape()[1];
+    unsigned int inputHeight = inputTensorInfo.GetShape()[2];
+    unsigned int inputWidth  = inputTensorInfo.GetShape()[3];
+
+    // Assuming the filter is DHWIO : Depth, Height, Width, InputChannels, OutputChannels
+    unsigned int filterDepth  = filterTensorInfo.GetShape()[0];
+    unsigned int filterHeight = filterTensorInfo.GetShape()[1];
+    unsigned int filterWidth  = filterTensorInfo.GetShape()[2];
+
+    CalcPadding(inputDepth, filterDepth, desc.m_StrideZ,
+                desc.m_DilationZ, desc.m_PadFront, desc.m_PadBack, options->padding);
+    CalcPadding(inputHeight, filterHeight, desc.m_StrideY,
+                desc.m_DilationY, desc.m_PadTop, desc.m_PadBottom, options->padding);
+    CalcPadding(inputWidth, filterWidth, desc.m_StrideX,
+                desc.m_DilationX, desc.m_PadLeft, desc.m_PadRight, options->padding);
+
+    auto filterTensorAndData = CreateConstTensorNonPermuted(inputs[1], filterTensorInfo);
+
+    armnn::IConnectableLayer* layer = nullptr;
+    auto layerName = fmt::format("Conv3D:{}:{}", subgraphIndex, operatorIndex);
+
+    if (inputs.size() == 3)
+    {
+        desc.m_BiasEnabled = true;
+        armnn::TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]);
+        auto biasTensorAndData = CreateConstTensorNonPermuted(inputs[2], biasTensorInfo);
+        layer = m_Network->AddConvolution3dLayer(desc,
+                                                 filterTensorAndData,
+                                                 Optional<ConstTensor>(biasTensorAndData),
+                                                 layerName.c_str());
+    }
+    else
+    {
+        layer = m_Network->AddConvolution3dLayer(desc,
+                                                 filterTensorAndData,
+                                                 EmptyOptional(),
+                                                 layerName.c_str());
+    }
+
+    ARMNN_ASSERT(layer != nullptr);
+
+    armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0], true);
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // Register the input connection slots for the layer, connections are made after all layers have been created
+    // only the tensors for the inputs are relevant, exclude the const tensors
+    auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]});
+
+    layer = AddFusedActivationLayer(layer, 0, options->fused_activation_function);
+    // Register the output connection slots for the layer, connections are made after all layers have been created
+    auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]});
+}
+
 void TfLiteParserImpl::ParseDepthwiseConv2D(size_t subgraphIndex, size_t operatorIndex)
 {
     CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index 5645503..8eb5299 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -113,6 +113,7 @@
     void ParseComparison(size_t subgraphIndex, size_t operatorIndex, armnn::ComparisonOperation comparisonOperation);
     void ParseConcatenation(size_t subgraphIndex, size_t operatorIndex);
     void ParseConv2D(size_t subgraphIndex, size_t operatorIndex);
+    void ParseConv3D(size_t subgraphIndex, size_t operatorIndex);
     void ParseDepthToSpace(size_t subgraphIndex, size_t operatorIndex);
     void ParseDepthwiseConv2D(size_t subgraphIndex, size_t operatorIndex);
     void ParseDequantize(size_t subgraphIndex, size_t operatorIndex);
diff --git a/src/armnnTfLiteParser/test/Conv3D.cpp b/src/armnnTfLiteParser/test/Conv3D.cpp
new file mode 100644
index 0000000..32cd6fe
--- /dev/null
+++ b/src/armnnTfLiteParser/test/Conv3D.cpp
@@ -0,0 +1,286 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ParserFlatbuffersFixture.hpp"
+#include <sstream>
+
+TEST_SUITE("TensorflowLiteParser_Conv3D")
+{
+struct SimpleConv3DFixture : public ParserFlatbuffersFixture
+{
+    explicit SimpleConv3DFixture()
+    {
+        m_JsonString = R"(
+            {
+                "version": 3,
+                "operator_codes": [ { "builtin_code": "CONV_3D" } ],
+                "subgraphs": [ {
+                    "tensors": [
+                        {
+                            "shape": [ 1, 2, 3, 3, 1 ],
+                            "type": "UINT8",
+                            "buffer": 0,
+                            "name": "inputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": [ 1, 1, 1, 1, 1 ],
+                            "type": "UINT8",
+                            "buffer": 1,
+                            "name": "outputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 511.0 ],
+                                "scale": [ 2.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": [ 2, 3, 3, 1, 1 ],
+                            "type": "UINT8",
+                            "buffer": 2,
+                            "name": "filterTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        }
+                    ],
+                    "inputs": [ 0 ],
+                    "outputs": [ 1 ],
+                    "operators": [
+                        {
+                            "opcode_index": 0,
+                            "inputs": [ 0, 2 ],
+                            "outputs": [ 1 ],
+                            "builtin_options_type": "Conv3DOptions",
+                            "builtin_options": {
+                                "padding": "VALID",
+                                "stride_d": 1,
+                                "stride_w": 1,
+                                "stride_h": 1,
+                                "fused_activation_function": "NONE"
+                            },
+                            "custom_options_format": "FLEXBUFFERS"
+                        }
+                    ],
+                } ],
+                "buffers" : [
+                    { },
+                    { },
+                    { "data": [ 2,1,0,  6,2,1, 4,1,2,
+                                1,2,1,  2,0,2, 2,1,1 ], },
+                    { },
+                ]
+            }
+        )";
+        SetupSingleInputSingleOutput("inputTensor", "outputTensor");
+    }
+};
+
+TEST_CASE_FIXTURE(SimpleConv3DFixture, "ParseSimpleConv3D")
+{
+    RunTest<5, armnn::DataType::QAsymmU8>(
+        0,
+        {
+            1, 2, 3,
+            4, 5, 6,
+            7, 8, 9,
+
+            10, 11, 12,
+            13, 14, 15,
+            16, 17, 18,
+        },
+        // Due to the output quantization scale of 2.0 we need to halve the expected values.
+        {
+            (1*2 + 2*1 + 3*0 +
+             4*6 + 5*2 + 6*1 +
+             7*4 + 8*1 + 9*2 +
+
+             10*1 + 11*2 + 12*1 +
+             13*2 + 14*0 + 15*2 +
+             16*2 + 17*1 + 18*1) /2
+        });
+}
+struct Conv3DWithBiasesFixture : public ParserFlatbuffersFixture
+{
+    explicit Conv3DWithBiasesFixture(const std::string& inputShape,
+                                     const std::string& outputShape,
+                                     const std::string& filterShape,
+                                     const std::string& filterData,
+                                     const std::string& biasShape,
+                                     const std::string& biasData,
+                                     const std::string& strides,
+                                     const std::string& activation="NONE",
+                                     const std::string& filterScale="1.0",
+                                     const std::string& filterZeroPoint="0",
+                                     const std::string& outputScale="1.0",
+                                     const std::string& outputZeroPoint="0")
+    {
+        m_JsonString = R"(
+            {
+                "version": 3,
+                "operator_codes": [ { "builtin_code": "CONV_3D" } ],
+                "subgraphs": [ {
+                    "tensors": [
+                        {
+                            "shape": )" + inputShape + R"(,
+                            "type": "UINT8",
+                            "buffer": 0,
+                            "name": "inputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + outputShape + R"(,
+                            "type": "UINT8",
+                            "buffer": 1,
+                            "name": "outputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 511.0 ],
+                                "scale": [ )" + outputScale + R"( ],
+                                "zero_point": [ )" + outputZeroPoint + R"( ],
+                            }
+                        },
+                        {
+                            "shape": )" + filterShape + R"( ,
+                            "type": "UINT8",
+                            "buffer": 2,
+                            "name": "filterTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ )" + filterScale + R"( ],
+                                "zero_point": [ )" + filterZeroPoint + R"( ],
+                            }
+                        },
+                        {
+                            "shape": )" + biasShape + R"( ,
+                            "type": "INT32",
+                            "buffer": 3,
+                            "name": "biasTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        }
+                    ],
+                    "inputs": [ 0 ],
+                    "outputs": [ 1 ],
+                    "operators": [
+                        {
+                            "opcode_index": 0,
+                            "inputs": [ 0, 2, 3 ],
+                            "outputs": [ 1 ],
+                            "builtin_options_type": "Conv3DOptions",
+                            "builtin_options": {
+                                "padding": "SAME",
+                                "stride_d": )" + strides + R"(,
+                                "stride_w": )" + strides + R"(,
+                                "stride_h": )" + strides + R"(,
+                                "fused_activation_function": )" + activation + R"(
+                            },
+                            "custom_options_format": "FLEXBUFFERS"
+                        }
+                    ],
+                } ],
+                "buffers" : [
+                    { },
+                    { },
+                    { "data": )" + filterData + R"(, },
+                    { "data": )" + biasData + R"(, },
+                ]
+            }
+        )";
+        SetupSingleInputSingleOutput("inputTensor", "outputTensor");
+    }
+};
+
+struct SimpleConv3DWithBiasesFixture : Conv3DWithBiasesFixture
+{
+    SimpleConv3DWithBiasesFixture()
+    : Conv3DWithBiasesFixture("[ 1, 2, 2, 2, 1 ]",      // inputShape
+                              "[ 1, 2, 2, 2, 1 ]",      // outputShape
+                              "[ 2, 2, 2, 1, 1 ]",      // filterShape
+                              "[ 2,1, 1,0, 0,1, 1,1 ]", // filterData
+                              "[ 1 ]",                  // biasShape
+                              "[ 5, 0, 0, 0 ]",         // biasData
+                              "1")                      // stride d, w and h
+    {}
+};
+
+TEST_CASE_FIXTURE(SimpleConv3DWithBiasesFixture, "ParseConv3DWithBias")
+{
+    RunTest<5,
+            armnn::DataType::QAsymmU8>(0,
+                                       { 1, 2, 3, 4, 5, 6, 7, 8 },
+                                       { 33, 21, 23, 13, 28, 25, 27, 21 });
+}
+
+TEST_CASE_FIXTURE(SimpleConv3DWithBiasesFixture, "ParseDynamicConv3DWithBias")
+{
+    RunTest<5,
+            armnn::DataType::QAsymmU8,
+            armnn::DataType::QAsymmU8>(0,
+                                       { { "inputTensor", { 2, 4, 6, 8, 10, 12, 14, 16 } } },
+                                       { { "outputTensor", {  61, 37, 41, 21, 51, 45, 49, 37 } } },
+                                       true);
+}
+
+struct Relu6Conv3DWithBiasesFixture : Conv3DWithBiasesFixture
+{
+    Relu6Conv3DWithBiasesFixture()
+    : Conv3DWithBiasesFixture("[ 1, 2, 2, 2, 1 ]",       // inputShape
+                              "[ 1, 2, 2, 2, 1 ]",       // outputShape
+                              "[ 2, 2, 2, 1, 1 ]",       // filterShape
+                              "[ 2,1, 1,0, 0,1, 1,1 ]",  // filterData
+                              "[ 1 ]",                   // biasShape
+                              "[ 0, 0, 0, 0 ]",          // biasData
+                              "1",                       // stride d, w, and h
+                              "RELU6",                   // activation
+                              "1.0",                     // filter scale
+                              "0",                       // filter zero point
+                              "2.0",                     // output scale
+                              "0")                       // output zero point
+    {}
+};
+
+TEST_CASE_FIXTURE(Relu6Conv3DWithBiasesFixture, "ParseConv3DAndRelu6WithBias")
+{
+    uint8_t relu6Min = 6 / 2; // Divide by output scale
+
+    RunTest<5, armnn::DataType::QAsymmU8>(
+        0,
+        {
+           1, 2, 3, 4, 5, 6, 7, 8
+        },
+        // RELU6 cuts output values at +6
+        {
+            std::min(relu6Min, static_cast<uint8_t>((1*2 + 2*1 + 3*1 + 4*0 + 5*0 + 6*1 + 7*1 + 8*1)/2)),
+            std::min(relu6Min, static_cast<uint8_t>((2*2 + 0*1 + 0*1 + 0*0 + 0*0 + 0*1 + 8*1 + 0*1)/2)),
+            std::min(relu6Min, static_cast<uint8_t>((3*2 + 0*1 + 0*1 + 0*0 + 0*0 + 8*1 + 0*1 + 0*1)/2)),
+            std::min(relu6Min, static_cast<uint8_t>((4*2 + 0*1 + 0*1 + 0*0 + 8*0 + 0*1 + 0*1 + 0*1)/2)),
+            std::min(relu6Min, static_cast<uint8_t>((5*2 + 0*1 + 0*1 + 8*0 + 0*0 + 0*1 + 0*1 + 0*1)/2)),
+            std::min(relu6Min, static_cast<uint8_t>((6*2 + 0*1 + 8*1 + 0*0 + 0*0 + 0*1 + 0*1 + 0*1)/2)),
+            std::min(relu6Min, static_cast<uint8_t>((7*2 + 8*1 + 0*1 + 0*0 + 0*0 + 0*1 + 0*1 + 0*1)/2)),
+            std::min(relu6Min, static_cast<uint8_t>((8*2 + 0*1 + 0*1 + 0*0 + 0*0 + 0*1 + 0*1 + 0*1)/2))
+        });
+}
+
+}
diff --git a/src/armnnTfLiteParser/test/ParserFlatbuffersFixture.hpp b/src/armnnTfLiteParser/test/ParserFlatbuffersFixture.hpp
index b4653cd..a237d2f 100644
--- a/src/armnnTfLiteParser/test/ParserFlatbuffersFixture.hpp
+++ b/src/armnnTfLiteParser/test/ParserFlatbuffersFixture.hpp
@@ -376,25 +376,18 @@
 
     m_Runtime->EnqueueWorkload(m_NetworkIdentifier, inputTensors, outputTensors);
 
+    // Set flag so that the correct comparison function is called if the output is boolean.
+    bool isBoolean = armnnType2 == armnn::DataType::Boolean ? true : false;
+
     // Compare each output tensor to the expected values
     for (auto&& it : expectedOutputData)
     {
         armnn::BindingPointInfo bindingInfo = m_Parser->GetNetworkOutputBindingInfo(subgraphId, it.first);
         auto outputExpected = it.second;
-        if (std::is_same<DataType2, uint8_t>::value)
-        {
-            auto result = CompareTensors(outputExpected, outputStorage[it.first],
-                                         bindingInfo.second.GetShape(), bindingInfo.second.GetShape(),
-                                         true, isDynamic);
-            CHECK_MESSAGE(result.m_Result, result.m_Message.str());
-        }
-        else
-        {
-            auto result = CompareTensors(outputExpected, outputStorage[it.first],
-                                         bindingInfo.second.GetShape(), bindingInfo.second.GetShape(),
-                                         false, isDynamic);
-            CHECK_MESSAGE(result.m_Result, result.m_Message.str());
-        }
+        auto result = CompareTensors(outputExpected, outputStorage[it.first],
+                                     bindingInfo.second.GetShape(), bindingInfo.second.GetShape(),
+                                     isBoolean, isDynamic);
+        CHECK_MESSAGE(result.m_Result, result.m_Message.str());
     }
 
     if (isDynamic)
@@ -504,22 +497,17 @@
 
     m_Runtime->EnqueueWorkload(m_NetworkIdentifier, inputTensors, outputTensors);
 
+    // Set flag so that the correct comparison function is called if the output is boolean.
+    bool isBoolean = outputType == armnn::DataType::Boolean ? true : false;
+
     // Compare each output tensor to the expected values
     for (auto&& it : expectedOutputData)
     {
         armnn::BindingPointInfo bindingInfo = m_Parser->GetNetworkOutputBindingInfo(subgraphId, it.first);
         auto outputExpected = it.second;
-        if (std::is_same<DataType2, uint8_t>::value)
-        {
-            auto result = CompareTensors(outputExpected, outputStorage[it.first],
-                                         bindingInfo.second.GetShape(), bindingInfo.second.GetShape(), true);
-            CHECK_MESSAGE(result.m_Result, result.m_Message.str());
-        }
-        else
-        {
-            auto result = CompareTensors(outputExpected, outputStorage[it.first],
-                                         bindingInfo.second.GetShape(), bindingInfo.second.GetShape());
-            CHECK_MESSAGE(result.m_Result, result.m_Message.str());
-        }
+        auto result = CompareTensors(outputExpected, outputStorage[it.first],
+                                     bindingInfo.second.GetShape(), bindingInfo.second.GetShape(),
+                                     isBoolean);
+        CHECK_MESSAGE(result.m_Result, result.m_Message.str());
     }
 }
\ No newline at end of file