IVGCVSW-6457 Add FLOOR_DIV Support to the TfLiteDelegate
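
FLOOR_DIV is implemented by chaining an Arm NN Division layer with a Floor
layer. A new MultiLayerFacade utility presents the two-layer chain to the
graph construction code as a single IConnectableLayer, and the Floor
validation logic is moved out of Round.hpp into new SharedFunctions files so
that the Floor and FLOOR_DIV paths can share it. When the inputs and output
of the division are all Signed32 the trailing Floor layer is omitted.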

Change-Id: Ia4bf42b1f3f86b947825dff8e538d2d4343effab
Signed-off-by: Jim Flynn <jim.flynn@arm.com>
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 54ddd61..9d57d21 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -32,6 +32,7 @@
         src/Gather.hpp
         src/LogicalBinary.hpp
         src/Lstm.hpp
+        src/MultiLayerFacade.hpp
         src/Normalization.hpp
         src/Pack.hpp
         src/Pad.hpp
@@ -43,6 +44,8 @@
         src/Resize.hpp
         src/Round.hpp
         src/Shape.hpp
+        src/SharedFunctions.hpp
+        src/SharedFunctions.cpp
         src/Slice.hpp
         src/Softmax.hpp
         src/SpaceDepth.hpp
diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp
index 4517445..940d269 100644
--- a/delegate/src/DelegateUtils.hpp
+++ b/delegate/src/DelegateUtils.hpp
@@ -595,7 +595,7 @@
     for (unsigned int inputIndex = 0; inputIndex < layer->GetNumInputSlots(); ++inputIndex)
     {
         const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[inputIndex]];
-        if(tflite::IsConstantTensor(&tfLiteInputTensor))
+        if (tflite::IsConstantTensor(&tfLiteInputTensor))
         {
             armnn::TensorInfo inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
             bool isSupported = false;
@@ -618,7 +618,6 @@
 
             delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]] = &outputSlot;
         }
-
     }
     return kTfLiteOk;
 }
@@ -633,4 +632,13 @@
     return static_cast<unsigned int>(wrappedIndex);
 };
 
+bool AreAllSigned32(const armnn::TensorInfo& inputInfo1,
+                    const armnn::TensorInfo& inputInfo2,
+                    const armnn::TensorInfo& outputInfo)
+{
+    return (armnn::DataType::Signed32 == inputInfo1.GetDataType()) &&
+           (armnn::DataType::Signed32 == inputInfo2.GetDataType()) &&
+           (armnn::DataType::Signed32 == outputInfo.GetDataType());
+}
+
 } // namespace anonymous
diff --git a/delegate/src/ElementwiseBinary.hpp b/delegate/src/ElementwiseBinary.hpp
index 58d7aca..0534c07 100644
--- a/delegate/src/ElementwiseBinary.hpp
+++ b/delegate/src/ElementwiseBinary.hpp
@@ -6,6 +6,8 @@
 #pragma once
 
 #include "DelegateUtils.hpp"
+#include "MultiLayerFacade.hpp"
+#include "SharedFunctions.hpp"
 
 #include <tensorflow/lite/builtin_ops.h>
 #include <tensorflow/lite/c/builtin_op_data.h>
@@ -39,6 +41,7 @@
     return isSupported ? kTfLiteOk : kTfLiteError;
 }
 
+
 TfLiteStatus ValidateDivOperator(DelegateData& delegateData,
                                  TfLiteContext* tfLiteContext,
                                  const armnn::TensorInfo& inputInfo1,
@@ -62,6 +65,35 @@
     return isSupported ? kTfLiteOk : kTfLiteError;
 }
 
+TfLiteStatus ValidateFloorDivOperator(DelegateData& delegateData,
+                                      TfLiteContext* tfLiteContext,
+                                      const armnn::TensorInfo& inputInfo1,
+                                      const armnn::TensorInfo& inputInfo2,
+                                      const armnn::TensorInfo& outputInfo)
+{
+    // First validate that the div operator is supported,
+    // then that the floor operator is supported.
+    TfLiteStatus status = ValidateDivOperator(delegateData, tfLiteContext, inputInfo1, inputInfo2, outputInfo);
+    if (status != kTfLiteOk)
+    {
+        return status;
+    }
+    // If the inputs and output of the div are all Signed32, we don't need to add the floor operator afterwards.
+    if (AreAllSigned32(inputInfo1, inputInfo2, outputInfo))
+    {
+        return status;
+    }
+    // In case broadcasting is being done from one of the inputs to the div,
+    // choose the full-sized input tensor to pass to the floor validation routine.
+    armnn::TensorInfo floorInputInfo = inputInfo1;
+    if (inputInfo1.GetNumDimensions() < inputInfo2.GetNumDimensions())
+    {
+        floorInputInfo = inputInfo2;
+    }
+    status = ValidateFloorOperator(delegateData, tfLiteContext, floorInputInfo, outputInfo);
+    return status;
+}
+
 TfLiteStatus ValidateMaximumOperator(DelegateData& delegateData,
                                      TfLiteContext* tfLiteContext,
                                      const armnn::TensorInfo& inputInfo1,
@@ -154,6 +186,23 @@
     return isSupported ? kTfLiteOk : kTfLiteError;
 }
 
+std::pair<armnn::IConnectableLayer*, armnn::IConnectableLayer*> AddFloorDivLayer(
+    DelegateData& delegateData,
+    const armnn::TensorInfo& outputTensorInfo)
+{
+    armnn::IConnectableLayer* divisionLayer = delegateData.m_Network->AddDivisionLayer();
+    // If the output of the div is Signed32, the Floor layer is not required.
+    if (armnn::DataType::Signed32 == outputTensorInfo.GetDataType())
+    {
+        return std::make_pair(divisionLayer, divisionLayer);
+    }
+    armnn::IOutputSlot& outputSlot = divisionLayer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+    armnn::IConnectableLayer* floorLayer = delegateData.m_Network->AddFloorLayer();
+    outputSlot.Connect(floorLayer->GetInputSlot(0));
+    return std::make_pair(divisionLayer, floorLayer);
+}
+
 TfLiteStatus VisitElementwiseBinaryOperator(DelegateData& delegateData,
                                             TfLiteContext* tfLiteContext,
                                             TfLiteNode* tfLiteNode,
@@ -215,6 +264,12 @@
                                            inputTensorInfo0,
                                            inputTensorInfo1,
                                            outputTensorInfo);
+            case kTfLiteBuiltinFloorDiv:
+                return ValidateFloorDivOperator(delegateData,
+                                                tfLiteContext,
+                                                inputTensorInfo0,
+                                                inputTensorInfo1,
+                                                outputTensorInfo);
             case kTfLiteBuiltinMaximum:
                 return ValidateMaximumOperator(delegateData,
                                                tfLiteContext,
@@ -245,7 +300,7 @@
     }
 
     armnn::IConnectableLayer* elementwiseBinaryLayer = nullptr;
-
+    MultiLayerFacade multiLayer;
     switch(elementwiseBinaryOperatorCode)
     {
         case kTfLiteBuiltinAdd:
@@ -254,6 +309,13 @@
         case kTfLiteBuiltinDiv:
             elementwiseBinaryLayer = delegateData.m_Network->AddDivisionLayer();
             break;
+        case kTfLiteBuiltinFloorDiv:
+            {
+                auto layers = AddFloorDivLayer(delegateData, outputTensorInfo);
+                multiLayer.AssignValues(layers.first, layers.second);
+                elementwiseBinaryLayer = &multiLayer;
+            }
+            break;
         case kTfLiteBuiltinMaximum:
             elementwiseBinaryLayer = delegateData.m_Network->AddMaximumLayer();
             break;
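
For the FloorDiv branch added above, a minimal sketch of the wiring it produces
(assuming a non-Signed32 output so the Floor layer is needed; the local variable
names are illustrative):

    // What AddFloorDivLayer() builds: a Division layer feeding a Floor layer.
    armnn::IConnectableLayer* divisionLayer = delegateData.m_Network->AddDivisionLayer();
    armnn::IConnectableLayer* floorLayer    = delegateData.m_Network->AddFloorLayer();
    divisionLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
    divisionLayer->GetOutputSlot(0).Connect(floorLayer->GetInputSlot(0));

    // The facade presents the pair as one layer: input slots come from the
    // Division layer, the output slot comes from the Floor layer, so the
    // generic slot-connection code later in the function works unchanged.
    MultiLayerFacade multiLayer;
    multiLayer.AssignValues(divisionLayer, floorLayer);
    armnn::IConnectableLayer* elementwiseBinaryLayer = &multiLayer;
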
diff --git a/delegate/src/MultiLayerFacade.hpp b/delegate/src/MultiLayerFacade.hpp
new file mode 100644
index 0000000..fa23980
--- /dev/null
+++ b/delegate/src/MultiLayerFacade.hpp
@@ -0,0 +1,137 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+// NOTE: the MultiLayerFacade class is a utility class which makes a chain
+//       of operators look like a single IConnectableLayer, with the first
+//       layer in the chain supplying the input slots and the last supplying
+//       the output slots. It enables us, for example, to simulate a
+//       TensorFlow Lite FloorDiv operator by chaining a Div layer followed
+//       by a Floor layer and passing them as a single unit to the code that
+//       connects up the graph as the delegate proceeds to build up the
+//       Arm NN subgraphs.
+//
+
+#include <common/include/ProfilingGuid.hpp>
+#include <armnn/INetwork.hpp>
+
+namespace armnnDelegate
+{
+
+class MultiLayerFacade : public armnn::IConnectableLayer
+{
+public:
+    MultiLayerFacade() :
+        m_FirstLayer(nullptr), m_LastLayer(nullptr) {}
+
+    MultiLayerFacade(armnn::IConnectableLayer* firstLayer, armnn::IConnectableLayer* lastLayer) :
+        m_FirstLayer(firstLayer), m_LastLayer(lastLayer) {}
+
+    MultiLayerFacade(const MultiLayerFacade& obj) :
+        m_FirstLayer(obj.m_FirstLayer), m_LastLayer(obj.m_LastLayer) {}
+
+    ~MultiLayerFacade() {} // we don't own the pointers
+
+    MultiLayerFacade& operator=(const MultiLayerFacade& obj)
+    {
+        m_FirstLayer = obj.m_FirstLayer;
+        m_LastLayer = obj.m_LastLayer;
+        return *this;
+    }
+
+    void AssignValues(armnn::IConnectableLayer* firstLayer, armnn::IConnectableLayer* lastLayer)
+    {
+        m_FirstLayer = firstLayer;
+        m_LastLayer = lastLayer;
+    }
+
+    virtual const char* GetName() const override
+    {
+        return m_FirstLayer->GetName();
+    }
+
+    virtual unsigned int GetNumInputSlots() const override
+    {
+        return m_FirstLayer->GetNumInputSlots();
+    }
+
+    virtual unsigned int GetNumOutputSlots() const override
+    {
+        return m_LastLayer->GetNumOutputSlots();
+    }
+
+    virtual const armnn::IInputSlot& GetInputSlot(unsigned int index) const override
+    {
+        return m_FirstLayer->GetInputSlot(index);
+    }
+
+    virtual armnn::IInputSlot& GetInputSlot(unsigned int index) override
+    {
+        return m_FirstLayer->GetInputSlot(index);
+    }
+
+    virtual const armnn::IOutputSlot& GetOutputSlot(unsigned int index) const override
+    {
+        return m_LastLayer->GetOutputSlot(index);
+    }
+
+    virtual armnn::IOutputSlot& GetOutputSlot(unsigned int index) override
+    {
+        return m_LastLayer->GetOutputSlot(index);
+    }
+
+    virtual std::vector<armnn::TensorShape> InferOutputShapes(
+        const std::vector<armnn::TensorShape>& inputShapes) const override
+    {
+        // NOTE: this function is not expected to be used. If it is, it will likely need to
+        //       be overridden for particular sequences of operators.
+        return m_FirstLayer->InferOutputShapes(inputShapes);
+    }
+
+    virtual armnn::LayerGuid GetGuid() const override
+    {
+        return m_FirstLayer->GetGuid();
+    }
+
+    // The Accept function needs to be wrapped in a no-warn macro to avoid deprecation warnings
+    // from the deprecated ILayerVisitor which is used in the function.
+    ARMNN_NO_DEPRECATE_WARN_BEGIN
+    ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Accept is deprecated. The ILayerVisitor that works in conjunction with this "
+                                      "Accept function is deprecated. Use IStrategy in combination with "
+                                      "ExecuteStrategy instead, which is an ABI/API stable version of the "
+                                      "visitor pattern.",
+                                      "22.05")
+    virtual void Accept(armnn::ILayerVisitor& visitor) const override
+    {
+        // This function is not expected to be used, so no implementation is provided.
+    }
+    ARMNN_NO_DEPRECATE_WARN_END
+
+    virtual void ExecuteStrategy(armnn::IStrategy& strategy) const override
+    {
+        // This function is not expected to be used, so no implementation is provided.
+        // If an implementation were required and the chain contained more than two operators,
+        // we would have to provide a way to record the intermediate layers so they could be
+        // visited... the same applies to the Accept method above and the BackendSelectionHint
+        // below.
+    }
+
+    virtual void BackendSelectionHint(armnn::Optional<armnn::BackendId> backend) override
+    {
+        // This function is not expected to be used, so no implementation is provided.
+    }
+
+    virtual armnn::LayerType GetType() const override
+    {
+        return m_FirstLayer->GetType();
+    }
+
+private:
+    armnn::IConnectableLayer* m_FirstLayer;
+    armnn::IConnectableLayer* m_LastLayer;
+};
+
+} // namespace armnnDelegate
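
A short illustration of the facade's contract as used by the FloorDiv path
(sketch only; divisionLayer and floorLayer are the layers created by
AddFloorDivLayer in ElementwiseBinary.hpp):

    armnnDelegate::MultiLayerFacade facade(divisionLayer, floorLayer);

    // Input slots are forwarded to the first layer in the chain (the Division),
    // output slots to the last layer (the Floor).
    armnn::IInputSlot&  dividendSlot = facade.GetInputSlot(0);
    armnn::IInputSlot&  divisorSlot  = facade.GetInputSlot(1);
    armnn::IOutputSlot& resultSlot   = facade.GetOutputSlot(0);
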
diff --git a/delegate/src/Round.hpp b/delegate/src/Round.hpp
index 1677607..016af11 100644
--- a/delegate/src/Round.hpp
+++ b/delegate/src/Round.hpp
@@ -5,6 +5,8 @@
 
 #pragma once
 
+#include "SharedFunctions.hpp"
+
 #include <tensorflow/lite/builtin_ops.h>
 #include <tensorflow/lite/c/builtin_op_data.h>
 #include <tensorflow/lite/c/common.h>
@@ -36,27 +38,17 @@
     }
 
     const armnn::TensorInfo& inputTensorInfo  = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    // NOTE: it looks like outputTensorInfo is the only thing required for the case
+    //       where we are adding the floor layer, so the other lookups could be moved
+    //       inside the if (!delegateData.m_Network) block below for efficiency.
     const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
 
-    bool isSupported = false;
-    auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported)
-    {
-        FORWARD_LAYER_SUPPORT_FUNC(__func__,
-                                   tfLiteContext,
-                                   IsFloorSupported,
-                                   delegateData.m_Backends,
-                                   isSupported,
-                                   inputTensorInfo,
-                                   outInfo);
-    };
-
     // If the m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to clarify the
     // support for the operator
     // If supported, VisitFloorOperator will be called again to add the layer to the network as seen further below
     if (!delegateData.m_Network)
     {
-        validateFunc(outputTensorInfo, isSupported);
-        return isSupported ? kTfLiteOk : kTfLiteError;
+        return ValidateFloorOperator(delegateData, tfLiteContext, inputTensorInfo, outputTensorInfo);
     }
 
     // Add a Floor layer
diff --git a/delegate/src/SharedFunctions.cpp b/delegate/src/SharedFunctions.cpp
new file mode 100644
index 0000000..79b9f90
--- /dev/null
+++ b/delegate/src/SharedFunctions.cpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+
+#include "SharedFunctions.hpp"
+
+#include "DelegateUtils.hpp"
+
+#include <tensorflow/lite/builtin_ops.h>
+#include <tensorflow/lite/c/builtin_op_data.h>
+#include <tensorflow/lite/c/common.h>
+#include <tensorflow/lite/minimal_logging.h>
+
+namespace armnnDelegate
+{
+
+TfLiteStatus ValidateFloorOperator(DelegateData& delegateData,
+                                   TfLiteContext* tfLiteContext,
+                                   const armnn::TensorInfo& inputTensorInfo,
+                                   const armnn::TensorInfo& outputTensorInfo)
+{
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsFloorSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outInfo);
+    };
+    validateFunc(outputTensorInfo, isSupported);
+    return isSupported ? kTfLiteOk : kTfLiteError;
+}
+
+} // namespace armnnDelegate
+
diff --git a/delegate/src/SharedFunctions.hpp b/delegate/src/SharedFunctions.hpp
new file mode 100644
index 0000000..bf6b603
--- /dev/null
+++ b/delegate/src/SharedFunctions.hpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn_delegate.hpp>
+
+namespace armnnDelegate
+{
+
+TfLiteStatus ValidateFloorOperator(DelegateData& delegateData,
+                                   TfLiteContext* tfLiteContext,
+                                   const armnn::TensorInfo& inputTensorInfo,
+                                   const armnn::TensorInfo& outputTensorInfo);
+
+} // namespace armnnDelegate
+
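
Both the Floor visitor in Round.hpp and the new FloorDiv validation in
ElementwiseBinary.hpp now go through this shared helper; a typical call site
looks like:

    TfLiteStatus status = ValidateFloorOperator(delegateData,
                                                tfLiteContext,
                                                inputTensorInfo,
                                                outputTensorInfo);
    if (status != kTfLiteOk)
    {
        return status;
    }
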
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index 966d17c..4c1bc57 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -42,6 +42,7 @@
 #include <armnnUtils/Filesystem.hpp>
 #include <flatbuffers/flatbuffers.h>
 #include <tensorflow/lite/context_util.h>
+#include <tensorflow/lite/schema/schema_generated.h>
 
 #include <algorithm>
 #include <iostream>
@@ -583,6 +584,12 @@
                                       tfLiteNode,
                                       nodeIndex,
                                       kTfLiteBuiltinFloor);
+        case kTfLiteBuiltinFloorDiv:
+            return VisitElementwiseBinaryOperator(delegateData,
+                                                  tfLiteContext,
+                                                  tfLiteNode,
+                                                  nodeIndex,
+                                                  kTfLiteBuiltinFloorDiv);
         case kTfLiteBuiltinFullyConnected:
             return VisitFullyConnectedOperator(delegateData,
                                                tfLiteContext,
diff --git a/delegate/src/test/ElementwiseBinaryTest.cpp b/delegate/src/test/ElementwiseBinaryTest.cpp
index 448b3e6..9d03204 100644
--- a/delegate/src/test/ElementwiseBinaryTest.cpp
+++ b/delegate/src/test/ElementwiseBinaryTest.cpp
@@ -332,6 +332,43 @@
                                    expectedOutputValues, 0.25f, 0);
 }
 
+void FloorDivFP32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> input0Shape { 2, 2, 2, 2 };
+    std::vector<int32_t> input1Shape { 2, 2, 2, 2 };
+    std::vector<int32_t> expectedOutputShape { 2, 2, 2, 2 };
+
+    std::vector<float> input0Values =
+    {
+        -37.5f, -15.2f, -8.76f, -2.0f,  -2.6f, -1.0f,  -0.8f,   0.0f,
+          4.0f,   1.6f,  2.0f,   5.2f,   6.0f, 35.04f, 60.8f, 150.0f
+    };
+
+    std::vector<float> input1Values =
+    {
+        1.f, 1.f, 1.f, 1.f, 2.f, 2.f, 2.f, 2.f,
+        4.f, 4.f, 4.f, 4.f, 4.f, 4.f, 4.f, 4.f
+    };
+
+    std::vector<float> expectedOutputValues =
+    {
+        -38.0f, -16.0f, -9.0f,  -2.0f, -2.0f, -1.0f,  -1.0f,  0.0f,
+          1.0f,   0.0f,  0.0f,   1.0f,  1.0f,  8.0f,  15.0f, 37.0f
+    };
+
+    ElementwiseBinaryTest<float>(tflite::BuiltinOperator_FLOOR_DIV,
+                                 tflite::ActivationFunctionType_NONE,
+                                 ::tflite::TensorType_FLOAT32,
+                                 backends,
+                                 input0Shape,
+                                 input1Shape,
+                                 expectedOutputShape,
+                                 input0Values,
+                                 input1Values,
+                                 expectedOutputValues);
+
+}
+
 void MaxFP32Test(std::vector<armnn::BackendId>& backends)
 {
     std::vector<int32_t> input0Shape { 2, 2, 2, 2 };
@@ -745,6 +782,12 @@
     DivBroadcastTest(backends);
 }
 
+TEST_CASE ("FLOORDIV_FP32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
+    FloorDivFP32Test(backends);
+}
+
 TEST_CASE ("MAX_FP32_GpuAcc_Test")
 {
     std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc };
@@ -866,6 +909,12 @@
     DivBroadcastTest(backends);
 }
 
+TEST_CASE ("FLOORDIV_FP32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    FloorDivFP32Test(backends);
+}
+
 TEST_CASE ("MAX_FP32_CpuAcc_Test")
 {
     std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
@@ -992,6 +1041,12 @@
     DivBroadcastTest(backends);
 }
 
+TEST_CASE ("FLOORDIV_FP32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+    FloorDivFP32Test(backends);
+}
+
 TEST_CASE ("DIV_UINT8_CpuRef_Test")
 {
     std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
@@ -1078,4 +1133,4 @@
 
 } // TEST_SUITE("ElementwiseBinary_CpuRefTests")
 
-} // namespace armnnDelegate
\ No newline at end of file
+} // namespace armnnDelegate
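
The expected values in FloorDivFP32Test follow floor semantics (rounding toward
negative infinity) rather than truncation toward zero, e.g. -37.5 / 1 gives -38
and -15.2 / 1 gives -16. A standalone check of a few of the reference values,
separate from the test helper:

    #include <cassert>
    #include <cmath>

    int main()
    {
        // FLOOR_DIV on floats: divide, then round toward negative infinity.
        assert(std::floor(-37.5f / 1.0f) == -38.0f);
        assert(std::floor(-15.2f / 1.0f) == -16.0f);
        assert(std::floor(-2.6f  / 2.0f) ==  -2.0f);
        assert(std::floor(150.0f / 4.0f) ==  37.0f);
        return 0;
    }
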
diff --git a/delegate/src/test/ElementwiseBinaryTestHelper.hpp b/delegate/src/test/ElementwiseBinaryTestHelper.hpp
index 13b336e..69b0c88 100644
--- a/delegate/src/test/ElementwiseBinaryTestHelper.hpp
+++ b/delegate/src/test/ElementwiseBinaryTestHelper.hpp
@@ -123,6 +123,12 @@
             operatorBuiltinOptions = CreateSubOptions(flatBufferBuilder, activationType).Union();
             break;
         }
+        case BuiltinOperator_FLOOR_DIV:
+        {
+            operatorBuiltinOptionsType = tflite::BuiltinOptions_FloorDivOptions;
+            operatorBuiltinOptions = CreateFloorDivOptions(flatBufferBuilder).Union();
+            break;
+        }
         default:
             break;
     }