IVGCVSW-6858 Add GATHER_ND Support to the TfLite Delegate


Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I56418875b3bb2ae45b5c69bfeaafa1a6126b8085
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index bae1d31..d488de4 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -30,6 +30,7 @@
         src/Fill.hpp
         src/FullyConnected.hpp
         src/Gather.hpp
+        src/GatherNd.hpp
         src/LogicalBinary.hpp
         src/Lstm.hpp
         src/MultiLayerFacade.hpp
@@ -161,6 +162,8 @@
         src/test/FullyConnectedTestHelper.hpp
         src/test/GatherTest.cpp
         src/test/GatherTestHelper.hpp
+        src/test/GatherNdTest.cpp
+        src/test/GatherNdTestHelper.hpp
         src/test/LogicalTest.cpp
         src/test/LogicalTestHelper.hpp
         src/test/LstmTest.cpp
diff --git a/delegate/src/GatherNd.hpp b/delegate/src/GatherNd.hpp
new file mode 100644
index 0000000..b2d7a508
--- /dev/null
+++ b/delegate/src/GatherNd.hpp
@@ -0,0 +1,81 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DelegateUtils.hpp"
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <vector>
+
+namespace armnnDelegate
+{
+TfLiteStatus VisitGatherNdOperator(DelegateData& delegateData,
+                                 TfLiteContext* tfLiteContext,
+                                 TfLiteNode* tfLiteNode,
+                                 int nodeIndex,
+                                 int32_t operatorCode)
+{ // Validates a GATHER_ND node, then either reports backend support or adds a GatherNd layer to the network.
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex));  // inputs: data, indices
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); // one output
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; // input 0: data tensor
+    if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteIndicesTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; // input 1: indices tensor
+    if (!IsValid(tfLiteContext, tfLiteIndicesTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& indicesTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteIndicesTensor);
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+    if (!delegateData.m_Network) // null network: only report support and return, no layer is added
+    {
+        // Check if supported: the macro is expected to set isSupported via IsGatherNdSupported on m_Backends
+        bool isSupported = false;
+        FORWARD_LAYER_SUPPORT_FUNC("GATHER_ND",
+                                   tfLiteContext,
+                                   IsGatherNdSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   indicesTensorInfo,
+                                   outputTensorInfo);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    armnn::IConnectableLayer* layer = delegateData.m_Network->AddGatherNdLayer(); // network present: build the layer
+    ARMNN_ASSERT(layer != nullptr);
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    auto inputsTensorsProcess = ProcessInputs(layer,
+                                              delegateData,
+                                              tfLiteContext,
+                                              tfLiteNode); // NOTE(review): presumably handles constant inputs - confirm
+    if (inputsTensorsProcess == kTfLiteError)
+    {
+        return inputsTensorsProcess;
+    }
+
+    Connect(layer, tfLiteNode, delegateData); // NOTE(review): presumably links the layer's slots to the node's tensors
+
+    return kTfLiteOk;
+}
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index 03db4a1..4d71f26 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -18,6 +18,7 @@
 #include "Fill.hpp"
 #include "FullyConnected.hpp"
 #include "Gather.hpp"
+#include "GatherNd.hpp"
 #include "LogicalBinary.hpp"
 #include "Lstm.hpp"
 #include "Normalization.hpp"
@@ -635,6 +636,12 @@
                                        tfLiteNode,
                                        nodeIndex,
                                        kTfLiteBuiltinGather);
+        case kTfLiteBuiltinGatherNd:
+            return VisitGatherNdOperator(delegateData,
+                                         tfLiteContext,
+                                         tfLiteNode,
+                                         nodeIndex,
+                                         kTfLiteBuiltinGatherNd);
         case kTfLiteBuiltinGreater:
             return VisitComparisonOperator(delegateData,
                                            tfLiteContext,
diff --git a/delegate/src/test/GatherNdTest.cpp b/delegate/src/test/GatherNdTest.cpp
new file mode 100644
index 0000000..b56a931
--- /dev/null
+++ b/delegate/src/test/GatherNdTest.cpp
@@ -0,0 +1,113 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "GatherNdTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+// GATHER_ND Operator tests. Uint8 case: picks elements 7, 6, 5 out of an 8-element 1-D params tensor.
+void GatherNdUint8Test(std::vector<armnn::BackendId>& backends)
+{
+
+    std::vector<int32_t> paramsShape{8};
+    std::vector<int32_t> indicesShape{3,1};        // three index tuples, each of length 1
+    std::vector<int32_t> expectedOutputShape{3};
+
+    std::vector<uint8_t> paramsValues{1, 2, 3, 4, 5, 6, 7, 8};
+    std::vector<int32_t> indicesValues{7, 6, 5};
+    std::vector<uint8_t> expectedOutputValues{8, 7, 6}; // paramsValues[7], [6], [5]
+
+    GatherNdTest<uint8_t>(::tflite::TensorType_UINT8,
+                          backends,
+                          paramsShape,
+                          indicesShape,
+                          expectedOutputShape,
+                          paramsValues,
+                          indicesValues,
+                          expectedOutputValues);
+}
+
+void GatherNdFp32Test(std::vector<armnn::BackendId>& backends)
+{ // Fp32 case: same shapes and indices as the Uint8 case, with float params.
+    std::vector<int32_t> paramsShape{8};
+    std::vector<int32_t> indicesShape{3,1};        // three index tuples, each of length 1
+    std::vector<int32_t> expectedOutputShape{3};
+
+    std::vector<float>   paramsValues{1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f};
+    std::vector<int32_t> indicesValues{7, 6, 5};
+    std::vector<float>   expectedOutputValues{8.8f, 7.7f, 6.6f}; // paramsValues[7], [6], [5]
+
+    GatherNdTest<float>(::tflite::TensorType_FLOAT32,
+                        backends,
+                        paramsShape,
+                        indicesShape,
+                        expectedOutputShape,
+                        paramsValues,
+                        indicesValues,
+                        expectedOutputValues);
+}
+
+// GATHER_ND Test Suites: the same Uint8 and Fp32 cases are run once per backend (CpuRef, CpuAcc, GpuAcc)
+TEST_SUITE("GATHER_ND_CpuRefTests")
+{
+
+TEST_CASE ("GATHER_ND_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef}; // CpuRef is the only requested backend
+    GatherNdUint8Test(backends);
+}
+
+TEST_CASE ("GATHER_ND_Fp32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef}; // CpuRef is the only requested backend
+    GatherNdFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("GATHER_ND_CpuAccTests")
+{ // Runs the Uint8 and Fp32 GATHER_ND cases with CpuAcc as the only requested backend.
+
+TEST_CASE ("GATHER_ND_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    GatherNdUint8Test(backends);
+}
+
+TEST_CASE ("GATHER_ND_Fp32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    GatherNdFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("GATHER_ND_GpuAccTests")
+{ // Runs the Uint8 and Fp32 GATHER_ND cases with GpuAcc as the only requested backend.
+
+TEST_CASE ("GATHER_ND_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    GatherNdUint8Test(backends);
+}
+
+TEST_CASE ("GATHER_ND_Fp32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    GatherNdFp32Test(backends);
+}
+
+}
+// End of GATHER_ND Test Suite
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/test/GatherNdTestHelper.hpp b/delegate/src/test/GatherNdTestHelper.hpp
new file mode 100644
index 0000000..f475584
--- /dev/null
+++ b/delegate/src/test/GatherNdTestHelper.hpp
@@ -0,0 +1,178 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+std::vector<char> CreateGatherNdTfLiteModel(tflite::TensorType tensorType,
+                                          std::vector<int32_t>& paramsShape,
+                                          std::vector<int32_t>& indicesShape,
+                                          const std::vector<int32_t>& expectedOutputShape,
+                                          float quantScale = 1.0f,
+                                          int quantOffset = 0)
+{ // Builds a flatbuffer model holding a single GATHER_ND operator and returns its serialized bytes.
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); // the only buffer: empty
+
+    auto quantizationParameters = // shared by all three tensors below
+             CreateQuantizationParameters(flatBufferBuilder,
+                                          0, // min (left unset)
+                                          0, // max (left unset)
+                                          flatBufferBuilder.CreateVector<float>({quantScale}),
+                                          flatBufferBuilder.CreateVector<int64_t>({quantOffset}));
+
+    std::array<flatbuffers::Offset<Tensor>, 3> tensors; // 0: params, 1: indices, 2: output
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(paramsShape.data(),
+                                                                      paramsShape.size()),
+                              tensorType,
+                              0, // buffer index: the empty buffer
+                              flatBufferBuilder.CreateString("params"),
+                              quantizationParameters);
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(indicesShape.data(),
+                                                                      indicesShape.size()),
+                              ::tflite::TensorType_INT32, // indices are INT32 whatever tensorType is
+                              0, // buffer index: the empty buffer
+                              flatBufferBuilder.CreateString("indices"),
+                              quantizationParameters);
+    tensors[2] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(expectedOutputShape.data(),
+                                                                      expectedOutputShape.size()),
+                              tensorType,
+                              0, // buffer index: the empty buffer
+                              flatBufferBuilder.CreateString("output"),
+                              quantizationParameters);
+
+
+    // Create the GATHER_ND operator: it reads tensors 0 and 1 and writes tensor 2
+    tflite::BuiltinOptions    operatorBuiltinOptionsType = tflite::BuiltinOptions_GatherNdOptions;
+    flatbuffers::Offset<void> operatorBuiltinOptions     = CreateGatherNdOptions(flatBufferBuilder).Union();
+
+    const std::vector<int>        operatorInputs{{0, 1}};
+    const std::vector<int>        operatorOutputs{2};
+    flatbuffers::Offset<Operator> controlOperator        =
+                                      CreateOperator(flatBufferBuilder,
+                                                     0, // opcode index: the single operator code added to the model
+                                                     flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(),
+                                                                                             operatorInputs.size()),
+                                                     flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(),
+                                                                                             operatorOutputs.size()),
+                                                     operatorBuiltinOptionsType,
+                                                     operatorBuiltinOptions);
+
+    const std::vector<int>        subgraphInputs{{0, 1}}; // both params and indices are fed at run time
+    const std::vector<int>        subgraphOutputs{2};
+    flatbuffers::Offset<SubGraph> subgraph               =
+                                      CreateSubGraph(flatBufferBuilder,
+                                                     flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                                                     flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(),
+                                                                                             subgraphInputs.size()),
+                                                     flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(),
+                                                                                             subgraphOutputs.size()),
+                                                     flatBufferBuilder.CreateVector(&controlOperator, 1));
+
+    flatbuffers::Offset<flatbuffers::String> modelDescription =
+                                             flatBufferBuilder.CreateString("ArmnnDelegate: GATHER_ND Operator Model");
+    flatbuffers::Offset<OperatorCode>        operatorCode     = CreateOperatorCode(flatBufferBuilder,
+                                                                                   BuiltinOperator_GATHER_ND);
+
+    flatbuffers::Offset<Model> flatbufferModel =
+                                   CreateModel(flatBufferBuilder,
+                                               TFLITE_SCHEMA_VERSION,
+                                               flatBufferBuilder.CreateVector(&operatorCode, 1),
+                                               flatBufferBuilder.CreateVector(&subgraph, 1),
+                                               modelDescription,
+                                               flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(), // copy out: the builder dies with this scope
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+template<typename T>
+void GatherNdTest(tflite::TensorType tensorType,
+                std::vector<armnn::BackendId>& backends,
+                std::vector<int32_t>& paramsShape,
+                std::vector<int32_t>& indicesShape,
+                std::vector<int32_t>& expectedOutputShape,
+                std::vector<T>& paramsValues,
+                std::vector<int32_t>& indicesValues,
+                std::vector<T>& expectedOutputValues,
+                float quantScale = 1.0f,
+                int quantOffset = 0)
+{ // Runs one GATHER_ND model with and without the Arm NN delegate and passes both results to CompareOutputData.
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateGatherNdTfLiteModel(tensorType,
+                                                            paramsShape,
+                                                            indicesShape,
+                                                            expectedOutputShape,
+                                                            quantScale,
+                                                            quantOffset);
+    const Model* tfLiteModel = GetModel(modelBuffer.data()); // points into modelBuffer, which must stay alive
+
+    // Create two TfLite interpreters from the same model; only armnnDelegate is given the delegate further down
+    std::unique_ptr<Interpreter> armnnDelegate; // same name as the namespace; armnnDelegate:: still finds the namespace
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&armnnDelegate) == kTfLiteOk);
+    CHECK(armnnDelegate != nullptr);
+    CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteDelegate; // despite the name, this interpreter never gets a delegate
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&tfLiteDelegate) == kTfLiteOk);
+    CHECK(tfLiteDelegate != nullptr);
+    CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+    theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                     armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify the armnnDelegate interpreter to run through theArmnnDelegate
+    CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data: input 0 takes paramsValues, input 1 takes indicesValues, same for both interpreters
+    armnnDelegate::FillInput<T>(tfLiteDelegate, 0, paramsValues);
+    armnnDelegate::FillInput<T>(armnnDelegate, 0, paramsValues);
+    armnnDelegate::FillInput<int32_t>(tfLiteDelegate, 1, indicesValues);
+    armnnDelegate::FillInput<int32_t>(armnnDelegate, 1, indicesValues);
+
+    // Run both interpreters
+    CHECK(tfLiteDelegate->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegate->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    armnnDelegate::CompareOutputData<T>(tfLiteDelegate,
+                                        armnnDelegate,
+                                        expectedOutputShape,
+                                        expectedOutputValues,
+                                        0);
+
+    tfLiteDelegate.reset(nullptr); // reset now: otherwise theArmnnDelegate (declared later) is destroyed first
+    armnnDelegate.reset(nullptr);
+}
+} // anonymous namespace
\ No newline at end of file