IVGCVSW-3474 Add end-to-end tests for QuantizedLstm

Change-Id: Iaec6956b5c459308d77d29f699ae4558bee66cd5
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index f517356..684b27f 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -45,6 +45,7 @@
     PreluEndToEndTestImpl.hpp
     QuantizeHelper.hpp
     QuantizeTestImpl.hpp
+    QuantizedLstmEndToEndTestImpl.hpp
     ResizeEndToEndTestImpl.hpp
     RuntimeTestImpl.hpp
     SoftmaxTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp
new file mode 100644
index 0000000..2cd1aad
--- /dev/null
+++ b/src/backends/backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp
+@@ -0,0 +1,232 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "CommonTestUtils.hpp"
+#include "EndToEndTestImpl.hpp"
+
+#include <armnn/INetwork.hpp>
+#include <ResolveType.hpp>
+#include <test/TensorHelpers.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+namespace
+{
+
+using MultiArray = const boost::multi_array<uint8_t, 2>&;
+
+armnn::INetworkPtr CreateQuantizedLstmNetwork(MultiArray input,
+                                              MultiArray expectedOutput)
+{
+    auto batchSize = boost::numeric_cast<unsigned int>(input.shape()[0]);
+    auto inputSize = boost::numeric_cast<unsigned int>(input.shape()[1]);
+    auto outputSize = boost::numeric_cast<unsigned int>(expectedOutput.shape()[1]);
+
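+    // Quantization scales/offsets for the input/output, weight, bias and cell state tensors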
+    float inputOutputScale = 0.0078125f;
+    int32_t inputOutputOffset = 128;
+
+    float weightsScale = 0.00408021f;
+    int32_t weightsOffset = 100;
+
+    float biasScale = 3.1876640625e-05f;
+    int32_t biasOffset = 0;
+
+    float cellStateScale = 0.00048828125f;
+    int32_t cellStateOffset = 0;
+
+    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
+                                       armnn::DataType::QuantisedAsymm8,
+                                       weightsScale,
+                                       weightsOffset);
+
+    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
+                                           armnn::DataType::QuantisedAsymm8,
+                                           weightsScale,
+                                           weightsOffset);
+
+    armnn::TensorInfo biasInfo({outputSize}, armnn::DataType::Signed32, biasScale, biasOffset);
+
+    armnn::QuantizedLstmInputParams data;
+
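+    // Weight tensors (QuantisedAsymm8) for the input-to-* and recurrent-to-* gates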
+    const std::vector<uint8_t> inputToInputWeightsVector = {146, 250, 235, 171, 10, 218, 171, 108};
+    armnn::ConstTensor inputToInputWeightsTensor(inputWeightsInfo, inputToInputWeightsVector.data());
+
+    const std::vector<uint8_t> inputToForgetWeightsVector = {24, 50, 132, 179, 158, 110, 3, 169};
+    armnn::ConstTensor inputToForgetWeightsTensor(inputWeightsInfo, inputToForgetWeightsVector.data());
+
+    const std::vector<uint8_t> inputToCellWeightsTensorVector = {133, 34, 29, 49, 206, 109, 54, 183};
+    armnn::ConstTensor inputToCellWeightsTensor(inputWeightsInfo, inputToCellWeightsTensorVector.data());
+
+    const std::vector<uint8_t> inputToOutputWeightsTensorVector = {195, 187, 11, 99, 109, 10, 218, 48};
+    armnn::ConstTensor inputToOutputWeightsTensor(inputWeightsInfo, inputToOutputWeightsTensorVector.data());
+
+    const std::vector<uint8_t> recurrentToInputWeightsTensorVector =
+            {254, 206, 77, 168, 71, 20, 215, 6, 223, 7, 118, 225, 59, 130, 174, 26};
+    armnn::ConstTensor recurrentToInputWeightsTensor(recurrentWeightsInfo, recurrentToInputWeightsTensorVector.data());
+
+    const std::vector<uint8_t> recurrentToForgetWeightsTensorVector =
+            {137, 240, 103, 52, 68, 51, 237, 112, 0, 220, 89, 23, 69, 4, 207, 253};
+    armnn::ConstTensor recurrentToForgetWeightsTensor(recurrentWeightsInfo,
+                                                      recurrentToForgetWeightsTensorVector.data());
+
+    const std::vector<uint8_t> recurrentToCellWeightsTensorVector =
+            {172, 60, 205, 65, 14, 0, 140, 168, 240, 223, 133, 56, 142, 64, 246, 216};
+    armnn::ConstTensor recurrentToCellWeightsTensor(recurrentWeightsInfo, recurrentToCellWeightsTensorVector.data());
+
+    const std::vector<uint8_t> recurrentToOutputWeightsTensorVector =
+            {106, 214, 67, 23, 59, 158, 45, 3, 119, 132, 49, 205, 129, 218, 11, 98};
+    armnn::ConstTensor recurrentToOutputWeightsTensor(recurrentWeightsInfo,
+                                                      recurrentToOutputWeightsTensorVector.data());
+
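+    // Bias tensors (Signed32) for the input, forget, cell and output gates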
+    const std::vector<int32_t> inputGateBiasTensorVector = {-7876, 13488, -726, 32839};
+    armnn::ConstTensor inputGateBiasTensor(biasInfo, inputGateBiasTensorVector.data());
+
+    const std::vector<int32_t> forgetGateBiasTensorVector = {9206, -46884, -11693, -38724};
+    armnn::ConstTensor forgetGateBiasTensor(biasInfo, forgetGateBiasTensorVector.data());
+
+    const std::vector<int32_t> cellBiasTensorVector = {39481, 48624, 48976, -21419};
+    armnn::ConstTensor cellBiasTensor(biasInfo, cellBiasTensorVector.data());
+
+    const std::vector<int32_t> outputGateBiasTensorVector = {-58999, -17050, -41852, -40538};
+    armnn::ConstTensor outputGateBiasTensor(biasInfo, outputGateBiasTensorVector.data());
+
+    data.m_InputToInputWeights = &inputToInputWeightsTensor;
+    data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+    data.m_InputToCellWeights = &inputToCellWeightsTensor;
+    data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+    data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
+    data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+    data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+    data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+    data.m_InputGateBias = &inputGateBiasTensor;
+    data.m_ForgetGateBias = &forgetGateBiasTensor;
+    data.m_CellBias = &cellBiasTensor;
+    data.m_OutputGateBias = &outputGateBiasTensor;
+
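+    // Build the network: three inputs and two outputs around a single QuantizedLstm layer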
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* const inputLayer = net->AddInputLayer(0);
+    armnn::IConnectableLayer* const cellStateIn = net->AddInputLayer(1);
+    armnn::IConnectableLayer* const outputStateIn = net->AddInputLayer(2);
+    armnn::IConnectableLayer* const quantizedLstmLayer = net->AddQuantizedLstmLayer(data, "quantizedLstm");
+    armnn::IConnectableLayer* const cellStateOut = net->AddOutputLayer(0);
+    armnn::IConnectableLayer* const outputStateOut = net->AddOutputLayer(1);
+
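+    // Tensor info for the layer inputs and outputs, including quantization parameters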
+    armnn::TensorInfo inputTensorInfo({batchSize, inputSize},
+                                      armnn::DataType::QuantisedAsymm8,
+                                      inputOutputScale,
+                                      inputOutputOffset);
+
+    armnn::TensorInfo cellStateInTensorInfo({batchSize, outputSize},
+                                            armnn::DataType::QuantisedSymm16,
+                                            cellStateScale,
+                                            cellStateOffset);
+
+    armnn::TensorInfo outputStateInTensorInfo({batchSize, outputSize},
+                                              armnn::DataType::QuantisedAsymm8,
+                                              inputOutputScale,
+                                              inputOutputOffset);
+
+    armnn::TensorInfo cellStateOutTensorInfo({batchSize, outputSize},
+                                             armnn::DataType::QuantisedSymm16,
+                                             cellStateScale,
+                                             cellStateOffset);
+
+    armnn::TensorInfo outputTensorInfo({batchSize, outputSize},
+                                       armnn::DataType::QuantisedAsymm8,
+                                       inputOutputScale,
+                                       inputOutputOffset);
+
+    // Connect the layers
+    // Inputs
+    Connect(inputLayer, quantizedLstmLayer, inputTensorInfo, 0, 0);
+    Connect(cellStateIn, quantizedLstmLayer, cellStateInTensorInfo, 0, 1);
+    Connect(outputStateIn, quantizedLstmLayer, outputStateInTensorInfo, 0, 2);
+
+    // Outputs
+    Connect(quantizedLstmLayer, cellStateOut, cellStateOutTensorInfo, 0, 0);
+    Connect(quantizedLstmLayer, outputStateOut, outputTensorInfo, 1, 0);
+
+    return net;
+}
+
+void QuantizedLstmEndToEnd(const std::vector<armnn::BackendId>& backends)
+{
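+    // Quantized input and state data plus expected outputs (batch size 2, input size 2, output size 4)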
+    std::vector<uint8_t> inputVector = {166, 179, 50, 150};
+    armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::QuantisedAsymm8);
+    boost::multi_array<uint8_t, 2> input = MakeTensor<uint8_t, 2>(inputDesc, inputVector);
+
+    std::vector<int16_t> cellStateInVector = {876, 1034, 955, -909, 761, 1029, 796, -1036};
+    armnn::TensorInfo cellStateInDesc({2, 4}, armnn::DataType::QuantisedSymm16);
+    boost::multi_array<int16_t, 2> cellStateIn = MakeTensor<int16_t, 2>(cellStateInDesc, cellStateInVector);
+
+    std::vector<uint8_t> outputStateInVector = {136, 150, 140, 115, 135, 152, 138, 112};
+    armnn::TensorInfo outputStateInDesc({2, 4}, armnn::DataType::QuantisedAsymm8);
+    boost::multi_array<uint8_t, 2> outputStateIn = MakeTensor<uint8_t, 2>(outputStateInDesc, outputStateInVector);
+
+    std::vector<int16_t> cellStateOutVector = {1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235};
+    armnn::TensorInfo cellStateOutVectorDesc({2, 4}, armnn::DataType::QuantisedSymm16);
+    boost::multi_array<int16_t, 2> cellStateOut = MakeTensor<int16_t, 2>(cellStateOutVectorDesc, cellStateOutVector);
+
+    std::vector<uint8_t> outputStateOutVector = {140, 151, 146, 112, 136, 156, 142, 112};
+    armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::QuantisedAsymm8);
+    boost::multi_array<uint8_t, 2> outputStateOut = MakeTensor<uint8_t, 2>(outputDesc, outputStateOutVector);
+
+    // Builds up the structure of the network
+    armnn::INetworkPtr net = CreateQuantizedLstmNetwork(input, outputStateOut);
+
+    BOOST_TEST_CHECKPOINT("create a network");
+
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Optimizes the network.
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    // Loads it into the runtime.
+    NetworkId netId;
+    runtime->LoadNetwork(netId, std::move(optNet));
+
+    InputTensors inputTensors;
+    inputTensors.reserve(3);
+
+    // Inputs
+    inputTensors.push_back({0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputVector.data())});
+    inputTensors.push_back({1, ConstTensor(runtime->GetInputTensorInfo(netId, 1), cellStateInVector.data())});
+    inputTensors.push_back({2, ConstTensor(runtime->GetInputTensorInfo(netId, 2), outputStateInVector.data())});
+
+    OutputTensors outputTensors;
+    outputTensors.reserve(2);
+
+    // Outputs
+    std::vector<int16_t> cellStateOutResult(cellStateOutVector.size());
+    std::vector<uint8_t> outputStateOutResult(outputStateOutVector.size());
+    outputTensors.push_back({0, Tensor(runtime->GetOutputTensorInfo(netId, 0), cellStateOutResult.data())});
+    outputTensors.push_back({1, Tensor(runtime->GetOutputTensorInfo(netId, 1), outputStateOutResult.data())});
+
+    // Does the inference.
+    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+    // Checks the results.
+    for (unsigned int i = 0; i < cellStateOutResult.size(); ++i)
+    {
+        BOOST_TEST(cellStateOutVector[i] == cellStateOutResult[i], boost::test_tools::tolerance(1.0f));
+    }
+
+    for (unsigned int i = 0; i < outputStateOutResult.size(); ++i)
+    {
+        BOOST_TEST(outputStateOutVector[i] == outputStateOutResult[i], boost::test_tools::tolerance(1.0f));
+    }
+}
+
+} // anonymous namespace
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 811bf8a..625d234 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -382,10 +382,7 @@
 bool ClLayerSupport::IsInputSupported(const TensorInfo& input,
                                       Optional<std::string&> reasonIfUnsupported) const
 {
-    return IsSupportedForDataTypeCl(reasonIfUnsupported,
-                                    input.GetDataType(),
-                                    &TrueFunc<>,
-                                    &TrueFunc<>);
+    return IsClBackendSupported(reasonIfUnsupported);
 }
 
 bool ClLayerSupport::IsL2NormalizationSupported(const TensorInfo& input,
@@ -491,14 +488,7 @@
 bool ClLayerSupport::IsOutputSupported(const TensorInfo& output,
                                        Optional<std::string&> reasonIfUnsupported) const
 {
-    return IsClBackendSupported(reasonIfUnsupported) &&
-           IsSupportedForDataTypeGeneric(reasonIfUnsupported,
-                                         output.GetDataType(),
-                                         &TrueFunc<>,
-                                         &TrueFunc<>,
-                                         &TrueFunc<>,
-                                         &FalseFuncI32<>,
-                                         &TrueFunc<>);
+    return IsClBackendSupported(reasonIfUnsupported);
 }
 
 bool ClLayerSupport::IsPadSupported(const TensorInfo& input,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 6e91dd0..ca3c30d 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -127,14 +127,13 @@
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
 {
-    return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
+    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
 {
-    return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload, CopyMemGenericWorkload, NullWorkload,
-        CopyMemGenericWorkload>(descriptor, info);
+    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp
index 06c24a3..c33190f 100644
--- a/src/backends/cl/test/ClEndToEndTests.cpp
+++ b/src/backends/cl/test/ClEndToEndTests.cpp
@@ -9,6 +9,7 @@
 #include <backendsCommon/test/ConcatTestImpl.hpp>
 #include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
 #include <backendsCommon/test/PreluEndToEndTestImpl.hpp>
+#include <backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
 #include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp>
@@ -259,4 +260,9 @@
         defaultBackends, armnn::DataLayout::NHWC);
 }
 
+BOOST_AUTO_TEST_CASE(ClQuantizedLstmEndToEndTest)
+{
+    QuantizedLstmEndToEnd(defaultBackends);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index b3a57e2..bddee11 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -323,10 +323,7 @@
 bool NeonLayerSupport::IsInputSupported(const TensorInfo& input,
                                         Optional<std::string&> reasonIfUnsupported) const
 {
-    return IsSupportedForDataTypeNeon(reasonIfUnsupported,
-                                      input.GetDataType(),
-                                      &TrueFunc<>,
-                                      &TrueFunc<>);
+    return IsNeonBackendSupported(reasonIfUnsupported);
 }
 
 bool NeonLayerSupport::IsL2NormalizationSupported(const TensorInfo& input,
@@ -432,14 +429,7 @@
 bool NeonLayerSupport::IsOutputSupported(const TensorInfo& output,
                                          Optional<std::string&> reasonIfUnsupported) const
 {
-    return IsNeonBackendSupported(reasonIfUnsupported) &&
-           IsSupportedForDataTypeGeneric(reasonIfUnsupported,
-                                         output.GetDataType(),
-                                         &TrueFunc<>,
-                                         &TrueFunc<>,
-                                         &TrueFunc<>,
-                                         &FalseFuncI32<>,
-                                         &TrueFunc<>);
+    return IsNeonBackendSupported(reasonIfUnsupported);
 }
 
 bool NeonLayerSupport::IsPadSupported(const TensorInfo& input,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 0e66bfc..77660c3 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -92,14 +92,13 @@
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
                                                             const WorkloadInfo&        info) const
 {
-    return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
+    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
                                                              const WorkloadInfo&        info) const
 {
-    return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload,
-                              CopyMemGenericWorkload, NullWorkload, CopyMemGenericWorkload>(descriptor, info);
+    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp
index 18af99e..81e5d80 100644
--- a/src/backends/neon/test/NeonEndToEndTests.cpp
+++ b/src/backends/neon/test/NeonEndToEndTests.cpp
@@ -9,6 +9,7 @@
 #include <backendsCommon/test/ConcatTestImpl.hpp>
 #include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
 #include <backendsCommon/test/PreluEndToEndTestImpl.hpp>
+#include <backendsCommon/test/QuantizedLstmEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
 
@@ -267,4 +268,9 @@
     Splitter4dDim3EndToEnd<armnn::DataType::QuantisedAsymm8>(defaultBackends);
 }
 
+BOOST_AUTO_TEST_CASE(NeonQuantizedLstmEndToEndTest)
+{
+    QuantizedLstmEndToEnd(defaultBackends);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/neon/workloads/NeonQuantizedLstmWorkload.hpp b/src/backends/neon/workloads/NeonQuantizedLstmWorkload.hpp
index ab8ea71..c3bcf78 100644
--- a/src/backends/neon/workloads/NeonQuantizedLstmWorkload.hpp
+++ b/src/backends/neon/workloads/NeonQuantizedLstmWorkload.hpp
@@ -17,6 +17,7 @@
 class NeonQuantizedLstmWorkload : public BaseWorkload<QuantizedLstmQueueDescriptor>
 {
 public:
+    using BaseWorkload<QuantizedLstmQueueDescriptor>::m_Data;
     NeonQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor& descriptor, const WorkloadInfo& info);
     virtual void Execute() const override;