IVGCVSW-3341 Add Neon backend support for Quantized_LSTM (16bit cell state)

 * Add Neon Workload
 * Update NeonWorkloads.hpp
 * Update NeonWorkloadFactory
 * Update NeonLayerSupport
 * Update backends.mk and CMakeLists.txt
 * Add NeonCreateWorkload test
 * Enable LayerTest

!android-nn-driver:1685

Change-Id: Idd799bbf039acf0d59084d02c3b57766ce3691b5
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
Signed-off-by: Matthew Bentham <Matthew.Bentham@arm.com>
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 848af12..056bfb2 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -5,6 +5,7 @@
 
 #include "NeonWorkloadFactoryHelper.hpp"
 
+#include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 
 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
@@ -873,4 +874,47 @@
     NeonCreateStackWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
 }
 
+// Creates a QuantizedLstm workload through the Neon factory and verifies that
+// each input/output tensor handle was assigned the expected tensor shape and
+// ACL data type (QASYMM8 activations, QSYMM16 for the 16-bit cell state).
+template <typename QuantizedLstmWorkloadType>
+static void NeonCreateQuantizedLstmWorkloadTest()
+{
+    using boost::polymorphic_downcast;
+
+    Graph graph;
+    NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
+
+    auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
+
+    QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
+
+    // Inputs: [0] input, [1] cellStateIn, [2] outputStateIn
+    IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
+    BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
+
+    IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
+    BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
+
+    IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
+    BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
+    BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
+
+    // Outputs: [0] cellStateOut, [1] outputStateOut
+    IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
+    BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
+
+    IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
+    BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
+}
+
+BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
+{
+    NeonCreateQuantizedLstmWorkloadTest<NeonQuantizedLstmWorkload>();
+}
+
 BOOST_AUTO_TEST_SUITE_END()