IVGCVSW-3469 Add front end for Quantized LSTM layer

 * Added new layer QuantizedLstm (Android Q)
 * Made the necessary changes to the public APIs (INetwork, ILayerVisitor,
   ILayerSupport); see the usage sketch below
 * Added unit tests covering the layer visitor and output shape inference
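
A minimal usage sketch of the new AddQuantizedLstmLayer() API. This is
illustrative only: the tensor shapes, quantization parameters, variable
names and the reuse of a single tensor per weight/bias group are
assumptions made for brevity, not values taken from this change or its
unit tests.

    #include <armnn/ArmNN.hpp>

    #include <cstdint>
    #include <vector>

    int main()
    {
        using namespace armnn;

        // Per QuantizedLstmParameters: 2D QAsymm8 weights of shape
        // [outputSize, inputSize] (input-to-X) or [outputSize, outputSize]
        // (recurrent-to-X), and 1D Signed32 biases of shape [outputSize].
        const unsigned int inputSize  = 2;
        const unsigned int outputSize = 4;

        std::vector<uint8_t> inputWeightsData(outputSize * inputSize, 1);
        std::vector<uint8_t> recurrentWeightsData(outputSize * outputSize, 1);
        std::vector<int32_t> biasData(outputSize, 0);

        const unsigned int inputWeightsShape[]     = { outputSize, inputSize };
        const unsigned int recurrentWeightsShape[] = { outputSize, outputSize };
        const unsigned int biasShape[]             = { outputSize };

        TensorInfo inputWeightsInfo(2, inputWeightsShape,
                                    DataType::QuantisedAsymm8, 0.1f, 0);
        TensorInfo recurrentWeightsInfo(2, recurrentWeightsShape,
                                        DataType::QuantisedAsymm8, 0.1f, 0);
        TensorInfo biasInfo(1, biasShape, DataType::Signed32);

        ConstTensor inputWeights(inputWeightsInfo, inputWeightsData);
        ConstTensor recurrentWeights(recurrentWeightsInfo, recurrentWeightsData);
        ConstTensor bias(biasInfo, biasData);

        // The same tensor is reused for every gate purely to keep the
        // sketch short; a real network supplies one tensor per gate.
        QuantizedLstmInputParams params;
        params.m_InputToInputWeights      = &inputWeights;
        params.m_InputToForgetWeights     = &inputWeights;
        params.m_InputToCellWeights       = &inputWeights;
        params.m_InputToOutputWeights     = &inputWeights;
        params.m_RecurrentToInputWeights  = &recurrentWeights;
        params.m_RecurrentToForgetWeights = &recurrentWeights;
        params.m_RecurrentToCellWeights   = &recurrentWeights;
        params.m_RecurrentToOutputWeights = &recurrentWeights;
        params.m_InputGateBias            = &bias;
        params.m_ForgetGateBias           = &bias;
        params.m_CellBias                 = &bias;
        params.m_OutputGateBias           = &bias;

        INetworkPtr network = INetwork::Create();

        // The layer has 3 inputs (input, previousCellStateIn, previousOutputIn)
        // and 2 outputs (cellStateOut, output) to be connected by the caller.
        IConnectableLayer* const quantizedLstm =
            network->AddQuantizedLstmLayer(params, "quantizedLstm");

        return quantizedLstm != nullptr ? 0 : 1;
    }

The weights and biases are copied into the layer (as ScopedCpuTensorHandle)
by AddQuantizedLstmLayer, so the ConstTensors passed in only need to remain
valid for the duration of the call.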

Change-Id: I3b9f16b0e7e49f51932cf204c87cb7118798123a
Signed-off-by: James Conroy <james.conroy@arm.com>
diff --git a/Android.mk b/Android.mk
index 8ad790e..8a24e7b 100644
--- a/Android.mk
+++ b/Android.mk
@@ -125,6 +125,7 @@
         src/armnn/layers/PreCompiledLayer.cpp \
         src/armnn/layers/PreluLayer.cpp \
         src/armnn/layers/QuantizeLayer.cpp \
+        src/armnn/layers/QuantizedLstmLayer.cpp \
         src/armnn/layers/ReshapeLayer.cpp \
         src/armnn/layers/ResizeLayer.cpp \
         src/armnn/layers/RsqrtLayer.cpp \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index aa462fb..c823b81 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -214,6 +214,7 @@
     include/armnn/LstmParams.hpp
     include/armnn/NetworkFwd.hpp
     include/armnn/Optional.hpp
+    include/armnn/QuantizedLstmParams.hpp
     include/armnn/Tensor.hpp
     include/armnn/TensorFwd.hpp
     include/armnn/Types.hpp
@@ -292,6 +293,8 @@
     src/armnn/layers/Pooling2dLayer.cpp
     src/armnn/layers/QuantizeLayer.cpp
     src/armnn/layers/QuantizeLayer.hpp
+    src/armnn/layers/QuantizedLstmLayer.hpp
+    src/armnn/layers/QuantizedLstmLayer.cpp
     src/armnn/layers/DivisionLayer.cpp
     src/armnn/layers/DivisionLayer.hpp
     src/armnn/layers/PreCompiledLayer.hpp
diff --git a/include/armnn/ArmNN.hpp b/include/armnn/ArmNN.hpp
index 884a3ca..b18f14c 100644
--- a/include/armnn/ArmNN.hpp
+++ b/include/armnn/ArmNN.hpp
@@ -11,6 +11,7 @@
 #include "IRuntime.hpp"
 #include "LstmParams.hpp"
 #include "Optional.hpp"
+#include "QuantizedLstmParams.hpp"
 #include "Tensor.hpp"
 #include "Types.hpp"
 #include "TypesUtils.hpp"
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
index 4301f9a..4536098 100644
--- a/include/armnn/ILayerSupport.hpp
+++ b/include/armnn/ILayerSupport.hpp
@@ -6,8 +6,9 @@
 
 #include <armnn/Deprecated.hpp>
 #include <armnn/DescriptorsFwd.hpp>
-#include <armnn/Optional.hpp>
 #include <armnn/LstmParams.hpp>
+#include <armnn/Optional.hpp>
+#include <armnn/QuantizedLstmParams.hpp>
 
 #include <cctype>
 #include <functional>
@@ -228,6 +229,14 @@
                                      const TensorInfo& output,
                                      Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
 
+    virtual bool IsQuantizedLstmSupported(const TensorInfo& input,
+                                          const TensorInfo& previousCellStateIn,
+                                          const TensorInfo& previousOutputIn,
+                                          const TensorInfo& cellStateOut,
+                                          const TensorInfo& output,
+                                          const QuantizedLstmInputParamsInfo& paramsInfo,
+                                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+
     virtual bool IsReshapeSupported(const TensorInfo& input,
                                     const ReshapeDescriptor& descriptor,
                                     Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp
index 6e5b546..1ccbf98 100644
--- a/include/armnn/ILayerVisitor.hpp
+++ b/include/armnn/ILayerVisitor.hpp
@@ -302,6 +302,14 @@
     virtual void VisitQuantizeLayer(const IConnectableLayer* layer,
                                     const char* name = nullptr) = 0;
 
+    /// Function a QuantizedLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.
+    /// @param layer - pointer to the layer which is calling back to this visit function.
+    /// @param params - The weights and biases for the Quantized LSTM cell
+    /// @param name - Optional name for the layer.
+    virtual void VisitQuantizedLstmLayer(const IConnectableLayer* layer,
+                                         const QuantizedLstmInputParams& params,
+                                         const char* name = nullptr) = 0;
+
     /// Function a reshape layer should call back to when its Accept(ILayerVisitor&) function is invoked.
     /// @param layer - pointer to the layer which is calling back to this visit function.
     /// @param reshapeDescriptor - Parameters for the reshape operation.
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 9e88c92..a2ff0dc 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -356,9 +356,10 @@
     virtual IConnectableLayer* AddOutputLayer(LayerBindingId id, const char* name = nullptr) = 0;
 
     /// Add a Lstm layer to the network
-    /// @param descriptor Parameters for the Lstm operation
-    /// @param name Optional name for the layer
-    /// @return Interface for configuring the layer.
+    /// @param descriptor - Parameters for the Lstm operation
+    /// @param params - Weights and biases for the LSTM cell
+    /// @param name - Optional name for the layer
+    /// @return - Interface for configuring the layer.
     virtual IConnectableLayer* AddLstmLayer(const LstmDescriptor& descriptor,
                                             const LstmInputParams& params,
                                             const char* name = nullptr) = 0;
@@ -458,6 +459,13 @@
     virtual IConnectableLayer* AddStackLayer(const StackDescriptor& descriptor,
                                              const char* name = nullptr) = 0;
 
+    /// Add a QuantizedLstm layer to the network
+    /// @param params - The weights and biases for the Quantized LSTM cell
+    /// @param name - Optional name for the layer
+    /// @return - Interface for configuring the layer.
+    virtual IConnectableLayer* AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
+                                                     const char* name = nullptr) = 0;
+
     virtual void Accept(ILayerVisitor& visitor) const = 0;
 
 protected:
diff --git a/include/armnn/LayerSupport.hpp b/include/armnn/LayerSupport.hpp
index 6a3f177..2ec086b 100644
--- a/include/armnn/LayerSupport.hpp
+++ b/include/armnn/LayerSupport.hpp
@@ -10,6 +10,7 @@
 #include <armnn/Tensor.hpp>
 #include <armnn/Types.hpp>
 #include "LstmParams.hpp"
+#include "QuantizedLstmParams.hpp"
 
 namespace armnn
 {
@@ -291,6 +292,17 @@
                           size_t reasonIfUnsupportedMaxLength = 1024);
 
 /// Deprecated in favor of IBackend and ILayerSupport interfaces
+bool IsQuantizedLstmSupported(const BackendId& backend,
+                              const TensorInfo& input,
+                              const TensorInfo& previousCellStateIn,
+                              const TensorInfo& previousOutputIn,
+                              const TensorInfo& cellStateOut,
+                              const TensorInfo& output,
+                              const QuantizedLstmInputParamsInfo& paramsInfo,
+                              char* reasonIfUnsupported = nullptr,
+                              size_t reasonIfUnsupportedMaxLength = 1024);
+
+/// Deprecated in favor of IBackend and ILayerSupport interfaces
 bool IsReshapeSupported(const BackendId& backend,
                         const TensorInfo& input,
                         const ReshapeDescriptor& descriptor,
diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp
index f107e9f..8c5464c 100644
--- a/include/armnn/LayerVisitorBase.hpp
+++ b/include/armnn/LayerVisitorBase.hpp
@@ -157,6 +157,10 @@
     void VisitQuantizeLayer(const IConnectableLayer*,
                             const char*) override { DefaultPolicy::Apply(__func__); }
 
+    void VisitQuantizedLstmLayer(const IConnectableLayer*,
+                                 const QuantizedLstmInputParams&,
+                                 const char*) override { DefaultPolicy::Apply(__func__); }
+
     void VisitReshapeLayer(const IConnectableLayer*,
                            const ReshapeDescriptor&,
                            const char*) override { DefaultPolicy::Apply(__func__); }
diff --git a/include/armnn/NetworkFwd.hpp b/include/armnn/NetworkFwd.hpp
index 97c5e6e..e94a2cc 100644
--- a/include/armnn/NetworkFwd.hpp
+++ b/include/armnn/NetworkFwd.hpp
@@ -7,6 +7,7 @@
 namespace armnn
 {
 struct LstmInputParams;
+struct QuantizedLstmInputParams;
 class INetwork;
 class IOptimizedNetwork;
 class Graph;
diff --git a/include/armnn/QuantizedLstmParams.hpp b/include/armnn/QuantizedLstmParams.hpp
new file mode 100644
index 0000000..b3033ac
--- /dev/null
+++ b/include/armnn/QuantizedLstmParams.hpp
@@ -0,0 +1,218 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "TensorFwd.hpp"
+#include "Exceptions.hpp"
+
+namespace armnn
+{
+
+struct QuantizedLstmInputParams
+{
+    QuantizedLstmInputParams()
+        : m_InputToInputWeights(nullptr)
+        , m_InputToForgetWeights(nullptr)
+        , m_InputToCellWeights(nullptr)
+        , m_InputToOutputWeights(nullptr)
+
+        , m_RecurrentToInputWeights(nullptr)
+        , m_RecurrentToForgetWeights(nullptr)
+        , m_RecurrentToCellWeights(nullptr)
+        , m_RecurrentToOutputWeights(nullptr)
+
+        , m_InputGateBias(nullptr)
+        , m_ForgetGateBias(nullptr)
+        , m_CellBias(nullptr)
+        , m_OutputGateBias(nullptr)
+    {
+    }
+
+    const ConstTensor* m_InputToInputWeights;
+    const ConstTensor* m_InputToForgetWeights;
+    const ConstTensor* m_InputToCellWeights;
+    const ConstTensor* m_InputToOutputWeights;
+
+    const ConstTensor* m_RecurrentToInputWeights;
+    const ConstTensor* m_RecurrentToForgetWeights;
+    const ConstTensor* m_RecurrentToCellWeights;
+    const ConstTensor* m_RecurrentToOutputWeights;
+
+    const ConstTensor* m_InputGateBias;
+    const ConstTensor* m_ForgetGateBias;
+    const ConstTensor* m_CellBias;
+    const ConstTensor* m_OutputGateBias;
+
+    const ConstTensor& deref(const ConstTensor* tensorPtr) const
+    {
+        if (tensorPtr != nullptr)
+        {
+            const ConstTensor &temp = *tensorPtr;
+            return temp;
+        }
+        throw InvalidArgumentException("QuantizedLstmInputParams: Can't dereference a null pointer");
+    }
+
+    const ConstTensor& get_InputToInputWeights() const
+    {
+        return deref(m_InputToInputWeights);
+    }
+
+    const ConstTensor& get_InputToForgetWeights() const
+    {
+        return deref(m_InputToForgetWeights);
+    }
+
+    const ConstTensor& get_InputToCellWeights() const
+    {
+        return deref(m_InputToCellWeights);
+    }
+
+    const ConstTensor& get_InputToOutputWeights() const
+    {
+        return deref(m_InputToOutputWeights);
+    }
+
+    const ConstTensor& get_RecurrentToInputWeights() const
+    {
+        return deref(m_RecurrentToInputWeights);
+    }
+
+    const ConstTensor& get_RecurrentToForgetWeights() const
+    {
+        return deref(m_RecurrentToForgetWeights);
+    }
+
+    const ConstTensor& get_RecurrentToCellWeights() const
+    {
+        return deref(m_RecurrentToCellWeights);
+    }
+
+    const ConstTensor& get_RecurrentToOutputWeights() const
+    {
+        return deref(m_RecurrentToOutputWeights);
+    }
+
+    const ConstTensor& get_InputGateBias() const
+    {
+        return deref(m_InputGateBias);
+    }
+
+    const ConstTensor& get_ForgetGateBias() const
+    {
+        return deref(m_ForgetGateBias);
+    }
+
+    const ConstTensor& get_CellBias() const
+    {
+        return deref(m_CellBias);
+    }
+
+    const ConstTensor& get_OutputGateBias() const
+    {
+        return deref(m_OutputGateBias);
+    }
+};
+
+struct QuantizedLstmInputParamsInfo
+{
+    QuantizedLstmInputParamsInfo()
+        : m_InputToInputWeights(nullptr)
+        , m_InputToForgetWeights(nullptr)
+        , m_InputToCellWeights(nullptr)
+        , m_InputToOutputWeights(nullptr)
+
+        , m_RecurrentToInputWeights(nullptr)
+        , m_RecurrentToForgetWeights(nullptr)
+        , m_RecurrentToCellWeights(nullptr)
+        , m_RecurrentToOutputWeights(nullptr)
+
+        , m_InputGateBias(nullptr)
+        , m_ForgetGateBias(nullptr)
+        , m_CellBias(nullptr)
+        , m_OutputGateBias(nullptr)
+    {
+    }
+
+    const TensorInfo* m_InputToInputWeights;
+    const TensorInfo* m_InputToForgetWeights;
+    const TensorInfo* m_InputToCellWeights;
+    const TensorInfo* m_InputToOutputWeights;
+
+    const TensorInfo* m_RecurrentToInputWeights;
+    const TensorInfo* m_RecurrentToForgetWeights;
+    const TensorInfo* m_RecurrentToCellWeights;
+    const TensorInfo* m_RecurrentToOutputWeights;
+
+    const TensorInfo* m_InputGateBias;
+    const TensorInfo* m_ForgetGateBias;
+    const TensorInfo* m_CellBias;
+    const TensorInfo* m_OutputGateBias;
+
+
+    const TensorInfo& deref(const TensorInfo* tensorInfo) const
+    {
+        if (tensorInfo != nullptr)
+        {
+            const TensorInfo &temp = *tensorInfo;
+            return temp;
+        }
+        throw InvalidArgumentException("Can't dereference a null pointer");
+    }
+
+    const TensorInfo& get_InputToInputWeights() const
+    {
+        return deref(m_InputToInputWeights);
+    }
+    const TensorInfo& get_InputToForgetWeights() const
+    {
+        return deref(m_InputToForgetWeights);
+    }
+    const TensorInfo& get_InputToCellWeights() const
+    {
+        return deref(m_InputToCellWeights);
+    }
+    const TensorInfo& get_InputToOutputWeights() const
+    {
+        return deref(m_InputToOutputWeights);
+    }
+
+    const TensorInfo& get_RecurrentToInputWeights() const
+    {
+        return deref(m_RecurrentToInputWeights);
+    }
+    const TensorInfo& get_RecurrentToForgetWeights() const
+    {
+        return deref(m_RecurrentToForgetWeights);
+    }
+    const TensorInfo& get_RecurrentToCellWeights() const
+    {
+        return deref(m_RecurrentToCellWeights);
+    }
+    const TensorInfo& get_RecurrentToOutputWeights() const
+    {
+        return deref(m_RecurrentToOutputWeights);
+    }
+
+    const TensorInfo& get_InputGateBias() const
+    {
+        return deref(m_InputGateBias);
+    }
+    const TensorInfo& get_ForgetGateBias() const
+    {
+        return deref(m_ForgetGateBias);
+    }
+    const TensorInfo& get_CellBias() const
+    {
+        return deref(m_CellBias);
+    }
+    const TensorInfo& get_OutputGateBias() const
+    {
+        return deref(m_OutputGateBias);
+    }
+};
+
+} // namespace armnn
+
diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
index bf095ac..b0fea7c 100644
--- a/src/armnn/InternalTypes.hpp
+++ b/src/armnn/InternalTypes.hpp
@@ -51,6 +51,7 @@
     PreCompiled,
     Prelu,
     Quantize,
+    QuantizedLstm,
     Reshape,
     Resize,
     Rsqrt,
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index a2908aa..047c80a 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -446,14 +446,29 @@
 }
 
 bool IsQuantizeSupported(const BackendId& backend,
-                           const TensorInfo& input,
-                           const TensorInfo& output,
-                           char* reasonIfUnsupported,
-                           size_t reasonIfUnsupportedMaxLength)
+                         const TensorInfo& input,
+                         const TensorInfo& output,
+                         char* reasonIfUnsupported,
+                         size_t reasonIfUnsupportedMaxLength)
 {
     FORWARD_LAYER_SUPPORT_FUNC(backend, IsQuantizeSupported, input, output);
 }
 
+bool IsQuantizedLstmSupported(const BackendId& backend,
+                              const TensorInfo& input,
+                              const TensorInfo& previousCellStateIn,
+                              const TensorInfo& previousOutputIn,
+                              const TensorInfo& cellStateOut,
+                              const TensorInfo& output,
+                              const QuantizedLstmInputParamsInfo& paramsInfo,
+                              char* reasonIfUnsupported,
+                              size_t reasonIfUnsupportedMaxLength)
+{
+    FORWARD_LAYER_SUPPORT_FUNC(backend, IsQuantizedLstmSupported, input, previousCellStateIn, previousOutputIn,
+                               cellStateOut, output, paramsInfo);
+}
+
 bool IsPermuteSupported(const BackendId& backend,
                         const TensorInfo& input,
                         const TensorInfo& output,
diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
index b3f7adc..2c8d5d2 100644
--- a/src/armnn/LayersFwd.hpp
+++ b/src/armnn/LayersFwd.hpp
@@ -43,6 +43,7 @@
 #include "layers/PreCompiledLayer.hpp"
 #include "layers/PreluLayer.hpp"
 #include "layers/QuantizeLayer.hpp"
+#include "layers/QuantizedLstmLayer.hpp"
 #include "layers/ReshapeLayer.hpp"
 #include "layers/ResizeLayer.hpp"
 #include "layers/RsqrtLayer.hpp"
@@ -120,6 +121,7 @@
 DECLARE_LAYER(PreCompiled)
 DECLARE_LAYER(Prelu)
 DECLARE_LAYER(Quantize)
+DECLARE_LAYER(QuantizedLstm)
 DECLARE_LAYER(Reshape)
 DECLARE_LAYER(Resize)
 DECLARE_LAYER(Rsqrt)
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index a438008..2195c71 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1458,6 +1458,44 @@
     return m_Graph->AddLayer<StackLayer>(stackDescriptor, name);
 }
 
+IConnectableLayer* Network::AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
+                                                  const char* name)
+{
+    const auto layer = m_Graph->AddLayer<QuantizedLstmLayer>(name);
+
+    // InputToX weights
+    layer->m_QuantizedLstmParameters.m_InputToInputWeights =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_InputToInputWeights());
+    layer->m_QuantizedLstmParameters.m_InputToForgetWeights =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_InputToForgetWeights());
+    layer->m_QuantizedLstmParameters.m_InputToCellWeights =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_InputToCellWeights());
+    layer->m_QuantizedLstmParameters.m_InputToOutputWeights =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_InputToOutputWeights());
+
+    // RecurrentToX weights
+    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_RecurrentToInputWeights());
+    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_RecurrentToForgetWeights());
+    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_RecurrentToCellWeights());
+    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_RecurrentToOutputWeights());
+
+    // Bias
+    layer->m_QuantizedLstmParameters.m_InputGateBias =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_InputGateBias());
+    layer->m_QuantizedLstmParameters.m_ForgetGateBias =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_ForgetGateBias());
+    layer->m_QuantizedLstmParameters.m_CellBias =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_CellBias());
+    layer->m_QuantizedLstmParameters.m_OutputGateBias =
+            std::make_unique<ScopedCpuTensorHandle>(params.get_OutputGateBias());
+
+    return layer;
+}
+
 void Network::Accept(ILayerVisitor& visitor) const
 {
     for (auto layer : GetGraph())
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index 8a99deb..679ab51 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -6,6 +6,7 @@
 
 #include <armnn/DescriptorsFwd.hpp>
 #include <armnn/LstmParams.hpp>
+#include <armnn/QuantizedLstmParams.hpp>
 #include <armnn/TensorFwd.hpp>
 #include <armnn/Types.hpp>
 
@@ -200,6 +201,9 @@
     IConnectableLayer* AddStackLayer(const StackDescriptor& stackDescriptor,
                                      const char* name = nullptr) override;
 
+    IConnectableLayer* AddQuantizedLstmLayer(const QuantizedLstmInputParams& params,
+                                             const char* name = nullptr) override;
+
     void Accept(ILayerVisitor& visitor) const override;
 
 private:
diff --git a/src/armnn/layers/QuantizedLstmLayer.cpp b/src/armnn/layers/QuantizedLstmLayer.cpp
new file mode 100644
index 0000000..1d8540d
--- /dev/null
+++ b/src/armnn/layers/QuantizedLstmLayer.cpp
@@ -0,0 +1,290 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "QuantizedLstmLayer.hpp"
+
+#include "LayerCloneBase.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+namespace armnn
+{
+
+QuantizedLstmLayer::QuantizedLstmLayer(const char* name)
+    : Layer(3, 2, LayerType::QuantizedLstm, name)
+{
+}
+
+std::unique_ptr<IWorkload> QuantizedLstmLayer::CreateWorkload(const Graph& graph,
+                                                              const IWorkloadFactory& factory) const
+{
+    QuantizedLstmQueueDescriptor descriptor;
+
+    // QuantizedLstmLayer parameters - there are no optional params
+    descriptor.m_InputToInputWeights  = m_QuantizedLstmParameters.m_InputToInputWeights.get();
+    descriptor.m_InputToForgetWeights = m_QuantizedLstmParameters.m_InputToForgetWeights.get();
+    descriptor.m_InputToCellWeights   = m_QuantizedLstmParameters.m_InputToCellWeights.get();
+    descriptor.m_InputToOutputWeights = m_QuantizedLstmParameters.m_InputToOutputWeights.get();
+
+    descriptor.m_RecurrentToInputWeights  = m_QuantizedLstmParameters.m_RecurrentToInputWeights.get();
+    descriptor.m_RecurrentToForgetWeights = m_QuantizedLstmParameters.m_RecurrentToForgetWeights.get();
+    descriptor.m_RecurrentToCellWeights   = m_QuantizedLstmParameters.m_RecurrentToCellWeights.get();
+    descriptor.m_RecurrentToOutputWeights = m_QuantizedLstmParameters.m_RecurrentToOutputWeights.get();
+
+    descriptor.m_InputGateBias  = m_QuantizedLstmParameters.m_InputGateBias.get();
+    descriptor.m_ForgetGateBias = m_QuantizedLstmParameters.m_ForgetGateBias.get();
+    descriptor.m_CellBias       = m_QuantizedLstmParameters.m_CellBias.get();
+    descriptor.m_OutputGateBias = m_QuantizedLstmParameters.m_OutputGateBias.get();
+
+    return factory.CreateQuantizedLstm(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+QuantizedLstmLayer* QuantizedLstmLayer::Clone(Graph& graph) const
+{
+    auto layer = CloneBase<QuantizedLstmLayer>(graph, GetName());
+
+    layer->m_QuantizedLstmParameters.m_InputToInputWeights = m_QuantizedLstmParameters.m_InputToInputWeights ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_InputToInputWeights) : nullptr;
+    layer->m_QuantizedLstmParameters.m_InputToForgetWeights = m_QuantizedLstmParameters.m_InputToForgetWeights ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_InputToForgetWeights) : nullptr;
+    layer->m_QuantizedLstmParameters.m_InputToCellWeights = m_QuantizedLstmParameters.m_InputToCellWeights ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_InputToCellWeights) : nullptr;
+    layer->m_QuantizedLstmParameters.m_InputToOutputWeights = m_QuantizedLstmParameters.m_InputToOutputWeights ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_InputToOutputWeights) : nullptr;
+
+    layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights = m_QuantizedLstmParameters.m_RecurrentToInputWeights ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_RecurrentToInputWeights) : nullptr;
+    layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights = m_QuantizedLstmParameters.m_RecurrentToForgetWeights
+            ? std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_RecurrentToForgetWeights) : nullptr;
+    layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights = m_QuantizedLstmParameters.m_RecurrentToCellWeights ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_RecurrentToCellWeights) : nullptr;
+    layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights = m_QuantizedLstmParameters.m_RecurrentToOutputWeights
+            ? std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_RecurrentToOutputWeights) : nullptr;
+
+    layer->m_QuantizedLstmParameters.m_InputGateBias = m_QuantizedLstmParameters.m_InputGateBias ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_InputGateBias) : nullptr;
+    layer->m_QuantizedLstmParameters.m_ForgetGateBias = m_QuantizedLstmParameters.m_ForgetGateBias ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_ForgetGateBias) : nullptr;
+    layer->m_QuantizedLstmParameters.m_CellBias = m_QuantizedLstmParameters.m_CellBias ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_CellBias) : nullptr;
+    layer->m_QuantizedLstmParameters.m_OutputGateBias = m_QuantizedLstmParameters.m_OutputGateBias ?
+            std::make_unique<ScopedCpuTensorHandle>(*m_QuantizedLstmParameters.m_OutputGateBias) : nullptr;
+
+    return std::move(layer);
+}
+
+std::vector<TensorShape> QuantizedLstmLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
+{
+    BOOST_ASSERT(inputShapes.size() == 3);
+
+    // Get the batch size and output size needed to infer the output shapes
+    unsigned int numBatches = inputShapes[0][0];
+    unsigned int outputSize = inputShapes[1][1];
+
+    std::vector<TensorShape> outShapes;
+    outShapes.push_back(TensorShape({numBatches, outputSize})); // cellStateOut
+    outShapes.push_back(TensorShape({numBatches, outputSize})); // output
+
+    return outShapes;
+}
+
+void QuantizedLstmLayer::ValidateTensorShapesFromInputs()
+{
+    VerifyLayerConnections(3, CHECK_LOCATION());
+
+    auto inferredShapes = InferOutputShapes(
+    {
+        GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(), // input
+        GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape(), // previousCellStateIn
+        GetInputSlot(2).GetConnection()->GetTensorInfo().GetShape()  // previousOutputIn
+    });
+
+    BOOST_ASSERT(inferredShapes.size() == 2);
+
+    // Check weights and bias for nullptr
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_InputToInputWeights != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_InputToInputWeights should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_InputToForgetWeights != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_InputToForgetWeights should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_InputToCellWeights != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_InputToCellWeights should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_InputToOutputWeights != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_InputToOutputWeights should not be null.");
+
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_RecurrentToInputWeights != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_RecurrentToInputWeights should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_RecurrentToForgetWeights != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_RecurrentToForgetWeights should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_RecurrentToCellWeights != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_RecurrentToCellWeights should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_RecurrentToOutputWeights != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_RecurrentToOutputWeights should not be null.");
+
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_InputGateBias != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_InputGateBias should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_ForgetGateBias != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_ForgetGateBias should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_CellBias != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_CellBias should not be null.");
+    BOOST_ASSERT_MSG(m_QuantizedLstmParameters.m_OutputGateBias != nullptr,
+                     "QuantizedLstmLayer: m_QuantizedLstmParameters.m_OutputGateBias should not be null.");
+
+    // Check output TensorShape(s) match inferred shape
+    ConditionalThrowIfNotEqual<LayerValidationException>(
+            "QuantizedLstmLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.",
+            GetOutputSlot(0).GetTensorInfo().GetShape(),
+            inferredShapes[0]);
+
+    ConditionalThrowIfNotEqual<LayerValidationException>(
+            "QuantizedLstmLayer: TensorShape set on OutputSlot[1] does not match the inferred shape.",
+            GetOutputSlot(1).GetTensorInfo().GetShape(),
+            inferredShapes[1]);
+}
+
+Layer::ConstantTensors QuantizedLstmLayer::GetConstantTensorsByRef()
+{
+    return
+    {
+        m_QuantizedLstmParameters.m_InputToInputWeights,
+        m_QuantizedLstmParameters.m_InputToForgetWeights,
+        m_QuantizedLstmParameters.m_InputToCellWeights,
+        m_QuantizedLstmParameters.m_InputToOutputWeights,
+
+        m_QuantizedLstmParameters.m_RecurrentToInputWeights,
+        m_QuantizedLstmParameters.m_RecurrentToForgetWeights,
+        m_QuantizedLstmParameters.m_RecurrentToCellWeights,
+        m_QuantizedLstmParameters.m_RecurrentToOutputWeights,
+
+        m_QuantizedLstmParameters.m_InputGateBias,
+        m_QuantizedLstmParameters.m_ForgetGateBias,
+        m_QuantizedLstmParameters.m_CellBias,
+        m_QuantizedLstmParameters.m_OutputGateBias
+    };
+}
+
+void QuantizedLstmLayer::Accept(ILayerVisitor& visitor) const
+{
+    QuantizedLstmInputParams inputParams;
+
+    // InputToX weight tensors
+    ConstTensor inputToInputWeightsTensor;
+    if (m_QuantizedLstmParameters.m_InputToInputWeights != nullptr)
+    {
+        ConstTensor inputToInputWeightsTensorCopy(m_QuantizedLstmParameters.m_InputToInputWeights->GetTensorInfo(),
+                                                  m_QuantizedLstmParameters.m_InputToInputWeights->Map(true));
+        inputToInputWeightsTensor = inputToInputWeightsTensorCopy;
+        inputParams.m_InputToInputWeights = &inputToInputWeightsTensor;
+    }
+
+    ConstTensor inputToForgetWeightsTensor;
+    if (m_QuantizedLstmParameters.m_InputToForgetWeights != nullptr)
+    {
+        ConstTensor inputToForgetWeightsTensorCopy(m_QuantizedLstmParameters.m_InputToForgetWeights->GetTensorInfo(),
+                                                   m_QuantizedLstmParameters.m_InputToForgetWeights->Map(true));
+        inputToForgetWeightsTensor = inputToForgetWeightsTensorCopy;
+        inputParams.m_InputToForgetWeights = &inputToForgetWeightsTensor;
+    }
+
+    ConstTensor inputToCellWeightsTensor;
+    if (m_QuantizedLstmParameters.m_InputToCellWeights != nullptr)
+    {
+        ConstTensor inputToCellWeightsTensorCopy(m_QuantizedLstmParameters.m_InputToCellWeights->GetTensorInfo(),
+                                                 m_QuantizedLstmParameters.m_InputToCellWeights->Map(true));
+        inputToCellWeightsTensor = inputToCellWeightsTensorCopy;
+        inputParams.m_InputToCellWeights = &inputToCellWeightsTensor;
+    }
+
+    ConstTensor inputToOutputWeightsTensor;
+    if (m_QuantizedLstmParameters.m_InputToOutputWeights != nullptr)
+    {
+        ConstTensor inputToOutputWeightsTensorCopy(m_QuantizedLstmParameters.m_InputToOutputWeights->GetTensorInfo(),
+                                                   m_QuantizedLstmParameters.m_InputToOutputWeights->Map(true));
+        inputToOutputWeightsTensor = inputToOutputWeightsTensorCopy;
+        inputParams.m_InputToOutputWeights = &inputToOutputWeightsTensor;
+    }
+
+    // RecurrentToX weight tensors
+    ConstTensor recurrentToInputWeightsTensor;
+    if (m_QuantizedLstmParameters.m_RecurrentToInputWeights != nullptr)
+    {
+        ConstTensor recurrentToInputWeightsTensorCopy(
+                m_QuantizedLstmParameters.m_RecurrentToInputWeights->GetTensorInfo(),
+                m_QuantizedLstmParameters.m_RecurrentToInputWeights->Map(true));
+        recurrentToInputWeightsTensor = recurrentToInputWeightsTensorCopy;
+        inputParams.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
+    }
+
+    ConstTensor recurrentToForgetWeightsTensor;
+    if (m_QuantizedLstmParameters.m_RecurrentToForgetWeights != nullptr)
+    {
+        ConstTensor recurrentToForgetWeightsTensorCopy(
+                m_QuantizedLstmParameters.m_RecurrentToForgetWeights->GetTensorInfo(),
+                m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Map(true));
+        recurrentToForgetWeightsTensor = recurrentToForgetWeightsTensorCopy;
+        inputParams.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
+    }
+
+    ConstTensor recurrentToCellWeightsTensor;
+    if (m_QuantizedLstmParameters.m_RecurrentToCellWeights != nullptr)
+    {
+        ConstTensor recurrentToCellWeightsTensorCopy(
+                m_QuantizedLstmParameters.m_RecurrentToCellWeights->GetTensorInfo(),
+                m_QuantizedLstmParameters.m_RecurrentToCellWeights->Map(true));
+        recurrentToCellWeightsTensor = recurrentToCellWeightsTensorCopy;
+        inputParams.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
+    }
+
+    ConstTensor recurrentToOutputWeightsTensor;
+    if (m_QuantizedLstmParameters.m_RecurrentToOutputWeights != nullptr)
+    {
+        ConstTensor recurrentToOutputWeightsTensorCopy(
+                m_QuantizedLstmParameters.m_RecurrentToOutputWeights->GetTensorInfo(),
+                m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Map(true));
+        recurrentToOutputWeightsTensor = recurrentToOutputWeightsTensorCopy;
+        inputParams.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
+    }
+
+    // Bias tensors
+    ConstTensor inputGateBiasTensor;
+    if (m_QuantizedLstmParameters.m_InputGateBias != nullptr)
+    {
+        ConstTensor inputGateBiasTensorCopy(m_QuantizedLstmParameters.m_InputGateBias->GetTensorInfo(),
+                                            m_QuantizedLstmParameters.m_InputGateBias->Map(true));
+        inputGateBiasTensor = inputGateBiasTensorCopy;
+        inputParams.m_InputGateBias = &inputGateBiasTensor;
+    }
+
+    ConstTensor forgetGateBiasTensor;
+    if (m_QuantizedLstmParameters.m_ForgetGateBias != nullptr)
+    {
+        ConstTensor forgetGateBiasTensorCopy(m_QuantizedLstmParameters.m_ForgetGateBias->GetTensorInfo(),
+                                             m_QuantizedLstmParameters.m_ForgetGateBias->Map(true));
+        forgetGateBiasTensor = forgetGateBiasTensorCopy;
+        inputParams.m_ForgetGateBias = &forgetGateBiasTensor;
+    }
+
+    ConstTensor cellBiasTensor;
+    if (m_QuantizedLstmParameters.m_CellBias != nullptr)
+    {
+        ConstTensor cellBiasTensorCopy(m_QuantizedLstmParameters.m_CellBias->GetTensorInfo(),
+                                       m_QuantizedLstmParameters.m_CellBias->Map(true));
+        cellBiasTensor = cellBiasTensorCopy;
+        inputParams.m_CellBias = &cellBiasTensor;
+    }
+
+    ConstTensor outputGateBiasTensor;
+    if (m_QuantizedLstmParameters.m_OutputGateBias != nullptr)
+    {
+        ConstTensor outputGateBiasCopy(m_QuantizedLstmParameters.m_OutputGateBias->GetTensorInfo(),
+                                       m_QuantizedLstmParameters.m_OutputGateBias->Map(true));
+        outputGateBiasTensor = outputGateBiasCopy;
+        inputParams.m_OutputGateBias = &outputGateBiasTensor;
+    }
+
+    visitor.VisitQuantizedLstmLayer(this, inputParams, GetName());
+}
+
+} // namespace armnn
diff --git a/src/armnn/layers/QuantizedLstmLayer.hpp b/src/armnn/layers/QuantizedLstmLayer.hpp
new file mode 100644
index 0000000..4602f71
--- /dev/null
+++ b/src/armnn/layers/QuantizedLstmLayer.hpp
@@ -0,0 +1,87 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <Layer.hpp>
+
+namespace armnn
+{
+
+class ScopedCpuTensorHandle;
+
+struct QuantizedLstmParameters
+{
+    /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8).
+    std::unique_ptr<ScopedCpuTensorHandle> m_InputToInputWeights;
+    /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8).
+    std::unique_ptr<ScopedCpuTensorHandle> m_InputToForgetWeights;
+    /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8).
+    std::unique_ptr<ScopedCpuTensorHandle> m_InputToCellWeights;
+    /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, inputSize] (QAsymm8).
+    std::unique_ptr<ScopedCpuTensorHandle> m_InputToOutputWeights;
+
+    /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, outputSize] (QAsymm8).
+    std::unique_ptr<ScopedCpuTensorHandle> m_RecurrentToInputWeights;
+    /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, outputSize] (QAsymm8).
+    std::unique_ptr<ScopedCpuTensorHandle> m_RecurrentToForgetWeights;
+    /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, outputSize] (QAsymm8).
+    std::unique_ptr<ScopedCpuTensorHandle> m_RecurrentToCellWeights;
+    /// A unique pointer to represent 2D weights tensor with dimensions [outputSize, outputSize] (QAsymm8).
+    std::unique_ptr<ScopedCpuTensorHandle> m_RecurrentToOutputWeights;
+
+    /// A unique pointer to represent 1D bias tensor with dimensions [outputSize] (int32).
+    std::unique_ptr<ScopedCpuTensorHandle> m_InputGateBias;
+    /// A unique pointer to represent 1D bias tensor with dimensions [outputSize] (int32).
+    std::unique_ptr<ScopedCpuTensorHandle> m_ForgetGateBias;
+    /// A unique pointer to represent 1D bias tensor with dimensions [outputSize] (int32).
+    std::unique_ptr<ScopedCpuTensorHandle> m_CellBias;
+    /// A unique pointer to represent 1D bias tensor with dimensions [outputSize] (int32).
+    std::unique_ptr<ScopedCpuTensorHandle> m_OutputGateBias;
+};
+
+/// This layer represents a QuantizedLstm operation.
+class QuantizedLstmLayer : public Layer
+{
+public:
+
+    QuantizedLstmParameters m_QuantizedLstmParameters;
+
+    /// Makes a workload for the QuantizedLstm type.
+    /// @param [in] graph The graph where this layer can be found.
+    /// @param [in] factory The workload factory which will create the workload.
+    /// @return A pointer to the created workload, or nullptr if not created.
+    virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph&            graph,
+                                                      const IWorkloadFactory& factory) const override;
+
+    /// Creates a dynamically-allocated copy of this layer.
+    /// @param [in] graph The graph into which this layer is being cloned.
+    QuantizedLstmLayer* Clone(Graph& graph) const override;
+
+    /// Check if the input tensor shape(s)
+    /// will lead to a valid configuration of @ref QuantizedLstmLayer.
+    void ValidateTensorShapesFromInputs() override;
+
+    /// By default returns inputShapes if the number of inputs is equal to the number of outputs,
+    /// otherwise infers the output shapes from the given input shapes and layer properties.
+    /// @param [in] inputShapes The input shapes the layer has.
+    /// @return A vector of the inferred output shapes.
+    std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override;
+
+    void Accept(ILayerVisitor& visitor) const override;
+
+protected:
+    /// Constructor to create a QuantizedLstmLayer.
+    /// @param [in] name Optional name for the layer.
+    QuantizedLstmLayer(const char* name);
+
+    /// Default destructor
+    ~QuantizedLstmLayer() = default;
+
+    /// Retrieve the handles to the constant values stored by the layer.
+    /// @return A vector of the constant tensors stored by this layer.
+    Layer::ConstantTensors GetConstantTensorsByRef() override;
+};
+
+} // namespace armnn
diff --git a/src/armnn/test/ConstTensorLayerVisitor.cpp b/src/armnn/test/ConstTensorLayerVisitor.cpp
index e17ee46..cfcdb1d 100644
--- a/src/armnn/test/ConstTensorLayerVisitor.cpp
+++ b/src/armnn/test/ConstTensorLayerVisitor.cpp
@@ -107,6 +107,64 @@
     CheckConstTensorPtrs("CellBias", m_InputParams.m_CellBias, inputParams.m_CellBias);
 }
 
+void TestQuantizedLstmLayerVisitor::CheckConstTensorPtrs(const std::string& name,
+                                                         const ConstTensor* expected,
+                                                         const ConstTensor* actual)
+{
+    if (expected == nullptr)
+    {
+        BOOST_CHECK_MESSAGE(actual == nullptr, name + " actual should have been a nullptr");
+    }
+    else
+    {
+        BOOST_CHECK_MESSAGE(actual != nullptr, name + " actual should have been set");
+        if (actual != nullptr)
+        {
+            CheckConstTensors(*expected, *actual);
+        }
+    }
+}
+
+void TestQuantizedLstmLayerVisitor::CheckInputParameters(const QuantizedLstmInputParams& inputParams)
+{
+    CheckConstTensorPtrs("InputToInputWeights",
+                         m_InputParams.m_InputToInputWeights,
+                         inputParams.m_InputToInputWeights);
+
+    CheckConstTensorPtrs("InputToForgetWeights",
+                         m_InputParams.m_InputToForgetWeights,
+                         inputParams.m_InputToForgetWeights);
+
+    CheckConstTensorPtrs("InputToCellWeights",
+                         m_InputParams.m_InputToCellWeights,
+                         inputParams.m_InputToCellWeights);
+
+    CheckConstTensorPtrs("InputToOutputWeights",
+                         m_InputParams.m_InputToOutputWeights,
+                         inputParams.m_InputToOutputWeights);
+
+    CheckConstTensorPtrs("RecurrentToInputWeights",
+                         m_InputParams.m_RecurrentToInputWeights,
+                         inputParams.m_RecurrentToInputWeights);
+
+    CheckConstTensorPtrs("RecurrentToForgetWeights",
+                         m_InputParams.m_RecurrentToForgetWeights,
+                         inputParams.m_RecurrentToForgetWeights);
+
+    CheckConstTensorPtrs("RecurrentToCellWeights",
+                         m_InputParams.m_RecurrentToCellWeights,
+                         inputParams.m_RecurrentToCellWeights);
+
+    CheckConstTensorPtrs("RecurrentToOutputWeights",
+                         m_InputParams.m_RecurrentToOutputWeights,
+                         inputParams.m_RecurrentToOutputWeights);
+
+    CheckConstTensorPtrs("InputGateBias",  m_InputParams.m_InputGateBias,  inputParams.m_InputGateBias);
+    CheckConstTensorPtrs("ForgetGateBias", m_InputParams.m_ForgetGateBias, inputParams.m_ForgetGateBias);
+    CheckConstTensorPtrs("CellBias",       m_InputParams.m_CellBias,       inputParams.m_CellBias);
+    CheckConstTensorPtrs("OutputGateBias", m_InputParams.m_OutputGateBias, inputParams.m_OutputGateBias);
+}
+
 BOOST_AUTO_TEST_SUITE(TestConstTensorLayerVisitor)
 
 BOOST_AUTO_TEST_CASE(CheckConvolution2dLayer)
@@ -1185,6 +1243,185 @@
     layer->Accept(visitor);
 }
 
+BOOST_AUTO_TEST_CASE(CheckQuantizedLstmLayer)
+{
+    std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor inputToInputWeights(
+            TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QuantisedAsymm8), inputToInputWeightsData);
+
+    std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor inputToForgetWeights(
+            TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QuantisedAsymm8), inputToForgetWeightsData);
+
+    std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor inputToCellWeights(
+            TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QuantisedAsymm8), inputToCellWeightsData);
+
+    std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor inputToOutputWeights(
+            TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QuantisedAsymm8), inputToOutputWeightsData);
+
+
+    std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor recurrentToInputWeights(TensorInfo(
+            4, recurrentToInputWeightsDimensions.data(), DataType::QuantisedAsymm8), recurrentToInputWeightsData);
+
+    std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor recurrentToForgetWeights(TensorInfo(
+            4, recurrentToForgetWeightsDimensions.data(), DataType::QuantisedAsymm8), recurrentToForgetWeightsData);
+
+    std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor recurrentToCellWeights(TensorInfo(
+            4, recurrentToCellWeightsDimensions.data(), DataType::QuantisedAsymm8), recurrentToCellWeightsData);
+
+    std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor recurrentToOutputWeights(TensorInfo(
+            4, recurrentToOutputWeightsDimensions.data(), DataType::QuantisedAsymm8), recurrentToOutputWeightsData);
+
+
+    std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
+    ConstTensor inputGateBias(
+            TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32), inputGateBiasData);
+
+    std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
+    ConstTensor forgetGateBias(TensorInfo(
+            4, forgetGateBiasDimensions.data(), DataType::Signed32), forgetGateBiasData);
+
+    std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
+    ConstTensor cellBias(TensorInfo(
+            4, cellBiasDimensions.data(), DataType::Signed32), cellBiasData);
+
+    std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
+    ConstTensor outputGateBias(TensorInfo(
+            4, outputGateBiasDimensions.data(), DataType::Signed32), outputGateBiasData);
+
+    QuantizedLstmInputParams params;
+
+    params.m_InputToInputWeights = &inputToInputWeights;
+    params.m_InputToForgetWeights = &inputToForgetWeights;
+    params.m_InputToCellWeights = &inputToCellWeights;
+    params.m_InputToOutputWeights = &inputToOutputWeights;
+
+    params.m_RecurrentToInputWeights = &recurrentToInputWeights;
+    params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
+    params.m_RecurrentToCellWeights = &recurrentToCellWeights;
+    params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
+
+    params.m_InputGateBias = &inputGateBias;
+    params.m_ForgetGateBias = &forgetGateBias;
+    params.m_CellBias = &cellBias;
+    params.m_OutputGateBias = &outputGateBias;
+
+    TestQuantizedLstmLayerVisitor visitor(params);
+
+    Network net;
+
+    IConnectableLayer* const layer = net.AddQuantizedLstmLayer(params);
+    layer->Accept(visitor);
+}
+
+BOOST_AUTO_TEST_CASE(CheckNamedQuantizedLstmLayer)
+{
+    const char* layerName = "LstmLayer";
+    std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor inputToInputWeights(
+            TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QuantisedAsymm8), inputToInputWeightsData);
+
+    std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor inputToForgetWeights(
+            TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QuantisedAsymm8), inputToForgetWeightsData);
+
+    std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor inputToCellWeights(
+            TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QuantisedAsymm8), inputToCellWeightsData);
+
+    std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor inputToOutputWeights(
+            TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QuantisedAsymm8), inputToOutputWeightsData);
+
+
+    std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor recurrentToInputWeights(TensorInfo(
+            4, recurrentToInputWeightsDimensions.data(), DataType::QuantisedAsymm8), recurrentToInputWeightsData);
+
+    std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor recurrentToForgetWeights(TensorInfo(
+            4, recurrentToForgetWeightsDimensions.data(), DataType::QuantisedAsymm8), recurrentToForgetWeightsData);
+
+    std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor recurrentToCellWeights(TensorInfo(
+            4, recurrentToCellWeightsDimensions.data(), DataType::QuantisedAsymm8), recurrentToCellWeightsData);
+
+    std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
+    ConstTensor recurrentToOutputWeights(TensorInfo(
+            4, recurrentToOutputWeightsDimensions.data(), DataType::QuantisedAsymm8), recurrentToOutputWeightsData);
+
+
+    std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
+    ConstTensor inputGateBias(
+            TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32), inputGateBiasData);
+
+    std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
+    ConstTensor forgetGateBias(TensorInfo(
+            4, forgetGateBiasDimensions.data(), DataType::Signed32), forgetGateBiasData);
+
+    std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
+    ConstTensor cellBias(TensorInfo(
+            4, cellBiasDimensions.data(), DataType::Signed32), cellBiasData);
+
+    std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
+    ConstTensor outputGateBias(TensorInfo(
+            4, outputGateBiasDimensions.data(), DataType::Signed32), outputGateBiasData);
+
+    QuantizedLstmInputParams params;
+
+    params.m_InputToInputWeights = &inputToInputWeights;
+    params.m_InputToForgetWeights = &inputToForgetWeights;
+    params.m_InputToCellWeights = &inputToCellWeights;
+    params.m_InputToOutputWeights = &inputToOutputWeights;
+
+    params.m_RecurrentToInputWeights = &recurrentToInputWeights;
+    params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
+    params.m_RecurrentToCellWeights = &recurrentToCellWeights;
+    params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
+
+    params.m_InputGateBias = &inputGateBias;
+    params.m_ForgetGateBias = &forgetGateBias;
+    params.m_CellBias = &cellBias;
+    params.m_OutputGateBias = &outputGateBias;
+
+    TestQuantizedLstmLayerVisitor visitor(params, layerName);
+
+    Network net;
+
+    IConnectableLayer* const layer = net.AddQuantizedLstmLayer(params, layerName);
+    layer->Accept(visitor);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
 
 } // namespace armnn
diff --git a/src/armnn/test/ConstTensorLayerVisitor.hpp b/src/armnn/test/ConstTensorLayerVisitor.hpp
index 80409b3..203c5fd 100644
--- a/src/armnn/test/ConstTensorLayerVisitor.hpp
+++ b/src/armnn/test/ConstTensorLayerVisitor.hpp
@@ -7,6 +7,7 @@
 #include "TestLayerVisitor.hpp"
 #include <armnn/Descriptors.hpp>
 #include <armnn/LstmParams.hpp>
+#include <armnn/QuantizedLstmParams.hpp>
 
 namespace armnn
 {
@@ -220,4 +221,32 @@
     LstmInputParams m_InputParams;
 };
 
+
+class TestQuantizedLstmLayerVisitor : public TestLayerVisitor
+{
+public:
+    explicit TestQuantizedLstmLayerVisitor(const QuantizedLstmInputParams& params,
+                                           const char* name = nullptr)
+        : TestLayerVisitor(name)
+        , m_InputParams(params)
+    {}
+
+    void VisitQuantizedLstmLayer(const IConnectableLayer* layer,
+                                 const QuantizedLstmInputParams& params,
+                                 const char* name = nullptr)
+    {
+        CheckLayerPointer(layer);
+        CheckLayerName(name);
+        CheckInputParameters(params);
+    }
+
+protected:
+    void CheckInputParameters(const QuantizedLstmInputParams& inputParams);
+    void CheckConstTensorPtrs(const std::string& name, const ConstTensor* expected, const ConstTensor* actual);
+
+private:
+    QuantizedLstmInputParams m_InputParams;
+};
+
+
 } // namespace armnn
diff --git a/src/armnn/test/InferOutputTests.cpp b/src/armnn/test/InferOutputTests.cpp
index 4581d87..8606745 100644
--- a/src/armnn/test/InferOutputTests.cpp
+++ b/src/armnn/test/InferOutputTests.cpp
@@ -40,4 +40,7 @@
 // TransposeConvolution2D
 ARMNN_SIMPLE_TEST_CASE(TransposeConvolution2dInferOutputShape, TransposeConvolution2dInferOutputShapeTest)
 
+// QuantizedLstm
+ARMNN_SIMPLE_TEST_CASE(QuantizedLstmInferOutputShape, QuantizedLstmInferOutputShapeTest)
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/InferOutputTests.hpp b/src/armnn/test/InferOutputTests.hpp
index 58a081a..2dd2ff0 100644
--- a/src/armnn/test/InferOutputTests.hpp
+++ b/src/armnn/test/InferOutputTests.hpp
@@ -443,4 +443,50 @@
     armnn::TensorShape expectedOutputShape(4, expectedOutputSizes.data());
 
     BOOST_CHECK(expectedOutputShape == depthwiseConvolution2dLayer->InferOutputShapes(shapes).at(0));
-}
\ No newline at end of file
+}
+
+// QuantizedLstm
+void QuantizedLstmInferOutputShapeImpl(const std::vector<armnn::TensorShape>& inputShapes,
+                                       std::vector<armnn::TensorShape>& outputShapes)
+{
+    armnn::Graph graph;
+    armnn::QuantizedLstmLayer* const quantizedLstmLayer = graph.AddLayer<armnn::QuantizedLstmLayer>("quantizedLstm");
+    outputShapes = quantizedLstmLayer->InferOutputShapes(inputShapes);
+}
+
+void QuantizedLstmInferOutputShapeTest()
+{
+    // Input shapes
+    const std::vector<unsigned int> inputShape{ 2, 5 };
+    const std::vector<unsigned int> previousCellStateInShape{ 2, 10 };
+    const std::vector<unsigned int> previousOutputInShape{ 2, 10 };
+    armnn::TensorShape inputTensorShape(2, inputShape.data());
+    armnn::TensorShape previousCellStateInTensorShape(2, previousCellStateInShape.data());
+    armnn::TensorShape previousOutputInTensorShape(2, previousOutputInShape.data());
+
+    std::vector<armnn::TensorShape> inShapes
+    {
+        inputTensorShape,
+        previousCellStateInTensorShape,
+        previousOutputInTensorShape
+    };
+
+    // Output shapes
+    const std::vector<unsigned int> cellStateOutShape{ 2, 10 };
+    const std::vector<unsigned int> outputShape{ 2, 10 };
+    armnn::TensorShape cellStateOutTensorShape(2, cellStateOutShape.data());
+    armnn::TensorShape outputTensorShape(2, outputShape.data());
+
+    std::vector<armnn::TensorShape> expectedOutShapes
+    {
+        cellStateOutTensorShape,
+        outputTensorShape
+    };
+
+    std::vector<armnn::TensorShape> actualOutShapes;
+    BOOST_CHECK_NO_THROW(QuantizedLstmInferOutputShapeImpl(inShapes, actualOutShapes));
+
+    BOOST_CHECK(actualOutShapes.size() == 2);
+    BOOST_CHECK(expectedOutShapes[0] == actualOutShapes[0]);
+    BOOST_CHECK(expectedOutShapes[1] == actualOutShapes[1]);
+}
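
For context, the shapes checked above follow the inference rule implemented in QuantizedLstmLayer::InferOutputShapes (added elsewhere in this change): both outputs are [numBatches, outputSize], with outputSize taken from the state inputs. A standalone sketch of that rule, using a hypothetical helper name:

    // inputShapes[0]: input               [numBatches, inputSize]
    // inputShapes[1]: previousCellStateIn [numBatches, outputSize]
    // inputShapes[2]: previousOutputIn    [numBatches, outputSize]
    std::vector<armnn::TensorShape> SketchQuantizedLstmOutputShapes(
        const std::vector<armnn::TensorShape>& inputShapes)
    {
        const unsigned int numBatches = inputShapes[0][0];
        const unsigned int outputSize = inputShapes[1][1];

        armnn::TensorShape stateShape({numBatches, outputSize});
        return { stateShape,   // cellStateOut [numBatches, outputSize]
                 stateShape }; // output       [numBatches, outputSize]
    }
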
diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp
index 05df2c9..67c2f05 100644
--- a/src/armnnSerializer/Serializer.cpp
+++ b/src/armnnSerializer/Serializer.cpp
@@ -1042,6 +1042,13 @@
     CreateAnyLayer(fbLayer.o, serializer::Layer::Layer_TransposeConvolution2dLayer);
 }
 
+void SerializerVisitor::VisitQuantizedLstmLayer(const armnn::IConnectableLayer* layer,
+                                                const armnn::QuantizedLstmInputParams& params,
+                                                const char* name)
+{
+    throw UnimplementedException("SerializerVisitor::VisitQuantizedLstmLayer not yet implemented");
+}
+
 fb::Offset<serializer::LayerBase> SerializerVisitor::CreateLayerBase(const IConnectableLayer* layer,
                                                                      const serializer::LayerType layerType)
 {
diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp
index 8404a7f..b859ae9 100644
--- a/src/armnnSerializer/Serializer.hpp
+++ b/src/armnnSerializer/Serializer.hpp
@@ -166,6 +166,10 @@
     void VisitQuantizeLayer(const armnn::IConnectableLayer* layer,
                             const char* name = nullptr) override;
 
+    void VisitQuantizedLstmLayer(const armnn::IConnectableLayer* layer,
+                                 const armnn::QuantizedLstmInputParams& params,
+                                 const char* name = nullptr) override;
+
     void VisitReshapeLayer(const armnn::IConnectableLayer* layer,
                            const armnn::ReshapeDescriptor& reshapeDescriptor,
                            const char* name = nullptr) override;
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index e843423..f202fed 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -347,6 +347,17 @@
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
+bool LayerSupportBase::IsQuantizedLstmSupported(const TensorInfo& input,
+                                                const TensorInfo& previousCellStateIn,
+                                                const TensorInfo& previousOutputIn,
+                                                const TensorInfo& cellStateOut,
+                                                const TensorInfo& output,
+                                                const QuantizedLstmInputParamsInfo& paramsInfo,
+                                                Optional<std::string&> reasonIfUnsupported) const
+{
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
 bool LayerSupportBase::IsReshapeSupported(const TensorInfo& input,
                                           const ReshapeDescriptor& descriptor,
                                           Optional<std::string&> reasonIfUnsupported) const
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index d49fc3e..c860e34 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -214,6 +214,14 @@
                              const TensorInfo& output,
                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsQuantizedLstmSupported(const TensorInfo& input,
+                                  const TensorInfo& previousCellStateIn,
+                                  const TensorInfo& previousOutputIn,
+                                  const TensorInfo& cellStateOut,
+                                  const TensorInfo& output,
+                                  const QuantizedLstmInputParamsInfo& paramsInfo,
+                                  Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsReshapeSupported(const TensorInfo& input,
                             const ReshapeDescriptor& descriptor,
                             Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
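
LayerSupportBase keeps the default "unsupported" answer, so a backend opts in by overriding IsQuantizedLstmSupported. A hypothetical override that checks only the tensor data types the Android Q quantized LSTM uses; the backend class name and the exact constraints are illustrative, not part of this patch:

    bool MyBackendLayerSupport::IsQuantizedLstmSupported(const TensorInfo& input,
                                                         const TensorInfo& previousCellStateIn,
                                                         const TensorInfo& previousOutputIn,
                                                         const TensorInfo& cellStateOut,
                                                         const TensorInfo& output,
                                                         const QuantizedLstmInputParamsInfo& paramsInfo,
                                                         Optional<std::string&> reasonIfUnsupported) const
    {
        // Activations are 8-bit asymmetric, cell state is 16-bit symmetric, biases are 32-bit.
        bool supported =
            input.GetDataType()                             == DataType::QuantisedAsymm8 &&
            previousOutputIn.GetDataType()                  == DataType::QuantisedAsymm8 &&
            output.GetDataType()                            == DataType::QuantisedAsymm8 &&
            previousCellStateIn.GetDataType()               == DataType::QuantisedSymm16 &&
            cellStateOut.GetDataType()                      == DataType::QuantisedSymm16 &&
            paramsInfo.m_InputToInputWeights->GetDataType() == DataType::QuantisedAsymm8 &&
            paramsInfo.m_InputGateBias->GetDataType()       == DataType::Signed32;

        if (!supported && reasonIfUnsupported.has_value())
        {
            reasonIfUnsupported.value() = "QuantizedLstm: unsupported tensor data type";
        }

        return supported;
    }
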
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index f3d5069..d790daf 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -477,4 +477,41 @@
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
 
+struct QuantizedLstmQueueDescriptor : QueueDescriptor
+{
+    QuantizedLstmQueueDescriptor()
+        : m_InputToInputWeights(nullptr)
+        , m_InputToForgetWeights(nullptr)
+        , m_InputToCellWeights(nullptr)
+        , m_InputToOutputWeights(nullptr)
+
+        , m_RecurrentToInputWeights(nullptr)
+        , m_RecurrentToForgetWeights(nullptr)
+        , m_RecurrentToCellWeights(nullptr)
+        , m_RecurrentToOutputWeights(nullptr)
+
+        , m_InputGateBias(nullptr)
+        , m_ForgetGateBias(nullptr)
+        , m_CellBias(nullptr)
+        , m_OutputGateBias(nullptr)
+    {}
+
+    const ConstCpuTensorHandle* m_InputToInputWeights;
+    const ConstCpuTensorHandle* m_InputToForgetWeights;
+    const ConstCpuTensorHandle* m_InputToCellWeights;
+    const ConstCpuTensorHandle* m_InputToOutputWeights;
+
+    const ConstCpuTensorHandle* m_RecurrentToInputWeights;
+    const ConstCpuTensorHandle* m_RecurrentToForgetWeights;
+    const ConstCpuTensorHandle* m_RecurrentToCellWeights;
+    const ConstCpuTensorHandle* m_RecurrentToOutputWeights;
+
+    const ConstCpuTensorHandle* m_InputGateBias;
+    const ConstCpuTensorHandle* m_ForgetGateBias;
+    const ConstCpuTensorHandle* m_CellBias;
+    const ConstCpuTensorHandle* m_OutputGateBias;
+
+    void Validate(const WorkloadInfo& workloadInfo) const;
+};
+
 } //namespace armnn
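
The Validate implementation itself lives in WorkloadData.cpp; in outline it needs to check the 3-input/2-output arity and that all twelve weight and bias handles have been set. A sketch of those checks, not the actual implementation:

    void QuantizedLstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
    {
        // input, previousCellStateIn, previousOutputIn
        if (workloadInfo.m_InputTensorInfos.size() != 3)
        {
            throw InvalidArgumentException("QuantizedLstmQueueDescriptor: 3 inputs required.");
        }

        // cellStateOut, output
        if (workloadInfo.m_OutputTensorInfos.size() != 2)
        {
            throw InvalidArgumentException("QuantizedLstmQueueDescriptor: 2 outputs required.");
        }

        // Every weight and bias tensor must be provided (only two shown here).
        if (m_InputToInputWeights == nullptr || m_InputGateBias == nullptr)
        {
            throw InvalidArgumentException("QuantizedLstmQueueDescriptor: missing weight or bias tensor.");
        }
    }
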
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index a24a325..cbaae40 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -631,6 +631,76 @@
             result = layerSupportObject->IsQuantizeSupported(input, output, reason);
             break;
         }
+        case LayerType::QuantizedLstm:
+        {
+            auto cLayer = boost::polymorphic_downcast<const QuantizedLstmLayer*>(&layer);
+
+            // Inputs
+            const TensorInfo& input = OverrideDataType(
+                    layer.GetInputSlot(0).GetConnection()->GetTensorInfo(), dataType);
+            const TensorInfo& previousCellStateIn = OverrideDataType(
+                    layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), dataType);
+            const TensorInfo& previousOutputIn = OverrideDataType(
+                    layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), dataType);
+
+            // Outputs
+            const TensorInfo& cellStateOut = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType);
+            const TensorInfo& output = OverrideDataType(layer.GetOutputSlot(1).GetTensorInfo(), dataType);
+
+            // QuantizedLstm parameters
+            const TensorInfo& inputToInputWeights = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_InputToInputWeights->GetTensorInfo(), dataType);
+            const TensorInfo& inputToForgetWeights = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_InputToForgetWeights->GetTensorInfo(), dataType);
+            const TensorInfo& inputToCellWeights = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_InputToCellWeights->GetTensorInfo(), dataType);
+            const TensorInfo& inputToOutputWeights = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_InputToOutputWeights->GetTensorInfo(), dataType);
+
+            const TensorInfo& recurrentToInputWeights = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_RecurrentToInputWeights->GetTensorInfo(), dataType);
+            const TensorInfo& recurrentToForgetWeights = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights->GetTensorInfo(), dataType);
+            const TensorInfo& recurrentToCellWeights = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_RecurrentToCellWeights->GetTensorInfo(), dataType);
+            const TensorInfo& recurrentToOutputWeights = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights->GetTensorInfo(), dataType);
+
+            const TensorInfo& inputGateBias = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_InputGateBias->GetTensorInfo(), dataType);
+            const TensorInfo& forgetGateBias = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_ForgetGateBias->GetTensorInfo(), dataType);
+            const TensorInfo& cellBias = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_CellBias->GetTensorInfo(), dataType);
+            const TensorInfo& outputGateBias = OverrideDataType(
+                    cLayer->m_QuantizedLstmParameters.m_OutputGateBias->GetTensorInfo(), dataType);
+
+            QuantizedLstmInputParamsInfo paramsInfo;
+
+            paramsInfo.m_InputToInputWeights      = &inputToInputWeights;
+            paramsInfo.m_InputToForgetWeights     = &inputToForgetWeights;
+            paramsInfo.m_InputToCellWeights       = &inputToCellWeights;
+            paramsInfo.m_InputToOutputWeights     = &inputToOutputWeights;
+
+            paramsInfo.m_RecurrentToInputWeights  = &recurrentToInputWeights;
+            paramsInfo.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
+            paramsInfo.m_RecurrentToCellWeights   = &recurrentToCellWeights;
+            paramsInfo.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
+
+            paramsInfo.m_InputGateBias            = &inputGateBias;
+            paramsInfo.m_ForgetGateBias           = &forgetGateBias;
+            paramsInfo.m_CellBias                 = &cellBias;
+            paramsInfo.m_OutputGateBias           = &outputGateBias;
+
+            result = layerSupportObject->IsQuantizedLstmSupported(input,
+                                                                  previousCellStateIn,
+                                                                  previousOutputIn,
+                                                                  cellStateOut,
+                                                                  output,
+                                                                  paramsInfo,
+                                                                  reason);
+            break;
+        }
         case LayerType::Division:
         {
             const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -1109,6 +1179,12 @@
     return std::unique_ptr<IWorkload>();
 }
 
+std::unique_ptr<IWorkload> IWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
+                                                                 const WorkloadInfo& info) const
+{
+    return std::unique_ptr<IWorkload>();
+}
+
 std::unique_ptr<IWorkload> IWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
 {
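
The base IWorkloadFactory::CreateQuantizedLstm added above returns an empty workload, mirroring the other Create* defaults; a backend that actually implements the layer overrides it. A hypothetical backend override, with illustrative class and workload names that are not part of this patch:

    std::unique_ptr<IWorkload> MyBackendWorkloadFactory::CreateQuantizedLstm(
        const QuantizedLstmQueueDescriptor& descriptor,
        const WorkloadInfo& info) const
    {
        // Hand the descriptor (inputs, outputs and the twelve constant tensors) to the
        // backend-specific workload implementation.
        return std::make_unique<MyQuantizedLstmWorkload>(descriptor, info);
    }
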
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index 749a258..6d03da7 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -161,6 +161,9 @@
     virtual std::unique_ptr<IWorkload> CreateQuantize(const QuantizeQueueDescriptor& descriptor,
                                                       const WorkloadInfo& Info) const;
 
+    virtual std::unique_ptr<IWorkload> CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
+                                                           const WorkloadInfo& info) const;
+
     virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                      const WorkloadInfo& info) const;
 
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 6aff759..451c585 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -27,14 +27,17 @@
 armnn::WorkloadInfo MakeDummyWorkloadInfo(unsigned int numInputs, unsigned int numOutputs)
 {
     armnn::WorkloadInfo info;
+
     for (unsigned int i=0; i < numInputs; i++)
     {
         info.m_InputTensorInfos.push_back(MakeDummyTensorInfo<DataType>());
     }
+
     for (unsigned int o=0; o < numOutputs; o++)
     {
         info.m_OutputTensorInfos.push_back(MakeDummyTensorInfo<DataType>());
     }
+
     return info;
 }
 
@@ -46,10 +49,12 @@
     {
         m_Layer = dummyGraph.AddLayer<LayerType>(DescType(), "");
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     LayerType* m_Layer;
 };
 
@@ -61,10 +66,12 @@
     {
         m_Layer = dummyGraph.AddLayer<LayerType>("");
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     LayerType* m_Layer;
 };
 
@@ -83,12 +90,13 @@
         m_Layer->m_Gamma = std::make_unique<armnn::ScopedCpuTensorHandle>(
             armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32));
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
-    armnn::BatchNormalizationLayer* m_Layer;
 
+    armnn::BatchNormalizationLayer* m_Layer;
 };
 
 template<>
@@ -98,10 +106,12 @@
     {
         m_Layer = dummyGraph.AddLayer<armnn::BatchToSpaceNdLayer>(armnn::BatchToSpaceNdDescriptor(), "");
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     armnn::BatchToSpaceNdLayer* m_Layer;
 };
 
@@ -112,10 +122,12 @@
     {
         m_Layer = dummyGraph.AddLayer<armnn::ConstantLayer>("");
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     armnn::ConstantLayer* m_Layer;
 };
 
@@ -125,12 +137,13 @@
     DummyLayer()
     {
         m_Layer = dummyGraph.AddLayer<armnn::InputLayer>(armnn::LayerBindingId(), "");
-
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     armnn::InputLayer* m_Layer;
 };
 
@@ -141,12 +154,13 @@
     {
         armnn::OriginsDescriptor desc(2);
         m_Layer = dummyGraph.AddLayer<armnn::ConcatLayer>(desc, "");
-
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     armnn::ConcatLayer* m_Layer;
 };
 
@@ -156,12 +170,13 @@
     DummyLayer()
     {
         m_Layer = dummyGraph.AddLayer<armnn::OutputLayer>(armnn::LayerBindingId(), "");
-
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     armnn::OutputLayer* m_Layer;
 };
 
@@ -172,12 +187,13 @@
     {
         armnn::ViewsDescriptor desc(1);
         m_Layer = dummyGraph.AddLayer<armnn::SplitterLayer>(desc, "");
-
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     armnn::SplitterLayer* m_Layer;
 };
 
@@ -193,10 +209,12 @@
         m_Layer->m_Bias = std::make_unique<armnn::ScopedCpuTensorHandle>(
             armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32));
     }
+
     ~DummyConvolutionLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     ConvolutionLayerType* m_Layer;
 };
 
@@ -255,10 +273,12 @@
         m_Layer->m_CifgParameters.m_InputGateBias              = std::make_unique<armnn::ScopedCpuTensorHandle>(
                 armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32));
     }
+
     ~DummyLstmLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     armnn::LstmLayer* m_Layer;
 };
 
@@ -269,6 +289,49 @@
 };
 
 template<>
+struct DummyLayer<armnn::QuantizedLstmLayer, void>
+{
+    DummyLayer()
+    {
+        m_Layer = dummyGraph.AddLayer<armnn::QuantizedLstmLayer>("");
+
+        m_Layer->m_QuantizedLstmParameters.m_InputToInputWeights  = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::QuantisedAsymm8));
+        m_Layer->m_QuantizedLstmParameters.m_InputToForgetWeights = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::QuantisedAsymm8));
+        m_Layer->m_QuantizedLstmParameters.m_InputToCellWeights   = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::QuantisedAsymm8));
+        m_Layer->m_QuantizedLstmParameters.m_InputToOutputWeights = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::QuantisedAsymm8));
+
+        m_Layer->m_QuantizedLstmParameters.m_RecurrentToInputWeights  = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::QuantisedAsymm8));
+        m_Layer->m_QuantizedLstmParameters.m_RecurrentToForgetWeights = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::QuantisedAsymm8));
+        m_Layer->m_QuantizedLstmParameters.m_RecurrentToCellWeights   = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::QuantisedAsymm8));
+        m_Layer->m_QuantizedLstmParameters.m_RecurrentToOutputWeights = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::QuantisedAsymm8));
+
+        m_Layer->m_QuantizedLstmParameters.m_InputGateBias  = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Signed32));
+        m_Layer->m_QuantizedLstmParameters.m_ForgetGateBias = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Signed32));
+        m_Layer->m_QuantizedLstmParameters.m_CellBias       = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Signed32));
+        m_Layer->m_QuantizedLstmParameters.m_OutputGateBias = std::make_unique<armnn::ScopedCpuTensorHandle>(
+                armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Signed32));
+    }
+
+    ~DummyLayer()
+    {
+        dummyGraph.EraseLayer(m_Layer);
+    }
+
+    armnn::QuantizedLstmLayer* m_Layer;
+};
+
+template<>
 struct DummyLayer<armnn::FullyConnectedLayer>
 {
     DummyLayer()
@@ -278,10 +341,12 @@
         m_Layer->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(
             armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32));
     }
+
     ~DummyLayer()
     {
         dummyGraph.EraseLayer(m_Layer);
     }
+
     armnn::FullyConnectedLayer* m_Layer;
 };
 
@@ -392,6 +457,8 @@
 
 DECLARE_LAYER_POLICY_1_PARAM(Prelu)
 
+DECLARE_LAYER_POLICY_1_PARAM(QuantizedLstm)
+
 DECLARE_LAYER_POLICY_1_PARAM(Division)
 
 DECLARE_LAYER_POLICY_2_PARAM(Resize)