MLCE-510 Add CpuRef Shape Operator to ArmNN

 * Add front end (usage sketch below)
 * Add reference workload
 * Add Serialization/Deserialization support
 * Add unit tests
 * Update ArmNN Versioning
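
The new layer is exposed on the public graph-building API as INetwork::AddShapeLayer.
A minimal usage sketch, mirroring the SerializeShape unit test added below (the tensor
shapes and layer name here are illustrative only):

    #include <armnn/INetwork.hpp>
    #include <armnn/Tensor.hpp>
    #include <armnn/Types.hpp>

    // Build a small network whose output is the shape of a 1x3x3x1 Signed32 tensor.
    armnn::INetworkPtr network = armnn::INetwork::Create();

    armnn::IConnectableLayer* input  = network->AddInputLayer(0);
    armnn::IConnectableLayer* shape  = network->AddShapeLayer("shape");
    armnn::IConnectableLayer* output = network->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(shape->GetInputSlot(0));
    shape->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(
        armnn::TensorInfo({ 1, 3, 3, 1 }, armnn::DataType::Signed32));
    // Shape always produces a 1-D Signed32 tensor holding the input dimensions,
    // here { 1, 3, 3, 1 }.
    shape->GetOutputSlot(0).SetTensorInfo(
        armnn::TensorInfo({ 4 }, armnn::DataType::Signed32));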

Signed-off-by: Keith Davis <keith.davis@arm.com>
Change-Id: I6fcb1fa341d6f08dea4003b13544e6e9f53fefd3
diff --git a/src/armnn/BackendHelper.cpp b/src/armnn/BackendHelper.cpp
index be21412..a7bf419 100644
--- a/src/armnn/BackendHelper.cpp
+++ b/src/armnn/BackendHelper.cpp
@@ -722,6 +722,13 @@
     return m_LayerSupport->IsRsqrtSupported(input, output, reasonIfUnsupported.value());
 }
 
+bool LayerSupportHandle::IsShapeSupported(const TensorInfo& input,
+                                          const TensorInfo& output,
+                                          Optional<std::string&> reasonIfUnsupported)
+{
+    return m_LayerSupport->IsShapeSupported(input, output, reasonIfUnsupported.value());
+}
+
 bool LayerSupportHandle::IsSliceSupported(const TensorInfo& input,
                                           const TensorInfo& output,
                                           const SliceDescriptor& descriptor,
diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
index 19cd9bd..cdbcaa7 100644
--- a/src/armnn/LayersFwd.hpp
+++ b/src/armnn/LayersFwd.hpp
@@ -60,6 +60,7 @@
 #include "layers/ReduceLayer.hpp"
 #include "layers/ReshapeLayer.hpp"
 #include "layers/ResizeLayer.hpp"
+#include "layers/ShapeLayer.hpp"
 #include "layers/SliceLayer.hpp"
 #include "layers/SoftmaxLayer.hpp"
 #include "layers/SpaceToBatchNdLayer.hpp"
@@ -154,6 +155,7 @@
 DECLARE_LAYER(Reduce)
 DECLARE_LAYER(Reshape)
 DECLARE_LAYER(Resize)
+DECLARE_LAYER(Shape)
 DECLARE_LAYER(Slice)
 DECLARE_LAYER(Softmax)
 DECLARE_LAYER(SpaceToBatchNd)
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 5807d17..71f1931 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -482,6 +482,11 @@
     return pNetworkImpl->AddTransposeLayer(transposeDescriptor, name);
 }
 
+IConnectableLayer* INetwork::AddShapeLayer(const char* name)
+{
+    return pNetworkImpl->AddShapeLayer(name);
+}
+
 IConnectableLayer* INetwork::AddStackLayer(const StackDescriptor& descriptor,
                                            const char* name)
 {
@@ -2099,6 +2104,11 @@
     return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
 }
 
+IConnectableLayer* NetworkImpl::AddShapeLayer(const char* name)
+{
+    return m_Graph->AddLayer<ShapeLayer>(name);
+}
+
 IConnectableLayer* NetworkImpl::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
                                                               const char* name)
 {
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index ad9b51c..e07075f 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -250,6 +250,8 @@
     IConnectableLayer* AddTransposeLayer(const TransposeDescriptor& transposeDescriptor,
                                          const char* name = nullptr);
 
+    IConnectableLayer* AddShapeLayer(const char* name = nullptr);
+
     IConnectableLayer* AddStackLayer(const StackDescriptor& stackDescriptor,
                                      const char* name = nullptr);
 
diff --git a/src/armnn/layers/ShapeLayer.cpp b/src/armnn/layers/ShapeLayer.cpp
new file mode 100644
index 0000000..4193fa9
--- /dev/null
+++ b/src/armnn/layers/ShapeLayer.cpp
@@ -0,0 +1,73 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ShapeLayer.hpp"
+
+#include "LayerCloneBase.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <armnn/utility/NumericCast.hpp>
+
+#include <backendsCommon/WorkloadData.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+namespace armnn
+{
+
+ShapeLayer::ShapeLayer(const char* name)
+    : Layer(1, 1, LayerType::Shape, name)
+{
+}
+
+std::unique_ptr<IWorkload> ShapeLayer::CreateWorkload(const IWorkloadFactory& factory) const
+{
+    ShapeQueueDescriptor descriptor;
+    SetAdditionalInfo(descriptor);
+
+    return factory.CreateShape(descriptor, PrepInfoAndDesc(descriptor));
+}
+
+ShapeLayer* ShapeLayer::Clone(Graph& graph) const
+{
+    return CloneBase<ShapeLayer>(graph, GetName());
+}
+
+void ShapeLayer::ValidateTensorShapesFromInputs()
+{
+    VerifyLayerConnections(1, CHECK_LOCATION());
+
+    const TensorShape& outputShape = GetOutputSlot(0).GetTensorInfo().GetShape();
+
+    VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod);
+
+    auto inferredShape = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() });
+
+    ARMNN_ASSERT(inferredShape.size() == 1);
+
+    ValidateAndCopyShape(outputShape, inferredShape[0], m_ShapeInferenceMethod, "ShapeLayer");
+}
+
+std::vector<TensorShape> ShapeLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
+{
+    IgnoreUnused(inputShapes);
+    ARMNN_ASSERT(inputShapes.size() == 1);
+
+    TensorShape outputShape({ inputShapes[0].GetNumDimensions() });
+
+    return std::vector<TensorShape>({ outputShape });
+}
+
+void ShapeLayer::Accept(ILayerVisitor& visitor) const
+{
+    IgnoreUnused(visitor);
+    throw armnn::Exception("ShapeLayer VisitShapeLayer is not implemented");
+}
+
+void ShapeLayer::ExecuteStrategy(IStrategy& strategy) const
+{
+    strategy.ExecuteStrategy(this, BaseDescriptor(), {}, GetName());
+}
+
+} // namespace armnn
diff --git a/src/armnn/layers/ShapeLayer.hpp b/src/armnn/layers/ShapeLayer.hpp
new file mode 100644
index 0000000..fee285c
--- /dev/null
+++ b/src/armnn/layers/ShapeLayer.hpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerWithParameters.hpp"
+
+namespace armnn
+{
+
+class ShapeLayer : public Layer
+{
+public:
+    /// Makes a workload for the Shape layer type.
+    /// @param [in] factory The workload factory which will create
+    /// the workload.
+    /// @return A pointer to the created workload, or nullptr if not created.
+    virtual std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override;
+
+    /// Creates a dynamically-allocated copy of this layer.
+    /// @param [in] graph The graph into which this layer is being cloned.
+    ShapeLayer* Clone(Graph& graph) const override;
+
+    /// Check if the input tensor shape(s) will lead to a valid
+    /// configuration of @ref ShapeLayer. Depending on the layer's shape inference
+    /// method, the output shape is either overwritten or validated.
+    void ValidateTensorShapesFromInputs() override;
+
+    /// By default returns inputShapes if the number of inputs is equal to the number of outputs,
+    /// otherwise infers the output shapes from the given input shapes and layer properties.
+    /// @param [in] inputShapes The input shapes the layer has.
+    /// @return A vector of the inferred output shapes.
+    std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override;
+
+    void Accept(ILayerVisitor& visitor) const override;
+
+    void ExecuteStrategy(IStrategy& strategy) const override;
+
+protected:
+    /// Constructor to create a ShapeLayer.
+    /// @param [in] name Optional name for the layer.
+    ShapeLayer(const char* name);
+
+    /// Default destructor.
+    ~ShapeLayer() = default;
+};
+
+} // namespace armnn
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index b5bf9da..af6ff84 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -257,6 +257,7 @@
     m_ParserFunctions[Layer_ResizeBilinearLayer]         = &DeserializerImpl::ParseResizeBilinear;
     m_ParserFunctions[Layer_ResizeLayer]                 = &DeserializerImpl::ParseResize;
     m_ParserFunctions[Layer_RsqrtLayer]                  = &DeserializerImpl::ParseRsqrt;
+    m_ParserFunctions[Layer_ShapeLayer]                  = &DeserializerImpl::ParseShape;
     m_ParserFunctions[Layer_SliceLayer]                  = &DeserializerImpl::ParseSlice;
     m_ParserFunctions[Layer_SoftmaxLayer]                = &DeserializerImpl::ParseSoftmax;
     m_ParserFunctions[Layer_SpaceToBatchNdLayer]         = &DeserializerImpl::ParseSpaceToBatchNd;
@@ -377,6 +378,8 @@
             return graphPtr->layers()->Get(layerIndex)->layer_as_ResizeLayer()->base();
         case Layer::Layer_RsqrtLayer:
             return graphPtr->layers()->Get(layerIndex)->layer_as_RsqrtLayer()->base();
+        case Layer::Layer_ShapeLayer:
+            return graphPtr->layers()->Get(layerIndex)->layer_as_ShapeLayer()->base();
         case Layer::Layer_SliceLayer:
             return graphPtr->layers()->Get(layerIndex)->layer_as_SliceLayer()->base();
         case Layer::Layer_SoftmaxLayer:
@@ -2338,6 +2341,26 @@
     RegisterOutputSlots(graph, layerIndex, layer);
 }
 
+void IDeserializer::DeserializerImpl::ParseShape(GraphPtr graph, unsigned int layerIndex)
+{
+    CHECK_LAYERS(graph, 0, layerIndex);
+
+    TensorRawPtrVector inputs = GetInputs(graph, layerIndex);
+    CHECK_VALID_SIZE(inputs.size(), 1);
+
+    TensorRawPtrVector outputs = GetOutputs(graph, layerIndex);
+    CHECK_VALID_SIZE(outputs.size(), 1);
+
+    auto layerName = GetLayerName(graph, layerIndex);
+    IConnectableLayer* layer = m_Network->AddShapeLayer(layerName.c_str());
+
+    armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    RegisterInputSlots(graph, layerIndex, layer);
+    RegisterOutputSlots(graph, layerIndex, layer);
+}
+
 void IDeserializer::DeserializerImpl::ParseSoftmax(GraphPtr graph, unsigned int layerIndex)
 {
     CHECK_LAYERS(graph, 0, layerIndex);
diff --git a/src/armnnDeserializer/Deserializer.hpp b/src/armnnDeserializer/Deserializer.hpp
index 8f38058..0b05e16 100644
--- a/src/armnnDeserializer/Deserializer.hpp
+++ b/src/armnnDeserializer/Deserializer.hpp
@@ -125,6 +125,7 @@
     void ParseResize(GraphPtr graph, unsigned int layerIndex);
     void ParseResizeBilinear(GraphPtr graph, unsigned int layerIndex);
     void ParseRsqrt(GraphPtr graph, unsigned int layerIndex);
+    void ParseShape(GraphPtr graph, unsigned int layerIndex);
     void ParseSlice(GraphPtr graph, unsigned int layerIndex);
     void ParseSoftmax(GraphPtr graph, unsigned int layerIndex);
     void ParseSpaceToBatchNd(GraphPtr graph, unsigned int layerIndex);
diff --git a/src/armnnDeserializer/test/DeserializeShape.cpp b/src/armnnDeserializer/test/DeserializeShape.cpp
new file mode 100644
index 0000000..a20fb59
--- /dev/null
+++ b/src/armnnDeserializer/test/DeserializeShape.cpp
@@ -0,0 +1,131 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <boost/test/unit_test.hpp>
+#include "ParserFlatbuffersSerializeFixture.hpp"
+#include <armnnDeserializer/IDeserializer.hpp>
+
+#include <string>
+
+BOOST_AUTO_TEST_SUITE(Deserializer)
+
+struct ShapeFixture : public ParserFlatbuffersSerializeFixture
+{
+    explicit ShapeFixture()
+    {
+        m_JsonString = R"(
+            {
+              layers: [
+                {
+                  layer_type: "InputLayer",
+                  layer: {
+                    base: {
+                      base: {
+                        layerName: "InputLayer",
+                        layerType: "Input",
+                        inputSlots: [
+
+                        ],
+                        outputSlots: [
+                          {
+                            tensorInfo: {
+                              dimensions: [
+                                1,
+                                3,
+                                3,
+                                1
+                              ],
+                              dataType: "Signed32",
+                              quantizationScale: 0.0
+                            }
+                          }
+                        ]
+                      }
+                    }
+                  }
+                },
+                {
+                  layer_type: "ShapeLayer",
+                  layer: {
+                    base: {
+                      index: 1,
+                      layerName: "shape",
+                      layerType: "Shape",
+                      inputSlots: [
+                        {
+                          connection: {
+                            sourceLayerIndex: 0,
+                            outputSlotIndex: 0
+                          }
+                        }
+                      ],
+                      outputSlots: [
+                        {
+                          tensorInfo: {
+                            dimensions: [
+                              4
+                            ],
+                            dataType: "Signed32",
+                            quantizationScale: 0.0
+                          }
+                        }
+                      ]
+                    }
+                  }
+                },
+                {
+                  layer_type: "OutputLayer",
+                  layer: {
+                    base: {
+                      base: {
+                        index: 2,
+                        layerName: "OutputLayer",
+                        layerType: "Output",
+                        inputSlots: [
+                          {
+                            connection: {
+                              sourceLayerIndex: 1,
+                              outputSlotIndex: 0
+                            }
+                          }
+                        ],
+                        outputSlots: [
+
+                        ]
+                      }
+                    }
+                  }
+                }
+              ],
+              inputIds: [
+                0
+              ],
+              outputIds: [
+                0
+              ],
+              featureVersions: {
+                bindingIdsScheme: 1
+              }
+            }
+    )";
+        Setup();
+    }
+};
+
+
+struct SimpleShapeFixture : ShapeFixture
+{
+    SimpleShapeFixture() : ShapeFixture() {}
+};
+
+BOOST_FIXTURE_TEST_CASE(DeserializeShape, SimpleShapeFixture)
+{
+    RunTest<1, armnn::DataType::Signed32>(
+            0,
+            {{"InputLayer", { 1, 1, 1, 1, 1, 1, 1, 1, 1 }}},
+            {{"OutputLayer",{ 1, 3, 3, 1 }}});
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs
index 753c244..32a9bba 100644
--- a/src/armnnSerializer/ArmnnSchema.fbs
+++ b/src/armnnSerializer/ArmnnSchema.fbs
@@ -171,7 +171,8 @@
     Rank = 58,
     LogicalBinary = 59,
     Reduce = 60,
-    Cast = 61
+    Cast = 61,
+    Shape = 62
 }
 
 // Base layer table to be used as part of other layers
@@ -487,7 +488,7 @@
 }
 
 table ReshapeDescriptor {
-  targetShape:[uint];
+    targetShape:[uint];
 }
 
 table PermuteLayer {
@@ -499,6 +500,10 @@
     dimMappings:[uint];
 }
 
+table ShapeLayer {
+    base:LayerBase;
+}
+
 table SpaceToBatchNdLayer {
     base:LayerBase;
     descriptor:SpaceToBatchNdDescriptor;
@@ -972,7 +977,8 @@
     RankLayer,
     LogicalBinaryLayer,
     ReduceLayer,
-    CastLayer
+    CastLayer,
+    ShapeLayer
 }
 
 table AnyLayer {
diff --git a/src/armnnSerializer/ArmnnSchema_generated.h b/src/armnnSerializer/ArmnnSchema_generated.h
index 675fcc6..4a352dd 100644
--- a/src/armnnSerializer/ArmnnSchema_generated.h
+++ b/src/armnnSerializer/ArmnnSchema_generated.h
@@ -4,6 +4,7 @@
 //
 // automatically generated by the FlatBuffers compiler, do not modify
 
+
 #ifndef FLATBUFFERS_GENERATED_ARMNNSCHEMA_ARMNNSERIALIZER_H_
 #define FLATBUFFERS_GENERATED_ARMNNSCHEMA_ARMNNSERIALIZER_H_
 
@@ -193,6 +194,9 @@
 struct PermuteDescriptor;
 struct PermuteDescriptorBuilder;
 
+struct ShapeLayer;
+struct ShapeLayerBuilder;
+
 struct SpaceToBatchNdLayer;
 struct SpaceToBatchNdLayerBuilder;
 
@@ -735,11 +739,12 @@
   LayerType_LogicalBinary = 59,
   LayerType_Reduce = 60,
   LayerType_Cast = 61,
+  LayerType_Shape = 62,
   LayerType_MIN = LayerType_Addition,
-  LayerType_MAX = LayerType_Cast
+  LayerType_MAX = LayerType_Shape
 };
 
-inline const LayerType (&EnumValuesLayerType())[62] {
+inline const LayerType (&EnumValuesLayerType())[63] {
   static const LayerType values[] = {
     LayerType_Addition,
     LayerType_Input,
@@ -802,13 +807,14 @@
     LayerType_Rank,
     LayerType_LogicalBinary,
     LayerType_Reduce,
-    LayerType_Cast
+    LayerType_Cast,
+    LayerType_Shape
   };
   return values;
 }
 
 inline const char * const *EnumNamesLayerType() {
-  static const char * const names[63] = {
+  static const char * const names[64] = {
     "Addition",
     "Input",
     "Multiplication",
@@ -871,13 +877,14 @@
     "LogicalBinary",
     "Reduce",
     "Cast",
+    "Shape",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameLayerType(LayerType e) {
-  if (flatbuffers::IsOutRange(e, LayerType_Addition, LayerType_Cast)) return "";
+  if (flatbuffers::IsOutRange(e, LayerType_Addition, LayerType_Shape)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesLayerType()[index];
 }
@@ -1219,11 +1226,12 @@
   Layer_LogicalBinaryLayer = 60,
   Layer_ReduceLayer = 61,
   Layer_CastLayer = 62,
+  Layer_ShapeLayer = 63,
   Layer_MIN = Layer_NONE,
-  Layer_MAX = Layer_CastLayer
+  Layer_MAX = Layer_ShapeLayer
 };
 
-inline const Layer (&EnumValuesLayer())[63] {
+inline const Layer (&EnumValuesLayer())[64] {
   static const Layer values[] = {
     Layer_NONE,
     Layer_ActivationLayer,
@@ -1287,13 +1295,14 @@
     Layer_RankLayer,
     Layer_LogicalBinaryLayer,
     Layer_ReduceLayer,
-    Layer_CastLayer
+    Layer_CastLayer,
+    Layer_ShapeLayer
   };
   return values;
 }
 
 inline const char * const *EnumNamesLayer() {
-  static const char * const names[64] = {
+  static const char * const names[65] = {
     "NONE",
     "ActivationLayer",
     "AdditionLayer",
@@ -1357,13 +1366,14 @@
     "LogicalBinaryLayer",
     "ReduceLayer",
     "CastLayer",
+    "ShapeLayer",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameLayer(Layer e) {
-  if (flatbuffers::IsOutRange(e, Layer_NONE, Layer_CastLayer)) return "";
+  if (flatbuffers::IsOutRange(e, Layer_NONE, Layer_ShapeLayer)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesLayer()[index];
 }
@@ -1620,6 +1630,10 @@
   static const Layer enum_value = Layer_CastLayer;
 };
 
+template<> struct LayerTraits<armnnSerializer::ShapeLayer> {
+  static const Layer enum_value = Layer_ShapeLayer;
+};
+
 bool VerifyLayer(flatbuffers::Verifier &verifier, const void *obj, Layer type);
 bool VerifyLayerVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
 
@@ -5180,6 +5194,49 @@
       dimMappings__);
 }
 
+struct ShapeLayer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef ShapeLayerBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_BASE = 4
+  };
+  const armnnSerializer::LayerBase *base() const {
+    return GetPointer<const armnnSerializer::LayerBase *>(VT_BASE);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_BASE) &&
+           verifier.VerifyTable(base()) &&
+           verifier.EndTable();
+  }
+};
+
+struct ShapeLayerBuilder {
+  typedef ShapeLayer Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_base(flatbuffers::Offset<armnnSerializer::LayerBase> base) {
+    fbb_.AddOffset(ShapeLayer::VT_BASE, base);
+  }
+  explicit ShapeLayerBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  ShapeLayerBuilder &operator=(const ShapeLayerBuilder &);
+  flatbuffers::Offset<ShapeLayer> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ShapeLayer>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ShapeLayer> CreateShapeLayer(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<armnnSerializer::LayerBase> base = 0) {
+  ShapeLayerBuilder builder_(_fbb);
+  builder_.add_base(base);
+  return builder_.Finish();
+}
+
 struct SpaceToBatchNdLayer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   typedef SpaceToBatchNdLayerBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
@@ -9567,6 +9624,9 @@
   const armnnSerializer::CastLayer *layer_as_CastLayer() const {
     return layer_type() == armnnSerializer::Layer_CastLayer ? static_cast<const armnnSerializer::CastLayer *>(layer()) : nullptr;
   }
+  const armnnSerializer::ShapeLayer *layer_as_ShapeLayer() const {
+    return layer_type() == armnnSerializer::Layer_ShapeLayer ? static_cast<const armnnSerializer::ShapeLayer *>(layer()) : nullptr;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<uint8_t>(verifier, VT_LAYER_TYPE) &&
@@ -9824,6 +9884,10 @@
   return layer_as_CastLayer();
 }
 
+template<> inline const armnnSerializer::ShapeLayer *AnyLayer::layer_as<armnnSerializer::ShapeLayer>() const {
+  return layer_as_ShapeLayer();
+}
+
 struct AnyLayerBuilder {
   typedef AnyLayer Table;
   flatbuffers::FlatBufferBuilder &fbb_;
@@ -10292,6 +10356,10 @@
       auto ptr = reinterpret_cast<const armnnSerializer::CastLayer *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case Layer_ShapeLayer: {
+      auto ptr = reinterpret_cast<const armnnSerializer::ShapeLayer *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return true;
   }
 }
diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp
index 30a7e74..fd7f8dc 100644
--- a/src/armnnSerializer/Serializer.cpp
+++ b/src/armnnSerializer/Serializer.cpp
@@ -1309,6 +1309,17 @@
     CreateAnyLayer(flatBufferLayer.o, serializer::Layer::Layer_NormalizationLayer);
 }
 
+void SerializerStrategy::SerializeShapeLayer(const armnn::IConnectableLayer* layer,
+                                             const char* name)
+{
+    IgnoreUnused(name);
+
+    auto shapeBaseLayer = CreateLayerBase(layer, serializer::LayerType::LayerType_Shape);
+    auto shapeLayer = serializer::CreateShapeLayer(m_flatBufferBuilder, shapeBaseLayer);
+
+    CreateAnyLayer(shapeLayer.o, serializer::Layer::Layer_ShapeLayer);
+}
+
 void SerializerStrategy::SerializeStackLayer(const armnn::IConnectableLayer* layer,
                                         const armnn::StackDescriptor& stackDescriptor,
                                         const char* name)
@@ -2138,6 +2149,11 @@
             SerializeResizeLayer(layer, layerDescriptor, name);
             break;
         }
+        case armnn::LayerType::Shape:
+        {
+            SerializeShapeLayer(layer, name);
+            break;
+        }
         case armnn::LayerType::Slice:
         {
             const armnn::SliceDescriptor& layerDescriptor =
diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp
index 7bbcc24..c99e87d 100644
--- a/src/armnnSerializer/Serializer.hpp
+++ b/src/armnnSerializer/Serializer.hpp
@@ -315,6 +315,9 @@
                                      const armnn::NormalizationDescriptor& normalizationDescriptor,
                                      const char* name = nullptr);
 
+    void SerializeShapeLayer(const armnn::IConnectableLayer* layer,
+                             const char* name = nullptr);
+
     void SerializeSplitterLayer(const armnn::IConnectableLayer* layer,
                                 const armnn::ViewsDescriptor& viewsDescriptor,
                                 const char* name = nullptr);
diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp
index 8e7ca37..98532d0 100644
--- a/src/armnnSerializer/test/SerializerTests.cpp
+++ b/src/armnnSerializer/test/SerializerTests.cpp
@@ -1951,6 +1951,31 @@
     deserializedNetwork->ExecuteStrategy(verifier);
 }
 
+TEST_CASE("SerializeShape")
+{
+    const std::string layerName("shape");
+    const armnn::TensorInfo inputInfo({1, 3, 3, 1}, armnn::DataType::Signed32);
+    const armnn::TensorInfo outputInfo({ 4 }, armnn::DataType::Signed32);
+
+    armnn::INetworkPtr network = armnn::INetwork::Create();
+    armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
+    armnn::IConnectableLayer* const shapeLayer = network->AddShapeLayer(layerName.c_str());
+    armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+    inputLayer->GetOutputSlot(0).Connect(shapeLayer->GetInputSlot(0));
+    shapeLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    shapeLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
+    CHECK(deserializedNetwork);
+
+    LayerVerifierBase verifier(layerName, {inputInfo}, {outputInfo});
+
+    deserializedNetwork->ExecuteStrategy(verifier);
+}
+
 TEST_CASE("SerializeSlice")
 {
     const std::string layerName{"slice"};
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 2e171f9..8a24e11 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -557,6 +557,13 @@
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
+bool LayerSupportBase::IsShapeSupported(const TensorInfo&, // input
+                                        const TensorInfo&, // output
+                                        Optional<std::string&> reasonIfUnsupported) const
+{
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
 bool LayerSupportBase::IsSliceSupported(const TensorInfo&, // input
                                         const TensorInfo&, // output
                                         const SliceDescriptor&, // descriptor
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index a4f972f..0277a78 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -344,6 +344,10 @@
                           const TensorInfo& output,
                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsShapeSupported(const TensorInfo& input,
+                          const TensorInfo& output,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsSliceSupported(const TensorInfo& input,
                           const TensorInfo& output,
                           const SliceDescriptor& descriptor,
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 44a6a17..8c78136 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -2805,6 +2805,33 @@
     ValidateTensorDataTypesMatch(inputTensorInfo0, outputTensorInfo, descriptorName, "input_0", "output");
 }
 
+void ShapeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    const std::string& descriptorName{"ShapeQueueDescriptor"};
+
+    ValidateNumInputs(workloadInfo,  descriptorName, 1);
+    ValidateNumOutputs(workloadInfo, descriptorName, 1);
+
+    const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0];
+    const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
+
+    std::vector<DataType> supportedTypes =
+    {
+        DataType::BFloat16,
+        DataType::Float16,
+        DataType::Float32,
+        DataType::QAsymmS8,
+        DataType::QAsymmU8,
+        DataType::QSymmS8,
+        DataType::QSymmS16,
+        DataType::Signed32
+    };
+
+    ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName);
+    ValidateDataTypes(outputTensorInfo, {DataType::Signed32}, descriptorName);
+}
+
 void SwitchQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
 {
     const std::string& descriptorName{"SwitchQueueDescriptor"};
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index 11ce2cb..36653bd 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -690,4 +690,9 @@
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
 
+struct ShapeQueueDescriptor : QueueDescriptor
+{
+    void Validate(const WorkloadInfo& workloadInfo) const;
+};
+
 } // namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index c5fc9d0..61ad209 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -1003,6 +1003,16 @@
                                                           reason);
             break;
         }
+        case LayerType::Shape:
+        {
+            const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+            const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+
+            result = layerSupportObject.IsShapeSupported(OverrideDataType(input, dataType),
+                                                         OverrideDataType(output, dataType),
+                                                         reason);
+            break;
+        }
         case LayerType::Slice:
         {
             auto cLayer = PolymorphicDowncast<const SliceLayer*>(&layer);
@@ -1673,6 +1683,12 @@
     return std::unique_ptr<IWorkload>();
 }
 
+std::unique_ptr<IWorkload> IWorkloadFactory::CreateShape(const ShapeQueueDescriptor& /*descriptor*/,
+                                                         const WorkloadInfo& /*info*/) const
+{
+    return std::unique_ptr<IWorkload>();
+}
+
 std::unique_ptr<IWorkload> IWorkloadFactory::CreateSlice(const SliceQueueDescriptor& /*descriptor*/,
                                                          const WorkloadInfo& /*info*/) const
 {
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index 42360d3..1987b9b 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -252,6 +252,9 @@
     virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const;
 
+    virtual std::unique_ptr<IWorkload> CreateShape(const ShapeQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info) const;
+
     virtual std::unique_ptr<IWorkload> CreateSlice(const SliceQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const;
 
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 73a16d0..ff9375d 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -85,6 +85,7 @@
     test/layerTests/SliceTestImpl.cpp \
     test/layerTests/QuantizeTestImpl.cpp \
     test/layerTests/SinTestImpl.cpp \
+    test/layerTests/ShapeTestImpl.cpp \
     test/layerTests/SoftmaxTestImpl.cpp \
     test/layerTests/SpaceToBatchNdTestImpl.cpp \
     test/layerTests/SpaceToDepthTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 82381a8..162368f 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -157,6 +157,8 @@
     layerTests/RsqrtTestImpl.hpp
     layerTests/SinTestImpl.cpp
     layerTests/SinTestImpl.hpp
+    layerTests/ShapeTestImpl.cpp
+    layerTests/ShapeTestImpl.hpp
     layerTests/SliceTestImpl.cpp
     layerTests/SliceTestImpl.hpp
     layerTests/SoftmaxTestImpl.cpp
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 5a05ee1..adc7bc4 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -663,6 +663,8 @@
 
 DECLARE_LAYER_POLICY_2_PARAM(Reshape)
 
+DECLARE_LAYER_POLICY_1_PARAM(Shape)
+
 DECLARE_LAYER_POLICY_2_PARAM(Slice)
 
 DECLARE_LAYER_POLICY_2_PARAM(Softmax)
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 4ae6553..46eb6ee 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -55,6 +55,7 @@
 #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/RsqrtTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ShapeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SinTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SliceTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SoftmaxTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/ShapeTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ShapeTestImpl.cpp
new file mode 100644
index 0000000..d6c0314
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ShapeTestImpl.cpp
@@ -0,0 +1,306 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ShapeTestImpl.hpp"
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+template<typename T, std::size_t n>
+LayerTestResult<int32_t, 1> ShapeTest(
+    armnn::TensorInfo inputTensorInfo,
+    std::vector<T> input,
+    armnn::TensorInfo outputTensorInfo,
+    std::vector<int32_t> expectedOutputData,
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+
+    std::vector<int32_t> actualOutput(outputTensorInfo.GetNumElements());
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ShapeQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateShape(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), input.data());
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
+
+    return LayerTestResult<int32_t, 1>(actualOutput,
+                                       expectedOutputData,
+                                       outputHandle->GetShape(),
+                                       outputTensorInfo.GetShape());
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<int32_t, 1> ShapeDimSize1Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    armnn::TensorInfo inputTensorInfo({ 1 }, ArmnnType, 1.0f, 0);
+    armnn::TensorInfo outputTensorInfo({ 1 }, armnn::DataType::Signed32);
+
+    auto input = ConvertToDataType<ArmnnType>({ 1.0f }, inputTensorInfo);
+
+    return ShapeTest<T, 1>(inputTensorInfo, input, outputTensorInfo, { 1 }, workloadFactory, memoryManager,
+                           tensorHandleFactory);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<int32_t, 1> ShapeDimSize2Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    armnn::TensorInfo inputTensorInfo({ 1, 2 }, ArmnnType, 1.0f, 0);
+    armnn::TensorInfo outputTensorInfo({ 2 }, armnn::DataType::Signed32);
+
+    auto input = ConvertToDataType<ArmnnType>({ 1.0f, 1.0f }, inputTensorInfo);
+
+    return ShapeTest<T, 2>(inputTensorInfo, input, outputTensorInfo, { 1, 2 }, workloadFactory, memoryManager,
+                           tensorHandleFactory);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<int32_t, 1> ShapeDimSize3Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    armnn::TensorInfo inputTensorInfo({ 1, 2, 3 }, ArmnnType, 1.0f, 0);
+    armnn::TensorInfo outputTensorInfo({ 3 }, armnn::DataType::Signed32);
+
+    auto input = ConvertToDataType<ArmnnType>({ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }, inputTensorInfo);
+
+    return ShapeTest<T, 3>(inputTensorInfo, input, outputTensorInfo, { 1, 2, 3 }, workloadFactory, memoryManager,
+                           tensorHandleFactory);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<int32_t, 1> ShapeDimSize4Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    armnn::TensorInfo inputTensorInfo({ 2, 2, 3, 2 }, ArmnnType, 1.0f, 0);
+    armnn::TensorInfo outputTensorInfo({ 4 }, armnn::DataType::Signed32);
+
+    auto input = ConvertToDataType<ArmnnType>({ 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                                                1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+                                                1.0f },
+                                              inputTensorInfo);
+
+    return ShapeTest<T, 4>(inputTensorInfo, input, outputTensorInfo, { 2, 2, 3, 2 }, workloadFactory, memoryManager,
+                           tensorHandleFactory);
+}
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize4Test<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize4Test<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize4Test<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize4Test<armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize4Test<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize4Test<armnn::DataType::QSymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize4Test<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize4Test<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize3Test<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize3Test<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize3Test<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize3Test<armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize3Test<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize3Test<armnn::DataType::QSymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize3Test<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize3Test<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize2Test<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize2Test<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize2Test<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize2Test<armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize2Test<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize2Test<armnn::DataType::QSymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize2Test<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize2Test<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize1Test<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize1Test<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize1Test<armnn::DataType::QAsymmU8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize1Test<armnn::DataType::Signed32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize1Test<armnn::DataType::QSymmS16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize1Test<armnn::DataType::QSymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize1Test<armnn::DataType::QAsymmS8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<int32_t, 1>
+ShapeDimSize1Test<armnn::DataType::BFloat16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory);
\ No newline at end of file
diff --git a/src/backends/backendsCommon/test/layerTests/ShapeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ShapeTestImpl.hpp
new file mode 100644
index 0000000..85f7c0a
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ShapeTestImpl.hpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<typename T, std::size_t n>
+LayerTestResult<int32_t, 1> ShapeTest(
+        armnn::TensorInfo inputTensorInfo,
+        std::vector<T> input,
+        armnn::TensorInfo outputTensorInfo,
+        std::vector<int32_t> expectedOutputData,
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<int32_t, 1> ShapeDimSize1Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<int32_t, 1> ShapeDimSize2Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<int32_t, 1> ShapeDimSize3Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<int32_t, 1> ShapeDimSize4Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 14a40f9..1b05c4e 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -1859,6 +1859,24 @@
                                        reasonIfUnsupported);
 }
 
+bool RefLayerSupport::IsShapeSupported(const TensorInfo& input,
+                                       const TensorInfo& output,
+                                       Optional<std::string&> reasonIfUnsupported) const
+{
+    IgnoreUnused(input);
+    bool supported = true;
+
+    std::array<DataType, 1> supportedTypes =
+    {
+        DataType::Signed32
+    };
+
+    supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+                                  "Reference Shape: output type not supported");
+
+    return supported;
+}
+
 bool RefLayerSupport::IsSliceSupported(const TensorInfo& input,
                                        const TensorInfo& output,
                                        const SliceDescriptor& descriptor,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 7a95bb0..c060f79 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -297,12 +297,16 @@
                            const TensorInfo& output,
                            const ResizeDescriptor& descriptor,
                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
-    
+
     ARMNN_DEPRECATED_MSG("Use IsElementwiseUnarySupported instead")
     bool IsRsqrtSupported(const TensorInfo& input,
                           const TensorInfo& output,
                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsShapeSupported(const TensorInfo& input,
+                          const TensorInfo& output,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsSliceSupported(const TensorInfo& input,
                           const TensorInfo& output,
                           const SliceDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 8e3bbe4..606f531 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -7,7 +7,6 @@
 #include <backendsCommon/MemImportWorkload.hpp>
 #include <backendsCommon/MakeWorkloadHelper.hpp>
 #include <backendsCommon/TensorHandle.hpp>
-#include <reference/workloads/RefFillWorkload.hpp>
 #include "RefWorkloadFactory.hpp"
 #include "RefBackendId.hpp"
 #include "workloads/RefWorkloads.hpp"
@@ -626,6 +625,12 @@
     return CreateElementwiseUnary(elementwiseUnaryDescriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateShape(const ShapeQueueDescriptor& descriptor,
+                                                           const WorkloadInfo& info) const
+{
+    return std::make_unique<RefShapeWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 734c5e4..2beffa7 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -243,6 +243,9 @@
     std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
                                            const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateShape(const ShapeQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateSlice(const SliceQueueDescriptor& descriptor,
                                            const WorkloadInfo& info) const override;
 
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 1cc6fa8..45e3717 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1733,6 +1733,43 @@
 ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_3, DepthToSpaceTest3<DataType::QSymmS16>, DataLayout::NHWC);
 ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_4, DepthToSpaceTest4<DataType::QSymmS16>, DataLayout::NHWC);
 
+// Shape
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize1Float16,  ShapeDimSize1Test<DataType::Float16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize1Float32,  ShapeDimSize1Test<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize1QAsymmU8, ShapeDimSize1Test<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize1Signed32, ShapeDimSize1Test<DataType::Signed32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize1QSymmS16, ShapeDimSize1Test<DataType::QSymmS16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize1QSymmS8,  ShapeDimSize1Test<DataType::QSymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize1QAsymmS8, ShapeDimSize1Test<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize1BFloat16, ShapeDimSize1Test<DataType::BFloat16>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize2Float16,  ShapeDimSize2Test<DataType::Float16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize2Float32,  ShapeDimSize2Test<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize2QAsymmU8, ShapeDimSize2Test<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize2Signed32, ShapeDimSize2Test<DataType::Signed32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize2QSymmS16, ShapeDimSize2Test<DataType::QSymmS16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize2QSymmS8,  ShapeDimSize2Test<DataType::QSymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize2QAsymmS8, ShapeDimSize2Test<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize2BFloat16, ShapeDimSize2Test<DataType::BFloat16>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize3Float16,  ShapeDimSize3Test<DataType::Float16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize3Float32,  ShapeDimSize3Test<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize3QAsymmU8, ShapeDimSize3Test<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize3Signed32, ShapeDimSize3Test<DataType::Signed32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize3QSymmS16, ShapeDimSize3Test<DataType::QSymmS16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize3QSymmS8,  ShapeDimSize3Test<DataType::QSymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize3QAsymmS8, ShapeDimSize3Test<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize3BFloat16, ShapeDimSize3Test<DataType::BFloat16>)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize4Float16,  ShapeDimSize4Test<DataType::Float16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize4Float32,  ShapeDimSize4Test<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize4QAsymmU8, ShapeDimSize4Test<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize4Signed32, ShapeDimSize4Test<DataType::Signed32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize4QSymmS16, ShapeDimSize4Test<DataType::QSymmS16>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize4QSymmS8,  ShapeDimSize4Test<DataType::QSymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize4QAsymmS8, ShapeDimSize4Test<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ShapeDimSize4BFloat16, ShapeDimSize4Test<DataType::BFloat16>)
+
 // SpaceToDepth
 ARMNN_AUTO_TEST_CASE_WITH_THF(SpaceToDepthNchwAsymmQ8, SpaceToDepthNchwAsymmQ8Test)
 ARMNN_AUTO_TEST_CASE_WITH_THF(SpaceToDepthNhwcAsymmQ8, SpaceToDepthNhwcAsymmQ8Test)
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 09e02e6..7a769e5 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -143,6 +143,7 @@
     RefResizeBilinearWorkload.hpp
     RefResizeWorkload.cpp
     RefResizeWorkload.hpp
+    RefShapeWorkload.hpp
     RefSliceWorkload.cpp
     RefSliceWorkload.hpp
     RefSoftmaxWorkload.cpp
diff --git a/src/backends/reference/workloads/RefShapeWorkload.hpp b/src/backends/reference/workloads/RefShapeWorkload.hpp
new file mode 100644
index 0000000..8e2a410
--- /dev/null
+++ b/src/backends/reference/workloads/RefShapeWorkload.hpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+struct RefShapeWorkload : public BaseWorkload<ShapeQueueDescriptor>
+{
+public:
+    using BaseWorkload<ShapeQueueDescriptor>::BaseWorkload;
+    virtual void Execute() const override
+    {
+        Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+    }
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor)  override
+    {
+        Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs);
+    }
+
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+    {
+        const TensorShape Shape = GetTensorInfo(inputs[0]).GetShape();
+
+        const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+
+        unsigned int numBytes =
+            GetTensorInfo(inputs[0]).GetNumDimensions() * GetDataTypeSize(outputInfo.GetDataType());
+
+        std::memcpy(outputs[0]->Map(), &Shape, numBytes);
+        outputs[0]->Unmap();
+    }
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index d3995f2..afe63d1 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -35,10 +35,10 @@
 #include "RefDequantizeWorkload.hpp"
 #include "RefElementwiseWorkload.hpp"
 #include "RefElementwiseUnaryWorkload.hpp"
-#include "RefFillWorkload.hpp"
-#include "RefFullyConnectedWorkload.hpp"
-#include "RefFloorWorkload.hpp"
 #include "RefFakeQuantizationFloat32Workload.hpp"
+#include "RefFillWorkload.hpp"
+#include "RefFloorWorkload.hpp"
+#include "RefFullyConnectedWorkload.hpp"
 #include "RefGatherWorkload.hpp"
 #include "RefInstanceNormalizationWorkload.hpp"
 #include "RefL2NormalizationWorkload.hpp"
@@ -59,6 +59,7 @@
 #include "RefReshapeWorkload.hpp"
 #include "RefResizeBilinearWorkload.hpp"
 #include "RefResizeWorkload.hpp"
+#include "RefShapeWorkload.hpp"
 #include "RefSliceWorkload.hpp"
 #include "RefSplitterWorkload.hpp"
 #include "RefSoftmaxWorkload.hpp"