Release 18.03
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index 97f702e..af3b17e 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -14,6 +14,9 @@
 #include <boost/format.hpp>
 
 #include <unordered_map>
+#include <DotSerializer.hpp>
+#include <sstream>
+
 
 namespace armnn
 {
@@ -71,6 +74,80 @@
     return Status::Success;
 }
 
+Status Graph::SerializeToDot(std::ostream& stream)
+{
+    {
+        DotGraph graph(stream, "Optimized");
+
+        {
+            // Default node attributes:
+            DotDefaults nodes(stream, "node");
+            nodes.GetAttributeSet()
+                .AddAttribute("shape", "record");
+        }
+
+        {
+            // Default edge attributes:
+            DotDefaults edges(stream, "edge");
+            edges.GetAttributeSet()
+                .AddAttribute("fontsize", 8)
+                .AddAttribute("fontcolor", "blue")
+                .AddAttribute("fontname", "arial-bold");
+        }
+
+        // First declare the nodes
+        for (auto&& layer : m_Layers)
+        {
+            DotNode node(stream, layer->GetGuid(), GetLayerTypeAsCString(layer->GetType()));
+            // Extract the layer parameters
+            ParameterStringifyFunction extractParams = [&node](const std::string & name, const std::string & value){
+                node.GetContents().AddContent(name + " : " + value);
+            };
+            layer->SerializeLayerParameters(extractParams);
+        }
+
+        // Second declare the edges
+        for (auto&& layer : m_Layers)
+        {
+            LayerGuid toId = layer->GetGuid();
+
+            for (unsigned int i = 0; i < layer->GetNumInputSlots(); i++)
+            {
+                OutputSlot* outputSlot = static_cast<OutputSlot*>(layer->GetInputSlot(i).GetConnection());
+                LayerGuid fromId = outputSlot->GetOwningLayer().GetGuid();
+                DotEdge edge(stream, fromId, toId);
+
+                // Now Print the tensor shape on the edge
+                {
+                    // Construct the label attribute with HTML markup
+                    std::stringstream ss;
+                    {
+                        ss << "< [";
+                        const TensorShape& shape = outputSlot->GetTensorInfo().GetShape();
+                        for (unsigned int dimIdx = 0; dimIdx < shape.GetNumDimensions(); dimIdx++)
+                        {
+                            if (dimIdx != 0)
+                            {
+                                ss << ",";
+                            }
+                            ss << shape[dimIdx];
+                        }
+                        ss << "] >";
+                    }
+
+                    edge.GetAttributeSet().AddAttribute("label", ss);
+                }
+            }
+        }
+    }
+
+    if (stream.bad())
+    {
+        return Status::Failure;
+    }
+    return Status::Success;
+}
+
 Status Graph::AllocateDynamicBuffers()
 {
     for (auto&& layer : m_Layers)
diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp
index 8888034..34aefbf 100644
--- a/src/armnn/Graph.hpp
+++ b/src/armnn/Graph.hpp
@@ -92,6 +92,8 @@
 
     Status Print() const;
 
+    Status SerializeToDot(std::ostream& stream);
+
     /// Adds a new layer of type LaterType to the graph constructed with the arguments passed.
     template <typename LayerT, typename... Args>
     LayerT* AddLayer(Args&&... args);
@@ -121,6 +123,11 @@
     /// Return const iterator pointing to end of list. Lowercase for range-based for loops.
     ConstIterator end() const { return {m_Layers.end(), &PtrCast<const Layer>}; }
 
+    /// Return const iterator pointing to begin of list. Lowercase for range-based for loops.
+    ConstIterator cbegin() const { return begin(); }
+    /// Return const iterator pointing to end of list. Lowercase for range-based for loops.
+    ConstIterator cend() const { return end(); }
+
     /// Sort layers in topological order and return this.
     Graph& TopologicalSort() { const_cast<const Graph*>(this)->TopologicalSort(); return *this; }
     const Graph& TopologicalSort() const;
@@ -154,13 +161,27 @@
     template <typename LayerT>
     class LayerInGraph;
 
+    Iterator ForwardToEndOfInputs(Iterator it) const
+    {
+        while ((it != m_Layers.end()) && ((*it)->GetType() == LayerType::Input))
+        {
+            ++it;
+        }
+        return it;
+    }
+
+    Iterator RewindToBeginOfOutputs(Iterator it) const
+    {
+        while ((it != m_Layers.begin()) && ((*std::prev(it))->GetType() == LayerType::Output))
+        {
+            --it;
+        }
+        return it;
+    }
+
     /// Get the position of a layer in the graph.
     Iterator GetPosInGraph(Layer& layer);
 
-    /// Adds a new layer of type LaterType to the graph constructed with the arguments passed.
-    template <typename LayerT, typename... Args>
-    LayerInGraph<LayerT>* AddLayerImpl(Iterator insertBefore, Args&&... args);
-
     std::unordered_set<LayerBindingId> m_InputIds;
     std::unordered_set<LayerBindingId> m_OutputIds;
     std::unordered_map<const Layer*, Iterator> m_PosInGraphMap;
@@ -197,8 +218,19 @@
 {
 public:
     template <typename... Args>
+    LayerInGraph(Graph& graph, Args&&... args)
+        : LayerInGraphBase<LayerT>(graph,
+                                   // Insert at the back of the intermediate layers (before outputs).
+                                   std::prev(graph.end(), IteratorDifference(graph.GetNumOutputs())),
+                                   std::forward<Args>(args)...)
+    {
+    }
+    template <typename... Args>
     LayerInGraph(Graph& graph, Iterator insertBefore, Args&&... args)
-        : LayerInGraphBase<LayerT>(graph, insertBefore, std::forward<Args>(args)...)
+        : LayerInGraphBase<LayerT>(graph,
+                                   // Make sure it's inserted after all inputs and before all outputs.
+                                   graph.ForwardToEndOfInputs(graph.RewindToBeginOfOutputs(insertBefore)),
+                                   std::forward<Args>(args)...)
     {
     }
 };
@@ -209,8 +241,11 @@
 {
 public:
     template <typename... Args>
-    LayerInGraph(Graph& graph, Iterator insertBefore, Args&&... args)
-        : LayerInGraphBase<InputLayer>(graph, insertBefore, std::forward<Args>(args)...)
+    LayerInGraph(Graph& graph, Args&&... args)
+        : LayerInGraphBase<InputLayer>(graph,
+                                       // Always add to the back of the inputs.
+                                       std::next(graph.begin(), IteratorDifference(graph.GetNumInputs())),
+                                       std::forward<Args>(args)...)
     {
         const bool isNewId = m_Graph.m_InputIds.emplace(GetBindingId()).second;
         if (!isNewId)
@@ -218,6 +253,12 @@
             throw InvalidArgumentException("A layer already exists with the specified id");
         }
     }
+    template <typename... Args>
+    LayerInGraph(Graph& graph, Iterator insertBefore, Args&&... args)
+        // Ignore insertBefore. Always add to the back of the inputs.
+        : LayerInGraph(graph, std::forward<Args>(args)...)
+    {
+    }
     ~LayerInGraph() override
     {
         const size_t numErased = m_Graph.m_InputIds.erase(GetBindingId());
@@ -232,8 +273,11 @@
 {
 public:
     template <typename... Args>
-    LayerInGraph(Graph& graph, Iterator insertBefore, Args&&... args)
-        : LayerInGraphBase<OutputLayer>(graph, insertBefore, std::forward<Args>(args)...)
+    LayerInGraph(Graph& graph, Args&&... args)
+        : LayerInGraphBase<OutputLayer>(graph,
+                                        // Always add to the back of the outputs.
+                                        graph.end(),
+                                        std::forward<Args>(args)...)
     {
         const bool isNewId = m_Graph.m_OutputIds.emplace(GetBindingId()).second;
         if (!isNewId)
@@ -257,42 +301,22 @@
 }
 
 template <typename LayerT, typename... Args>
-inline Graph::LayerInGraph<LayerT>* Graph::AddLayerImpl(Iterator insertBefore, Args&&... args)
-{
-    return new LayerInGraph<LayerT>(*this, insertBefore, std::forward<Args>(args)...);
-}
-
-/// Inputs are inserted at the front of the list, to keep the order correct if the list is sorted.
-/// Outputs are inserted at the back of the list, to keep the order correct if the list is sorted.
-/// Other layers are inserted before existing outputs, so the latter remain at the back of the list.
-template <typename LayerT, typename... Args>
 inline LayerT* Graph::AddLayer(Args&&... args)
 {
-    switch (LayerEnumOf<LayerT>())
-    {
-        case LayerType::Input:
-        {
-            return AddLayerImpl<LayerT>(begin(), std::forward<Args>(args)...);
-        }
-        case LayerType::Output:
-        {
-            return AddLayerImpl<LayerT>(end(), std::forward<Args>(args)...);
-        }
-        default:
-        {
-            m_LayersInOrder = false;
-            const auto pos = std::prev(end(), IteratorDifference(GetNumOutputs()));
-            return AddLayerImpl<LayerT>(pos, std::forward<Args>(args)...);
-        }
-    }
+    m_LayersInOrder = m_LayersInOrder &&
+        ((LayerEnumOf<LayerT>() == LayerType::Input) || (LayerEnumOf<LayerT>() == LayerType::Output));
+    return new LayerInGraph<LayerT>(*this, std::forward<Args>(args)...);
 }
 
 template <typename LayerT, typename... Args>
 inline LayerT* Graph::InsertNewLayer(InputSlot& insertBefore, Args&&... args)
 {
-    // Insert before the child layer so topological order is kept.
-    const Iterator pos = GetPosInGraph(insertBefore.GetOwningLayer());
-    LayerT* const layer = AddLayerImpl<LayerT>(pos, std::forward<Args>(args)...);
+    // Insert after the parent if any, or before the child otherwise, so topological order is kept.
+    OutputSlot* parentOut = insertBefore.GetConnectedOutputSlot();
+    const Iterator pos = (parentOut != nullptr)
+                         ? std::next(GetPosInGraph(parentOut->GetOwningLayer()))
+                         : GetPosInGraph(insertBefore.GetOwningLayer());
+    LayerT* const layer = new LayerInGraph<LayerT>(*this, pos, std::forward<Args>(args)...);
     insertBefore.Insert(*layer);
     return layer;
 }
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index 20a8ba4..fcf0656 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -18,7 +18,6 @@
 
 void InputSlot::Insert(Layer& layer)
 {
-    BOOST_ASSERT(layer.GetNumInputSlots() <= 1);
     BOOST_ASSERT(layer.GetNumOutputSlots() == 1);
 
     OutputSlot* const prevSlot = GetConnectedOutputSlot();
@@ -115,11 +114,21 @@
     }
 }
 
+namespace {
+LayerGuid GenerateLayerGuid()
+{
+    //Note: Not thread safe.
+    static LayerGuid newGuid=0;
+    return newGuid++;
+}
+} //namespace
+
 Layer::Layer(unsigned int numInputSlots, unsigned int numOutputSlots, LayerType type, const char* name)
 : m_OutputHandlers(numOutputSlots)
 , m_LayerName(name ? name : "")
 , m_Type(type)
 , m_ComputeDevice(Compute::Undefined)
+, m_Guid(GenerateLayerGuid())
 {
     m_InputSlots.reserve(numInputSlots);
     for (unsigned int i = 0; i < numInputSlots; ++i)
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index 1160f0a..f9f2f22 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -10,6 +10,7 @@
 #include "backends/WorkloadDataCollector.hpp"
 #include "backends/WorkloadInfo.hpp"
 #include "InternalTypes.hpp"
+#include "SerializeLayerParameters.hpp"
 
 #include <armnn/Types.hpp>
 #include <armnn/Tensor.hpp>
@@ -218,6 +219,10 @@
 
     virtual void ValidateTensorShapesFromInputs() = 0;
 
+    /// Helper to serialize the layer parameters to string
+    /// (currently used in DotSerializer and company)
+    virtual void SerializeLayerParameters(ParameterStringifyFunction & fn) const {}
+
     // IConnectableLayer
 
     const char* GetName() const override { return m_LayerName.c_str(); }
@@ -230,6 +235,9 @@
     const OutputSlot& GetOutputSlot(unsigned int index = 0) const override { return m_OutputSlots.at(index); }
     OutputSlot& GetOutputSlot(unsigned int index = 0) override { return m_OutputSlots.at(index); }
 
+    void SetGuid(LayerGuid guid) { m_Guid = guid; }
+    LayerGuid GetGuid() const final { return m_Guid; }
+
 protected:
     // Graph needs access to the virtual destructor
     friend class Graph;
@@ -281,6 +289,8 @@
     /// Used for sorting
     mutable LayerPriority m_Priority = 0;
     mutable bool m_Visiting = false;
+
+    LayerGuid m_Guid;
 };
 
 // A layer user-provided data can be bound to (e.g. inputs, outputs)
diff --git a/src/armnn/Layers.cpp b/src/armnn/Layers.cpp
index ddbc7d2..48a02ab 100644
--- a/src/armnn/Layers.cpp
+++ b/src/armnn/Layers.cpp
@@ -11,6 +11,8 @@
 
 #include "Permute.hpp"
 
+#include <queue>
+
 
 namespace armnn
 {
@@ -21,6 +23,7 @@
     LayerType* const layer = graph.AddLayer<LayerType>(std::forward<Params>(params)...);
 
     layer->SetComputeDevice(m_ComputeDevice);
+    layer->SetGuid(GetGuid());
 
     return layer;
 }
@@ -82,12 +85,11 @@
         unsigned int dim1 = input1.GetShape()[i];
         if (dim0 != dim1)
         {
-            BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be one length");
+            BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1.");
         }
     }
 #endif
 
-
     for (unsigned int i = 0; i < numDims; i++)
     {
         unsigned int dim0 = input0.GetShape()[i];
@@ -439,14 +441,31 @@
     m_OutputHandlers[0].CreateTensorHandles(factory);
     if (factory.SupportsSubTensors())
     {
-        const unsigned int numInputSlots = GetNumInputSlots();
-        for (unsigned int i = 0; i < numInputSlots; ++i)
-        {
-            OutputHandler& outputHandler = GetInputSlot(i).GetConnectedOutputSlot()->GetOutputHandler();
+        std::queue<MergerLayer*> m_MergerLayers;
 
-            outputHandler.SetData(factory.CreateSubTensorHandle(*m_OutputHandlers[0].GetData(),
-                                                                outputHandler.GetTensorInfo().GetShape(),
-                                                                m_Param.GetViewOrigin(i)));
+        m_MergerLayers.push(this);
+        while (!m_MergerLayers.empty())
+        {
+            MergerLayer* currentLayer = m_MergerLayers.front();
+            ITensorHandle* parentTensor = currentLayer->GetOutputHandler(0).GetData();
+
+            m_MergerLayers.pop();
+
+            const unsigned int numInputSlots = currentLayer->GetNumInputSlots();
+            for (unsigned int i = 0; i < numInputSlots; ++i)
+            {
+                OutputSlot* slot = currentLayer->GetInputSlot(i).GetConnectedOutputSlot();
+                OutputHandler& outputHandler = slot->GetOutputHandler();
+                outputHandler.SetData(factory.CreateSubTensorHandle(*parentTensor,
+                                                                    outputHandler.GetTensorInfo().GetShape(),
+                                                                    currentLayer->m_Param.GetViewOrigin(i)));
+
+                Layer& inputLayer = slot->GetOwningLayer();
+                if (inputLayer.GetType() == LayerType::Merger)
+                {
+                    m_MergerLayers.push(boost::polymorphic_downcast<MergerLayer*>(&inputLayer));
+                }
+            }
         }
     }
 }
@@ -568,12 +587,36 @@
 
 void MultiplicationLayer::ValidateTensorShapesFromInputs()
 {
-    ConditionalThrow<LayerValidationException>(GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() ==
-                     GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape(),
-                     "MultiplicationLayer: Inputs must match");
+    auto& input0 = GetInputSlot(0).GetConnection()->GetTensorInfo();
+    auto& input1 = GetInputSlot(1).GetConnection()->GetTensorInfo();
 
-    TensorInfo infoOut(GetInputSlot(0).GetConnection()->GetTensorInfo());
-    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(infoOut.GetShape()),
+    // Get the max of the inputs
+    BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
+    unsigned int numDims = input0.GetNumDimensions();
+    std::vector<unsigned int> dims(numDims);
+
+    // validate inputs are broadcast compatible
+#ifndef NDEBUG
+    for (unsigned int i = 0; i < numDims; i++)
+    {
+        unsigned int dim0 = input0.GetShape()[i];
+        unsigned int dim1 = input1.GetShape()[i];
+        if (dim0 != dim1)
+        {
+            BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1.");
+        }
+    }
+#endif
+
+    for (unsigned int i = 0; i < numDims; i++)
+    {
+        unsigned int dim0 = input0.GetShape()[i];
+        unsigned int dim1 = input1.GetShape()[i];
+        dims[i] = std::max(dim0, dim1);
+    }
+
+    TensorShape outShape(numDims, dims.data());
+    ConditionalThrow<LayerValidationException>(GetOutputSlot(0).ValidateTensorShape(outShape),
                      "MultiplicationLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.");
 }
 
diff --git a/src/armnn/Layers.hpp b/src/armnn/Layers.hpp
index 5a1e3ca..cb460e1 100644
--- a/src/armnn/Layers.hpp
+++ b/src/armnn/Layers.hpp
@@ -22,10 +22,17 @@
 class LayerWithParameters : public Layer
 {
 public:
-    typedef Parameters DescriptorType;
+    using DescriptorType = Parameters;
 
     const Parameters& GetParameters() const { return m_Param; }
 
+    /// Helper to serialize the layer parameters to string
+    /// (currently used in DotSerializer and company)
+    void SerializeLayerParameters(ParameterStringifyFunction & fn) const override
+    {
+        StringifyLayerParameters<Parameters>::Serialize(fn, m_Param);
+    }
+
 protected:
     LayerWithParameters(unsigned int numInputSlots,
                         unsigned int numOutputSlots,
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 4ee68b3..77390cb 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -58,6 +58,11 @@
     return Status::Success;
 }
 
+Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
+{
+    return m_Graph->SerializeToDot(stream);
+}
+
 IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, const DeviceSpec& deviceSpec)
 {
     const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
@@ -65,7 +70,7 @@
 
     OptimizedNetwork* optNet = new OptimizedNetwork(std::move(graph));
 
-    Optimizer::Get().Optimize(optNet->GetGraph());
+    Optimizer::Optimize(optNet->GetGraph());
 
     // Infer the tensor infos for all output slots. Throws an exception on failure.
     optNet->GetGraph().InferTensorInfos();
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index de0c1ec..4eb67b1 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -135,6 +135,7 @@
     ~OptimizedNetwork();
 
     Status PrintGraph() override;
+    Status SerializeToDot(std::ostream& stream) const override;
 
     Graph& GetGraph() { return *m_Graph; }
 
diff --git a/src/armnn/Optimizer.cpp b/src/armnn/Optimizer.cpp
index 85b9f28..9b76c7f 100644
--- a/src/armnn/Optimizer.cpp
+++ b/src/armnn/Optimizer.cpp
@@ -8,7 +8,7 @@
 namespace armnn
 {
 
-const Optimizer& Optimizer::Get()
+Optimizer::Optimizer()
 {
     // Add optimizations here
     static optimizations::SquashEqualPermuteSiblings squashEqualPermuteSiblings;
@@ -19,28 +19,26 @@
     static optimizations::OptimizeConsecutiveReshapes optimizeConsecutiveReshapes;
 
     // Set optimizations in desired order
-    static const Optimizer optimizer({
-                                         &squashEqualPermuteSiblings,
-                                         &squashEqualReshapeSiblings,
-                                         &optimizeInversePermutes,
-                                         &movePermuteUp,
-                                         &permuteAsReshape,
-                                         &optimizeConsecutiveReshapes,
-                                     });
-
-    return optimizer;
+    m_Optimizations = {&squashEqualPermuteSiblings,
+                       &squashEqualReshapeSiblings,
+                       &optimizeInversePermutes,
+                       &movePermuteUp,
+                       &permuteAsReshape,
+                       &optimizeConsecutiveReshapes,
+                      };
 }
 
-void Optimizer::Optimize(Graph& graph) const
+void Optimizer::Optimize(Graph& graph)
 {
+    Optimizer optimizer;
     auto it = graph.TopologicalSort().end();
     // Call TopologicalSort() in every iteration to re-order the list in case layers where added/removed.
     while (it != graph.TopologicalSort().begin())
     {
         --it;
-        for (auto&& optimization : m_Optimizations)
+        for (auto&& optimization : optimizer.m_Optimizations)
         {
-            optimization->Run(graph, it);
+            optimization->Run(graph, **it);
 
             if ((*it)->IsOutputUnconnected())
             {
diff --git a/src/armnn/Optimizer.hpp b/src/armnn/Optimizer.hpp
index 262f264..1f5ed02 100644
--- a/src/armnn/Optimizer.hpp
+++ b/src/armnn/Optimizer.hpp
@@ -15,14 +15,13 @@
 class Optimizer
 {
 public:
-    static const Optimizer& Get();
 
-    void Optimize(Graph& graph) const;
+    static void Optimize(Graph& graph);
 
 private:
     ~Optimizer() = default;
 
-    Optimizer(std::initializer_list<Optimization*> optimizations) : m_Optimizations(optimizations) {}
+    Optimizer();
 
     std::vector<Optimization*> m_Optimizations;
 };
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index ea6d19b..e0d6a9a 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -9,6 +9,7 @@
 #ifdef ARMCOMPUTECL_ENABLED
 #include <arm_compute/core/CL/OpenCL.h>
 #include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
 #endif
 
 #include <boost/log/trivial.hpp>
@@ -58,18 +59,26 @@
     m_LoadedNetworks[networkIdOut] = std::move(loadedNetwork);
 
     return Status::Success;
-
 }
 
 Status Runtime::UnloadNetwork(NetworkId networkId)
 {
+#ifdef ARMCOMPUTECL_ENABLED
+    if (arm_compute::CLScheduler::get().context()() != NULL)
+    {
+        arm_compute::CLScheduler::get().sync();
+    }
+#endif
     if (m_LoadedNetworks.erase(networkId) == 0)
     {
         BOOST_LOG_TRIVIAL(warning) << "WARNING: Runtime::UnloadNetwork(): " << networkId << " not found!";
         return Status::Failure;
     }
 #ifdef ARMCOMPUTECL_ENABLED
-    arm_compute::CLKernelLibrary::get().clear_programs_cache();
+    if (arm_compute::CLScheduler::get().context()() != NULL && m_LoadedNetworks.empty())
+    {
+        m_WorkloadFactories.m_GpuAcc.get()->LoadOpenClRuntime();
+    }
 #endif
     BOOST_LOG_TRIVIAL(debug) << "Runtime::UnloadNetwork(): Unloaded network with ID: " << networkId;
     return Status::Success;
@@ -87,11 +96,24 @@
     m_WorkloadFactories.m_CpuRef = make_shared<RefWorkloadFactory>(
         options.m_DefaultComputeDevice == Compute::CpuRef ? true : options.m_UseCpuRefAsFallback);
     m_WorkloadFactories.m_CpuAcc = make_shared<NeonWorkloadFactory>();
-    m_WorkloadFactories.m_GpuAcc = make_shared<ClWorkloadFactory>();
+    m_WorkloadFactories.m_GpuAcc = make_shared<ClWorkloadFactory>(options.m_ClTunedParameters);
 
     if (options.m_DefaultComputeDevice == Compute::GpuAcc)
     {
-        m_WorkloadFactories.m_GpuAcc.get()->LoadOpenClRuntime(options.m_ClTunedParameters);
+        m_WorkloadFactories.m_GpuAcc.get()->LoadOpenClRuntime();
+    }
+}
+
+Runtime::~Runtime()
+{
+    std::vector<NetworkId> networkIDs;
+    std::transform(m_LoadedNetworks.begin(), m_LoadedNetworks.end(),
+                   std::back_inserter(networkIDs),
+                   [](const auto &pair) { return pair.first; });
+
+    for (auto networkID : networkIDs)
+    {
+        UnloadNetwork(networkID);
     }
 }
 
diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp
index d3f3a57..86fd48d 100644
--- a/src/armnn/Runtime.hpp
+++ b/src/armnn/Runtime.hpp
@@ -56,6 +56,8 @@
     /// it cannot be setup for some reason.
     Runtime(const CreationOptions& options);
 
+    ~Runtime();
+
 private:
     friend void RuntimeLoadedNetworksReserve(armnn::Runtime* runtime); // see RuntimeTests.cpp
 
diff --git a/src/armnn/SerializeLayerParameters.cpp b/src/armnn/SerializeLayerParameters.cpp
new file mode 100644
index 0000000..e8c2bba
--- /dev/null
+++ b/src/armnn/SerializeLayerParameters.cpp
@@ -0,0 +1,156 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "SerializeLayerParameters.hpp"
+#include <armnn/TypesUtils.hpp>
+#include <string>
+#include <iostream>
+#include <sstream>
+
+namespace armnn
+{
+
+void
+StringifyLayerParameters<PermuteDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                       const PermuteDescriptor & desc)
+{
+    std::stringstream ss;
+    ss <<  "[";
+    bool addComma = false;
+    for (auto it=desc.m_DimMappings.begin(); it!= desc.m_DimMappings.end(); ++it)
+    {
+        if (addComma)
+        {
+            ss << ",";
+        }
+        ss << *it;
+        addComma = true;
+    }
+    ss << "]";
+
+    fn("DimMappings",ss.str());
+}
+
+void
+StringifyLayerParameters<ReshapeDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                       const ReshapeDescriptor & desc)
+{
+    std::stringstream ss;
+    ss <<  "[";
+    bool addComma = false;
+    for (unsigned int i=0; i<desc.m_TargetShape.GetNumDimensions(); ++i)
+    {
+        if (addComma)
+        {
+            ss << ",";
+        }
+        ss << desc.m_TargetShape[i];
+        addComma = true;
+    }
+    ss << "]";
+
+    fn("TargetShape",ss.str());
+}
+
+void
+StringifyLayerParameters<ActivationDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                          const ActivationDescriptor & desc)
+{
+    fn("Function",GetActivationFunctionAsCString(desc.m_Function));
+    fn("A",std::to_string(desc.m_A));
+    fn("B",std::to_string(desc.m_B));
+}
+
+void
+StringifyLayerParameters<Convolution2dDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                             const Convolution2dDescriptor & desc)
+{
+    {
+        std::stringstream ss;
+        ss << "(" << desc.m_PadTop    << "," << desc.m_PadLeft
+           << "," << desc.m_PadBottom << "," << desc.m_PadRight << ")";
+        fn("Padding(T,L,B,R)",ss.str());
+    }
+
+    {
+        std::stringstream ss;
+        ss << "(" << desc.m_StrideX <<  "," << desc.m_StrideY << ")";
+        fn("Stride(X,Y)", ss.str());
+    }
+
+    fn("BiasEnabled",(desc.m_BiasEnabled?"true":"false"));
+}
+
+void
+StringifyLayerParameters<BatchNormalizationDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                                  const BatchNormalizationDescriptor & desc)
+{
+    fn("Eps",std::to_string(desc.m_Eps));
+}
+
+void
+StringifyLayerParameters<DepthwiseConvolution2dDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                                      const DepthwiseConvolution2dDescriptor & desc)
+{
+    {
+        std::stringstream ss;
+        ss << "(" << desc.m_PadTop    << "," << desc.m_PadLeft
+           << "," << desc.m_PadBottom << "," << desc.m_PadRight << ")";
+        fn("Padding(T,L,B,R)",ss.str());
+    }
+
+    {
+        std::stringstream ss;
+        ss << "(" << desc.m_StrideX <<  "," << desc.m_StrideY << ")";
+        fn("Stride(X,Y)", ss.str());
+    }
+
+    fn("BiasEnabled",(desc.m_BiasEnabled?"true":"false"));
+}
+
+void
+StringifyLayerParameters<Pooling2dDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                         const Pooling2dDescriptor & desc)
+{
+    fn("Type", GetPoolingAlgorithmAsCString(desc.m_PoolType));
+    {
+        std::stringstream ss;
+        ss << "(" << desc.m_PadTop    << "," << desc.m_PadLeft
+           << "," << desc.m_PadBottom << "," << desc.m_PadRight << ")";
+        fn("Padding(T,L,B,R)",ss.str());
+    }
+
+    {
+        std::stringstream ss;
+        ss << "(" << desc.m_PoolWidth    << "," << desc.m_PoolHeight << ")";
+        fn("(Width,Height)",ss.str());
+    }
+
+    {
+        std::stringstream ss;
+        ss << "(" << desc.m_StrideX <<  "," << desc.m_StrideY << ")";
+        fn("Stride(X,Y)", ss.str());
+    }
+
+    fn("OutputShapeRounding", GetOutputShapeRoundingAsCString(desc.m_OutputShapeRounding));
+    fn("PaddingMethod", GetPaddingMethodAsCString(desc.m_PaddingMethod));
+}
+
+void
+StringifyLayerParameters<SoftmaxDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                       const SoftmaxDescriptor & desc)
+{
+    fn("Beta", std::to_string(desc.m_Beta));
+}
+
+void
+StringifyLayerParameters<FullyConnectedDescriptor>::Serialize(ParameterStringifyFunction & fn,
+                                                              const FullyConnectedDescriptor & desc)
+{
+    fn("BiasEnabled", (desc.m_BiasEnabled?"true":"false"));
+    fn("TransposeWeightMatrix", (desc.m_TransposeWeightMatrix?"true":"false"));
+}
+
+
+}
diff --git a/src/armnn/SerializeLayerParameters.hpp b/src/armnn/SerializeLayerParameters.hpp
new file mode 100644
index 0000000..b008160
--- /dev/null
+++ b/src/armnn/SerializeLayerParameters.hpp
@@ -0,0 +1,73 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include <string>
+#include <functional>
+#include <armnn/Descriptors.hpp>
+
+namespace armnn
+{
+
+using ParameterStringifyFunction = std::function<void(const std::string & name, const std::string & value)>;
+
+///
+/// StringifyLayerParameters allows serializing layer parameters to string.
+/// The default implementation is a no-op because this operation is considered
+/// non-vital for ArmNN and thus we allow adding new layer parameters without
+/// supplying the corresponding stringify functionality.
+///
+template <typename LayerParameter>
+struct StringifyLayerParameters
+{
+    static void Serialize(ParameterStringifyFunction &, const LayerParameter &) {}
+};
+
+template <> struct StringifyLayerParameters<PermuteDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const PermuteDescriptor & desc);
+};
+
+template <> struct StringifyLayerParameters<ReshapeDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const ReshapeDescriptor & desc);
+};
+
+template <> struct StringifyLayerParameters<ActivationDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const ActivationDescriptor & desc);
+};
+
+template <> struct StringifyLayerParameters<Convolution2dDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const Convolution2dDescriptor & desc);
+};
+
+template <> struct StringifyLayerParameters<BatchNormalizationDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const BatchNormalizationDescriptor & desc);
+};
+
+template <> struct StringifyLayerParameters<DepthwiseConvolution2dDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const DepthwiseConvolution2dDescriptor & desc);
+};
+
+template <> struct StringifyLayerParameters<Pooling2dDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const Pooling2dDescriptor & desc);
+};
+
+template <> struct StringifyLayerParameters<SoftmaxDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const SoftmaxDescriptor & desc);
+};
+
+template <> struct StringifyLayerParameters<FullyConnectedDescriptor>
+{
+    static void Serialize(ParameterStringifyFunction & fn, const FullyConnectedDescriptor & desc);
+};
+
+}
diff --git a/src/armnn/backends/ArmComputeTensorUtils.cpp b/src/armnn/backends/ArmComputeTensorUtils.cpp
index 9f21c41..f88ed2b 100644
--- a/src/armnn/backends/ArmComputeTensorUtils.cpp
+++ b/src/armnn/backends/ArmComputeTensorUtils.cpp
@@ -78,6 +78,7 @@
     using arm_compute::DimensionRoundingType;
     using arm_compute::PadStrideInfo;
     using arm_compute::PoolingLayerInfo;
+    using arm_compute::Size2D;
 
     // Resolve ARM Compute layer parameters
     const PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
@@ -94,7 +95,9 @@
 
     const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
 
-    return arm_compute::PoolingLayerInfo(poolingType, descriptor.m_PoolWidth, padStrideInfo, excludePadding);
+    const Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);
+
+    return arm_compute::PoolingLayerInfo(poolingType, poolSize, padStrideInfo, excludePadding);
 }
 
 arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor)
@@ -114,7 +117,7 @@
     arm_compute::PermutationVector aclPerm;
 
     unsigned int start = 0;
-    while ((start == perm[start]) && (start < perm.GetSize()))
+    while ((start < perm.GetSize()) && (start == perm[start]))
     {
         ++start;
     }
diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp
index 4e565a0..6af657b 100644
--- a/src/armnn/backends/ClWorkloadFactory.cpp
+++ b/src/armnn/backends/ClWorkloadFactory.cpp
@@ -35,24 +35,62 @@
 
 #ifdef ARMCOMPUTECL_ENABLED
 
-void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
+ClWorkloadFactory::ClWorkloadFactory(IClTunedParameters* clTunedParameters):
+    m_clTunedParameters(boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters))
 {
-    ClTunedParameters* clTunedParametersImpl = boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters);
+    try
+    {
+        std::vector<cl::Platform> platforms;
+        cl::Platform::get(&platforms);
 
-    cl::Device device;
+        // Select default platform as the first element
+        cl::Platform::setDefault(platforms[0]);
+
+        std::vector<cl::Device> devices;
+        platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
+
+        // Select default device as the first element
+        cl::Device::setDefault(devices[0]);
+    }
+    catch (const cl::Error& clError)
+    {
+        throw ClRuntimeUnavailableException(boost::str(boost::format(
+            "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
+        ) % clError.what() % clError.err()));
+    }
+
+    // Remove the use of global CL context
+    cl::Context::setDefault(cl::Context{});
+    BOOST_ASSERT(cl::Context::getDefault()() == NULL);
+
+    // Remove the use of global CL command queue
+    cl::CommandQueue::setDefault(cl::CommandQueue{});
+    BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL);
+}
+
+ClWorkloadFactory::~ClWorkloadFactory()
+{
+}
+
+void ClWorkloadFactory::LoadOpenClRuntime()
+{
+    cl::Device device = cl::Device::getDefault();
     cl::Context context;
     cl::CommandQueue commandQueue;
 
     try
     {
-        device = cl::Device::getDefault();
-        context = cl::Context::getDefault();
+        arm_compute::CLKernelLibrary::get().clear_programs_cache();
+        arm_compute::CLScheduler::get().init(context, commandQueue, device);
+        arm_compute::CLKernelLibrary::get().init(".", context, device);
+
+        context = cl::Context(device);
 
         bool enableProfiling = false;
 #if ARMNN_PROFILING_ENABLED
         enableProfiling = true;
 #endif
-        if (clTunedParametersImpl && clTunedParametersImpl->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters)
+        if (m_clTunedParameters && m_clTunedParameters->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters)
         {
             enableProfiling = true; // Needed for the CLTuner to work.
         }
@@ -65,7 +103,7 @@
         else
         {
             // Use default queue
-            commandQueue = cl::CommandQueue::getDefault();
+            commandQueue = cl::CommandQueue(context, device);
         }
     }
     catch (const cl::Error& clError)
@@ -79,9 +117,9 @@
     arm_compute::CLKernelLibrary::get().init(".", context, device);
 
     arm_compute::ICLTuner* tuner = nullptr;
-    if (clTunedParameters)
+    if (m_clTunedParameters)
     {
-        tuner = &clTunedParametersImpl->m_Tuner;
+        tuner = &m_clTunedParameters->m_Tuner;
     }
     arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner);
 }
@@ -266,7 +304,16 @@
 
 #else // #if ARMCOMPUTECL_ENABLED
 
-void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
+ClWorkloadFactory::ClWorkloadFactory(IClTunedParameters* clTunedParameters)
+{
+    // No CL support
+}
+
+ClWorkloadFactory::~ClWorkloadFactory()
+{
+}
+
+void ClWorkloadFactory::LoadOpenClRuntime()
 {
     // No CL support
 }
diff --git a/src/armnn/backends/ClWorkloadFactory.hpp b/src/armnn/backends/ClWorkloadFactory.hpp
index 2477e23..e1e66c0 100644
--- a/src/armnn/backends/ClWorkloadFactory.hpp
+++ b/src/armnn/backends/ClWorkloadFactory.hpp
@@ -23,18 +23,22 @@
 {
 
 class IClTunedParameters;
+class ClTunedParameters;
 
 // ARM Compute OpenCL workload factory
 class ClWorkloadFactory : public IWorkloadFactory
 {
 public:
-    virtual ~ClWorkloadFactory(){};
+
+    ClWorkloadFactory(IClTunedParameters* clTunedParameters = nullptr);
+
+    virtual ~ClWorkloadFactory();
 
     virtual Compute GetCompute() const override { return Compute::GpuAcc; }
 
     static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported);
 
-    void LoadOpenClRuntime(IClTunedParameters* clTunedParameters = nullptr);
+    void LoadOpenClRuntime();
 
     virtual bool SupportsSubTensors() const override { return true; }
 
@@ -109,6 +113,9 @@
 
     virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
+
+private:
+    ClTunedParameters* m_clTunedParameters;
 };
 
 class ClTunedParameters : public IClTunedParameters
diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp
index 382b15e..d8a3366 100644
--- a/src/armnn/backends/NeonLayerSupport.cpp
+++ b/src/armnn/backends/NeonLayerSupport.cpp
@@ -71,6 +71,22 @@
     return preferDirectConvolution;
 }
 
+bool IsNeonMultiplicationParamsSupported(std::string* reasonIfUnsupported,
+                                         const TensorInfo& info0,
+                                         const TensorInfo& info1)
+{
+    if (info0.GetShape() == info1.GetShape())
+    {
+        return true;
+    }
+
+    if (reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = "Multiplication on Neon does not support implicit broadcast.";
+    }
+    return false;
+}
+
 bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters)
 {
     if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness)
@@ -233,7 +249,7 @@
     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                       input.GetDataType(),
                                       &TrueFunc<>,
-                                      &FalseFuncU8<>);
+                                      &TrueFunc<>);
 }
 
 bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input,
@@ -293,11 +309,13 @@
                                    const TensorInfo& input1,
                                    std::string* reasonIfUnsupported)
 {
-    ignore_unused(input1);
     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                       input0.GetDataType(),
-                                      &TrueFunc<>,
-                                      &FalseFuncU8<>);
+                                      &IsNeonMultiplicationParamsSupported,
+                                      &FalseFuncU8<const TensorInfo&, const TensorInfo&>,
+                                      input0,
+                                      input1
+                            );
 }
 
 bool IsNormalizationSupportedNeon(const TensorInfo& input,
diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp
index 3842841..0f65a3d 100644
--- a/src/armnn/backends/NeonWorkloadFactory.cpp
+++ b/src/armnn/backends/NeonWorkloadFactory.cpp
@@ -112,7 +112,7 @@
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return MakeWorkload<NeonConvolution2dFloat32Workload, NullWorkload>(descriptor, info);
+    return MakeWorkload<NeonConvolution2dFloat32Workload, NeonConvolution2dUint8Workload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
diff --git a/src/armnn/backends/NeonWorkloads.hpp b/src/armnn/backends/NeonWorkloads.hpp
index 7e9e885..83a3e9f 100644
--- a/src/armnn/backends/NeonWorkloads.hpp
+++ b/src/armnn/backends/NeonWorkloads.hpp
@@ -13,7 +13,9 @@
 #include "backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp"
 #include "backends/NeonWorkloads/NeonConstantFloat32Workload.hpp"
 #include "backends/NeonWorkloads/NeonConstantUint8Workload.hpp"
+#include "backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp"
 #include "backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp"
+#include "backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp"
 #include "backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp"
 #include "backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp"
 #include "backends/NeonWorkloads/NeonFloorFloat32Workload.hpp"
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
index 5099965..10c96d8 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
@@ -73,10 +73,6 @@
     using Type = ResolveType<dataType>;
 
     InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Type>());
-    if (m_Data.m_Parameters.m_BiasEnabled)
-    {
-        InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<Type>());
-    }
 }
 
 // Generate known implementations for linker
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
index 3774051..98d075a 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
@@ -3,6 +3,8 @@
 // See LICENSE file in the project root for full license information.
 //
 
+#pragma once
+
 #include <backends/Workload.hpp>
 #include <backends/NeonWorkloadUtils.hpp>
 
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
index b4650ac..a8c5c63 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
@@ -15,7 +15,12 @@
 NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info)
     : NeonConvolution2dBaseWorkload(descriptor, info)
-{}
+{
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<float>());
+    }
+}
 
 
 void NeonConvolution2dFloat32Workload::Execute() const
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000..ae20522
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonConvolution2dUint8Workload.hpp"
+
+
+namespace armnn
+{
+NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
+                                                             const WorkloadInfo& info)
+    : NeonConvolution2dBaseWorkload(descriptor, info)
+{
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>());
+    }
+}
+
+
+void NeonConvolution2dUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, NeonConvolution2dUint8Workload_Execute);
+    m_ConvolutionLayer->run();
+}
+
+void NeonConvolution2dUint8Workload::ValidateData() const
+{
+    m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1);
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000..319d574
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonConvolution2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    virtual void ValidateData() const override;
+    virtual void Execute() const override;
+private:
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/RefWorkloads/Addition.cpp b/src/armnn/backends/RefWorkloads/Addition.cpp
index c26f82e..6d53a70 100644
--- a/src/armnn/backends/RefWorkloads/Addition.cpp
+++ b/src/armnn/backends/RefWorkloads/Addition.cpp
@@ -8,9 +8,6 @@
 
 #include <functional>
 
-namespace armnn
-{
-
 namespace
 {
 
@@ -24,6 +21,9 @@
 
 } // namespace
 
+namespace armnn
+{
+
 void Addition(const TensorShape& inShape0,
               const TensorShape& inShape1,
               const TensorShape& outShape,
diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp
index 9695e45..476ced7 100644
--- a/src/armnn/backends/RefWorkloads/Merger.hpp
+++ b/src/armnn/backends/RefWorkloads/Merger.hpp
@@ -39,6 +39,7 @@
 
             //split view extents are defined by the size of (the corresponding) input tensor
             const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]);
+            BOOST_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
 
             // check all dimensions to see if this element is inside the given input view
             bool insideView = true;
diff --git a/src/armnn/backends/RefWorkloads/Multiplication.cpp b/src/armnn/backends/RefWorkloads/Multiplication.cpp
index 7f558d8..47c0f1c 100644
--- a/src/armnn/backends/RefWorkloads/Multiplication.cpp
+++ b/src/armnn/backends/RefWorkloads/Multiplication.cpp
@@ -4,18 +4,48 @@
 //
 
 #include "Multiplication.hpp"
+#include "Broadcast.hpp"
+
+#include <functional>
+
+namespace
+{
+
+void ElementwiseMultiplication(unsigned int numElements,
+                               const float* inData0,
+                               const float* inData1,
+                               float* outData)
+{
+    for (unsigned int i = 0; i < numElements; ++i)
+    {
+        outData[i] = inData0[i] * inData1[i];
+    }
+}
+
+} // namespace
 
 namespace armnn
 {
 
-void Multiplication(const float* in0,
-                    const float* in1,
-                    unsigned int numElements,
-                    float* out)
+void Multiplication(const TensorShape& inShape0,
+                    const TensorShape& inShape1,
+                    const TensorShape& outShape,
+                    const float* inData0,
+                    const float* inData1,
+                    float* outData)
 {
-    for (unsigned int i = 0; i < numElements; ++i)
+    if (inShape0 == inShape1)
     {
-        out[i] = in0[i] * in1[i];
+        ElementwiseMultiplication(inShape0.GetNumElements(), inData0, inData1, outData);
+    }
+    else
+    {
+        BroadcastLoop(inShape0, inShape1, outShape).Unroll(
+            std::multiplies<float>(),
+            0,
+            inData0,
+            inData1,
+            outData);
     }
 }
 
diff --git a/src/armnn/backends/RefWorkloads/Multiplication.hpp b/src/armnn/backends/RefWorkloads/Multiplication.hpp
index d0b033e..54fcac5 100644
--- a/src/armnn/backends/RefWorkloads/Multiplication.hpp
+++ b/src/armnn/backends/RefWorkloads/Multiplication.hpp
@@ -5,12 +5,16 @@
 
 #pragma once
 
+#include <armnn/Tensor.hpp>
+
 namespace armnn
 {
 
-void Multiplication(const float* in0,
-                    const float* in1,
-                    unsigned int numElements,
-                    float* out);
+void Multiplication(const TensorShape& inShape0,
+                    const TensorShape& inShape1,
+                    const TensorShape& outShape,
+                    const float* inData0,
+                    const float* inData1,
+                    float* outData);
 
 } //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.cpp b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
index 6d15d8a..a643e67 100644
--- a/src/armnn/backends/RefWorkloads/Pooling2d.cpp
+++ b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
@@ -186,8 +186,8 @@
 
                     // Clamp the pooling region inside the valid input area (which includes the padding).
                     // This is necessary because the final pooling in a row may overlap beyond the padding.
-                    hend = std::min(hend, heightInput + padRight);
-                    wend = std::min(wend, widthInput + padBottom);
+                    hend = std::min(hend, heightInput + padBottom);
+                    wend = std::min(wend, widthInput + padRight);
 
                     float result = defaultInitializer;
                     float poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
index ed68b1f..d7c54d9 100644
--- a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
@@ -17,12 +17,15 @@
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMultiplicationFloat32Workload_Execute");
 
-    const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorShape& inShape0 = GetTensorInfo(m_Data.m_Inputs[0]).GetShape();
+    const TensorShape& inShape1 = GetTensorInfo(m_Data.m_Inputs[1]).GetShape();
+    const TensorShape& outShape = GetTensorInfo(m_Data.m_Outputs[0]).GetShape();
 
     float* outputData = GetOutputTensorDataFloat(0, m_Data);
     const float* inputData0 = GetInputTensorDataFloat(0, m_Data);
     const float* inputData1 = GetInputTensorDataFloat(1, m_Data);
-    Multiplication(inputData0, inputData1, inputInfo0.GetNumElements(), outputData);
+
+    Multiplication(inShape0, inShape1, outShape, inputData0, inputData1, outputData);
 }
 
 } //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
index 2e6f0e6..d5c4afd 100644
--- a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
@@ -27,10 +27,9 @@
     auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1);
 
     std::vector<float> results(outputInfo.GetNumElements());
-    Multiplication(dequant0.data(),
-                   dequant1.data(),
-                   inputInfo0.GetNumElements(),
-                   results.data());
+    Multiplication(
+        inputInfo0.GetShape(), inputInfo1.GetShape(), outputInfo.GetShape(),
+        dequant0.data(), dequant1.data(),results.data());
 
    Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
 }
diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp
index 67f6c10..74c4cb4 100644
--- a/src/armnn/backends/RefWorkloads/Splitter.hpp
+++ b/src/armnn/backends/RefWorkloads/Splitter.hpp
@@ -41,6 +41,7 @@
 
             //split view extents are defined by the size of (the corresponding) input tensor
             const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+            BOOST_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
 
             // check all dimensions to see if this element is inside the given input view
             bool insideView = true;
diff --git a/src/armnn/backends/WorkloadData.cpp b/src/armnn/backends/WorkloadData.cpp
index 96a3780..c951fc5 100644
--- a/src/armnn/backends/WorkloadData.cpp
+++ b/src/armnn/backends/WorkloadData.cpp
@@ -502,16 +502,13 @@
 {
     ValidateTwoInputs(workloadInfo, "MultiplicationQueueDescriptor");
     ValidateSingleOutput(workloadInfo, "MultiplicationQueueDescriptor");
-    ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0],
-                              workloadInfo.m_InputTensorInfos[1],
-                              "MultiplicationQueueDescriptor",
-                              "first input",
-                              "second input");
-    ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0],
-                              workloadInfo.m_OutputTensorInfos[0],
-                              "MultiplicationQueueDescriptor",
-                              "input",
-                              "output");
+
+    ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0],
+                                       workloadInfo.m_InputTensorInfos[1],
+                                       workloadInfo.m_OutputTensorInfos[0],
+                                       "MultiplicationQueueDescriptor",
+                                       "first input",
+                                       "second input");
 }
 
 void BatchNormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp
index 5933ceb..c45a82d 100644
--- a/src/armnn/backends/test/ArmComputeCl.cpp
+++ b/src/armnn/backends/test/ArmComputeCl.cpp
@@ -103,7 +103,7 @@
 ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test)
 ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest)
 ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8,
-    IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
+                     IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
 ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test)
 ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test)
 
@@ -114,6 +114,12 @@
 
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+                     IgnorePaddingAveragePooling2dSize3x2Stride2x2Test,
+                     false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+                     IgnorePaddingAveragePooling2dSize3x2Stride2x2Test,
+                     true)
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
 
@@ -136,6 +142,8 @@
 
 // Mul
 ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest)
 
 // Batch Norm
 ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
@@ -194,6 +202,9 @@
 // Permute
 ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
 ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
 
 // ============================================================================
 // COMPARE tests
diff --git a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp
index dd8a668..a81b7cd 100644
--- a/src/armnn/backends/test/ArmComputeNeon.cpp
+++ b/src/armnn/backends/test/ArmComputeNeon.cpp
@@ -141,6 +141,7 @@
 ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true)
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
 
@@ -170,6 +171,11 @@
     IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
 ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test)
 ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+                             IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+                             IgnorePaddingAveragePooling2dSize3x2Stride2x2Test,
+                                          true)
 
 ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest)
 ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test)
@@ -281,6 +287,10 @@
 // Permute
 ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
 ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
+
 // ============================================================================
 // COMPARE tests
 
diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp
index 76681f9..9eed2db 100644
--- a/src/armnn/backends/test/LayerTests.cpp
+++ b/src/armnn/backends/test/LayerTests.cpp
@@ -1005,31 +1005,22 @@
     return ret;
 }
 
-LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
+namespace {
+LayerTestResult<float,4> MultiplicationTestHelper(armnn::IWorkloadFactory& workloadFactory,
+                                                  const unsigned int shape0[4],
+                                                  const std::vector<float> & values0,
+                                                  const unsigned int shape1[4],
+                                                  const std::vector<float> & values1,
+                                                  const unsigned int outShape[4],
+                                                  const std::vector<float> & outValues)
 {
-    const unsigned int width = 2;
-    const unsigned int height = 2;
-    const unsigned int channelCount = 2;
-    const unsigned int batchSize = 2;
+    const size_t dimensionCount = 4;
+    armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32};
+    armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32};
+    armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32};
 
-    armnn::TensorInfo inputTensorInfo0;
-    armnn::TensorInfo inputTensorInfo1;
-    armnn::TensorInfo outputTensorInfo;
-
-    constexpr unsigned int shape[] = { batchSize, channelCount, height, width };
-    constexpr std::size_t dimensionCount = std::extent<decltype(shape)>::value;
-
-    inputTensorInfo0 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
-    inputTensorInfo1 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
-    outputTensorInfo = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
-
-    auto input0 = MakeTensor<float, 4>(inputTensorInfo0, std::vector<float>({
-        1,  1,  1,  1,    2,  2,  2,  2,
-        3,  3,  3,  3,    4,  4,  4,  4 }));
-
-    auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>({
-        2,  2,  2,  2,    3,  3,  3,  3,
-        4,  4,  4,  4,    5,  5,  5,  5 }));
+    auto input0 = MakeTensor<float, 4>(inputTensorInfo0, values0);
+    auto input1 = MakeTensor<float, 4>(inputTensorInfo1, values1);
 
     LayerTestResult<float,4> ret(outputTensorInfo);
 
@@ -1056,12 +1047,85 @@
 
     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
 
-    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
-        2,  2,  2,  2,    6,  6,  6,  6,
-        12, 12, 12, 12,  20, 20, 20, 20 }));
-
+    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outValues);
     return ret;
 }
+} // anonymous namespace
+
+
+LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int width = 2;
+    const unsigned int height = 2;
+    const unsigned int channelCount = 2;
+    const unsigned int batchSize = 2;
+
+    unsigned int shape[] = { batchSize, channelCount, height, width };
+
+    std::vector<float> input0({
+        1,  1,  1,  1,    2,  2,  2,  2,
+        3,  3,  3,  3,    4,  4,  4,  4 });
+
+    std::vector<float> input1({
+        2,  2,  2,  2,    3,  3,  3,  3,
+        4,  4,  4,  4,    5,  5,  5,  5 });
+
+    std::vector<float> output({
+        2,  2,  2,  2,    6,  6,  6,  6,
+        12, 12, 12, 12,  20, 20, 20, 20 });
+
+    return MultiplicationTestHelper(workloadFactory,
+                                    shape,
+                                    input0,
+                                    shape,
+                                    input1,
+                                    shape,
+                                    output);
+}
+
+LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    unsigned int shape0[] = { 1, 2, 2, 2 };
+    std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8});
+
+    unsigned int shape1[] = { 1, 1, 1, 1 };
+    std::vector<float> input1({ 2 });
+
+    std::vector<float> output({ 2, 4, 6, 8, 10, 12, 14, 16});
+
+    return MultiplicationTestHelper(workloadFactory,
+                                    shape0,
+                                    input0,
+                                    shape1,
+                                    input1,
+                                    shape0,
+                                    output);
+}
+
+LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    unsigned int shape0[] = { 1, 3, 3, 2 };
+    std::vector<float> input0({
+        1,   2,      3,  4,      5,  6,
+        7,   8,      9, 10,     11, 12,
+        13, 14,     15, 16,     17, 18});
+
+    unsigned int shape1[] = { 1, 1, 1, 2 };
+    std::vector<float> input1({ 1, 2 });
+
+    std::vector<float> output({
+        1,   4,       3,  8,      5, 12,
+        7,   16,      9, 20,     11, 24,
+        13,  28,     15, 32,     17, 36});
+
+    return MultiplicationTestHelper(workloadFactory,
+                                    shape0,
+                                    input0,
+                                    shape1,
+                                    input1,
+                                    shape0,
+                                    output);
+}
 
 LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
                                           armnn::IWorkloadFactory& refWorkloadFactory)
@@ -3253,69 +3317,59 @@
     return result;
 }
 
-LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
+namespace
 {
-    unsigned int batchSize = 1;
-    unsigned int channels = 2;
-    unsigned int height = 2;
-    unsigned int width = 3;
+LayerTestResult<uint8_t, 4> MultiplicationUint8TestHelper(armnn::IWorkloadFactory& workloadFactory,
+                                                          const unsigned int shape0[4],
+                                                          const std::vector<uint8_t> & values0,
+                                                          float scale0,
+                                                          int32_t offset0,
+                                                          const unsigned int shape1[4],
+                                                          const std::vector<uint8_t> & values1,
+                                                          float scale1,
+                                                          int32_t offset1,
+                                                          const unsigned int outShape[4],
+                                                          const std::vector<uint8_t> & outValues,
+                                                          float outScale,
+                                                          int32_t outOffset)
+{
+    armnn::TensorInfo inputTensorInfo0(4, shape0, armnn::DataType::QuantisedAsymm8);
+    armnn::TensorInfo inputTensorInfo1(4, shape1, armnn::DataType::QuantisedAsymm8);
+    armnn::TensorInfo outputTensorInfo(4, outShape, armnn::DataType::QuantisedAsymm8);
 
-    armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
-    armnn::TensorInfo outputTensorInfo;
+    inputTensorInfo0.SetQuantizationScale(scale0);
+    inputTensorInfo0.SetQuantizationOffset(offset0);
 
-    const unsigned int shape[] = { batchSize, channels, height, width };
-    inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
-    inputTensorInfo1.SetQuantizationScale(4.0f);
-    inputTensorInfo1.SetQuantizationOffset(1);
+    inputTensorInfo1.SetQuantizationScale(scale1);
+    inputTensorInfo1.SetQuantizationOffset(offset1);
 
-    inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
-    inputTensorInfo2.SetQuantizationScale(3.0f);
-    inputTensorInfo2.SetQuantizationOffset(-2);
+    outputTensorInfo.SetQuantizationScale(outScale);
+    outputTensorInfo.SetQuantizationOffset(outOffset);
 
-    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
-    outputTensorInfo.SetQuantizationScale(1366.255f); // Scale/offset chosen to have output values out of range
-    outputTensorInfo.SetQuantizationOffset(-5);
+    auto input0 = MakeTensor<uint8_t, 4>(inputTensorInfo0, values0);
+    auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, values1);
 
-    // See dequantized values to the right
-    auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
-    {
-         62,  37,   3, 172,  13, 111, // 244, 144,   8, 684,  48, 440,
-        188,  20,  73,  31,  23,  31  // 748,  76, 288, 120,  88, 120
-    }));
-
-    // See dequantized values to the right
-    auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
-    {
-        126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
-         48, 115, 151,  79,  78,  97  // 150, 351, 459, 243, 240, 297
-    }));
-
-    // See dequantized values to the right
     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>(
-    {
-         64,  72,   0, 255,   8, 236, //  93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
-         77,  15,  92,  16,  10,  21, // 112200,  26676,        132192,           29160, 21120,  35640
-    }));
+    result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, outValues);
 
+    std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
-    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
 
     armnn::MultiplicationQueueDescriptor data;
     armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
-    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+    AddInputToWorkload(data,  info, inputTensorInfo0, inputHandle0.get());
+    AddInputToWorkload(data,  info, inputTensorInfo1, inputHandle1.get());
     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 
     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
 
+    inputHandle0->Allocate();
     inputHandle1->Allocate();
-    inputHandle2->Allocate();
     outputHandle->Allocate();
 
+    CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
-    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
 
     workload->Execute();
 
@@ -3323,6 +3377,113 @@
 
     return result;
 }
+} // anonymous namespace
+
+LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    unsigned int batchSize = 1;
+    unsigned int channels = 2;
+    unsigned int height = 2;
+    unsigned int width = 3;
+    const unsigned int shape[] = { batchSize, channels, height, width };
+
+    // See dequantized values to the right
+    std::vector<uint8_t> input0({
+         62,  37,   3, 172,  13, 111, // 244, 144,   8, 684,  48, 440,
+        188,  20,  73,  31,  23,  31  // 748,  76, 288, 120,  88, 120
+    });
+
+    // See dequantized values to the right
+    std::vector<uint8_t> input1({
+        126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
+         48, 115, 151,  79,  78,  97  // 150, 351, 459, 243, 240, 297
+    });
+
+    // See dequantized values to the right
+    std::vector<uint8_t> output(
+    {
+         64,  72,   0, 255,   8, 236, //  93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
+         77,  15,  92,  16,  10,  21, // 112200,  26676,        132192,           29160, 21120,  35640
+    });
+
+    return MultiplicationUint8TestHelper(workloadFactory,
+                                         shape,
+                                         input0,
+                                         4.0f,
+                                         1,
+                                         shape,
+                                         input1,
+                                         3.0f,
+                                         -2,
+                                         shape,
+                                         output,
+                                         1366.255f, // Scale/offset chosen to have output values out of range
+                                         -5);
+}
+
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<uint8_t> input0({
+        1, 2, 3,    4,  5,  6,
+        7, 8, 9,   10, 11, 12
+    });
+
+    std::vector<uint8_t> input1({2});
+
+    std::vector<uint8_t> output({
+        2,  4,   6,     8, 10, 12,
+        14, 16, 18,    20, 22, 24
+    });
+
+    return MultiplicationUint8TestHelper(workloadFactory,
+                                         shape0,
+                                         input0,
+                                         1.0f,
+                                         0,
+                                         shape1,
+                                         input1,
+                                         1.0f,
+                                         0,
+                                         shape0,
+                                         output,
+                                         1.0f,
+                                         0);
+}
+
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<uint8_t> input0({
+        1, 2, 3,    4,  5,  6,
+        7, 8, 9,   10, 11, 12
+    });
+
+    std::vector<uint8_t> input1({1, 2, 3});
+
+    std::vector<uint8_t> output({
+        1,  4,   9,     4, 10, 18,
+        7, 16,  27,    10, 22, 36
+    });
+
+    return MultiplicationUint8TestHelper(workloadFactory,
+                                         shape0,
+                                         input0,
+                                         1.0f,
+                                         0,
+                                         shape1,
+                                         input1,
+                                         1.0f,
+                                         0,
+                                         shape0,
+                                         output,
+                                         1.0f,
+                                         0);
+}
 
 LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory)
 {
@@ -3702,6 +3863,12 @@
     return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1);
 }
 
+LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
+                                                                            bool forceNoPadding)
+{
+    return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding);
+}
+
 LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
 {
     return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory);
@@ -3882,3 +4049,18 @@
 {
     return SimplePermuteUint8TestCommon(workloadFactory);
 };
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    return PermuteFloat32ValueSet1TestCommon(workloadFactory);
+};
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    return PermuteFloat32ValueSet2TestCommon(workloadFactory);
+};
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    return PermuteFloat32ValueSet3TestCommon(workloadFactory);
+};
diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp
index fc0c9c7..36e73e46 100644
--- a/src/armnn/backends/test/LayerTests.hpp
+++ b/src/armnn/backends/test/LayerTests.hpp
@@ -82,6 +82,8 @@
 
 LayerTestResult<float,   4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float,   4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
+                                                                              bool forceNoPadding);
 LayerTestResult<float,   4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 4>   IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory);
@@ -187,6 +189,8 @@
                                                 unsigned int batchSize);
 
 LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory);
 
 LayerTestResult<float, 4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
                                              armnn::IWorkloadFactory& refWorkloadFactory);
@@ -260,6 +264,8 @@
     float beta);
 
 LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory);
 
 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                             bool                     biasEnabled);
@@ -303,3 +309,6 @@
 LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory);
 
+LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory);
diff --git a/src/armnn/backends/test/PermuteTestImpl.hpp b/src/armnn/backends/test/PermuteTestImpl.hpp
index 4eafa1a..4ecffed 100644
--- a/src/armnn/backends/test/PermuteTestImpl.hpp
+++ b/src/armnn/backends/test/PermuteTestImpl.hpp
@@ -119,3 +119,107 @@
     return SimplePermuteTestImpl<uint8_t>(workloadFactory, descriptor, inputTensorInfo,
                                           outputTensorInfo, input, outputExpected);
 }
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet1TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 2, 2, 3 };
+    unsigned int outputShape[] = { 1, 3, 2, 2 };
+
+    armnn::PermuteDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 2U, 3U, 1U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    std::vector<float> input = std::vector<float>(
+            {
+                    1.0f,   2.0f,  3.0f,
+                    11.0f, 12.0f, 13.0f,
+                    21.0f, 22.0f, 23.0f,
+                    31.0f, 32.0f, 33.0f,
+            });
+
+    std::vector<float> outputExpected = std::vector<float>(
+            {
+                    1.0f, 11.0f, 21.0f, 31.0f,
+                    2.0f, 12.0f, 22.0f, 32.0f,
+                    3.0f, 13.0f, 23.0f, 33.0f,
+            });
+
+    return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo,
+                                        outputTensorInfo, input, outputExpected);
+}
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet2TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 3, 2, 2 };
+    unsigned int outputShape[] = { 1, 2, 2, 3 };
+
+    armnn::PermuteDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 3U, 1U, 2U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    std::vector<float> input = std::vector<float>(
+            {
+                1.0f, 11.0f, 21.0f, 31.0f,
+                2.0f, 12.0f, 22.0f, 32.0f,
+                3.0f, 13.0f, 23.0f, 33.0f,
+            });
+
+    std::vector<float> outputExpected = std::vector<float>(
+            {
+                1.0f,   2.0f,  3.0f,
+                11.0f, 12.0f, 13.0f,
+                21.0f, 22.0f, 23.0f,
+                31.0f, 32.0f, 33.0f,
+            });
+
+    return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo,
+                                        outputTensorInfo, input, outputExpected);
+}
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet3TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 2, 3, 3 };
+    unsigned int outputShape[] = { 1, 3, 2, 3 };
+
+    armnn::PermuteDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 2U, 3U, 1U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    std::vector<float> input = std::vector<float>(
+            {
+                1.0f,   2.0f,  3.0f,
+                11.0f, 12.0f, 13.0f,
+                21.0f, 22.0f, 23.0f,
+                31.0f, 32.0f, 33.0f,
+                41.0f, 42.0f, 43.0f,
+                51.0f, 52.0f, 53.0f,
+            });
+
+    std::vector<float> outputExpected = std::vector<float>(
+            {
+                1.0f, 11.0f, 21.0f, 31.0f, 41.0f, 51.0f,
+                2.0f, 12.0f, 22.0f, 32.0f, 42.0f, 52.0f,
+                3.0f, 13.0f, 23.0f, 33.0f, 43.0f, 53.0f,
+            });
+
+    return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo,
+                                        outputTensorInfo, input, outputExpected);
+}
diff --git a/src/armnn/backends/test/Pooling2dTestImpl.hpp b/src/armnn/backends/test/Pooling2dTestImpl.hpp
index fc84ddb..ab9fd6d 100644
--- a/src/armnn/backends/test/Pooling2dTestImpl.hpp
+++ b/src/armnn/backends/test/Pooling2dTestImpl.hpp
@@ -720,6 +720,83 @@
     return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected);
 }
 
+//
+// Tests average pooling with the following parameters:
+//
+//   Pooling size: 3x2
+//   Stride:       (2,2)
+//   input size:   3x2
+//   channels:     1
+//   batch size:   1
+//
+template<typename T>
+LayerTestResult<T, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        bool forceNoPadding,
+        float qScale = 1.0f,
+        int32_t qOffset = 0)
+{
+    armnn::Pooling2dDescriptor descriptor;
+    descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
+    descriptor.m_PoolWidth = 3;
+    descriptor.m_PoolHeight = 2;
+    descriptor.m_StrideX = 2;
+    descriptor.m_StrideY = 2;
+    descriptor.m_PadLeft = (forceNoPadding) ? 0 : 1;
+    descriptor.m_PadRight = descriptor.m_PadLeft;
+    descriptor.m_PadTop = 0;
+    descriptor.m_PadBottom = 0;
+    descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+    descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
+
+    unsigned int inputWidth = 3;
+    unsigned int inputHeight = 2;
+    unsigned int outputWidth =
+        (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) /
+        descriptor.m_StrideX;
+    unsigned int outputHeight =
+        (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) /
+        descriptor.m_StrideY;
+    unsigned int channels = 1;
+    unsigned int batchSize = 1;
+
+    std::vector<float> inputData = {
+        3.0f, 6.0f, 9.0f,
+        12.0f, 15.0f, 18.0f,
+    };
+
+    std::vector<float> expectedOutputDataWithPadding = {
+        6.0f, 8.0f,
+    };
+
+    std::vector<float> expectedOutputDataNoPadding = {
+        10.5f,
+    };
+
+    armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>());
+
+    // Scale and offset should match input - we're just calculating average values.
+    armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>());
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData));
+
+    auto outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+        forceNoPadding ? QuantizedVector<T>(qScale, qOffset, expectedOutputDataNoPadding) :
+                         QuantizedVector<T>(qScale, qOffset, expectedOutputDataWithPadding));
+
+    return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected);
+}
+
+
 template<typename T>
 LayerTestResult<T, 4> IgnorePaddingSimpleMaxPooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                             float qScale = 1.0f,
diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp
index 87d82f1..89e5db8 100644
--- a/src/armnn/backends/test/Reference.cpp
+++ b/src/armnn/backends/test/Reference.cpp
@@ -76,6 +76,10 @@
 
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+                     IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+                     IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, true)
 
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
@@ -158,7 +162,11 @@
 
 // Mul
 ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest)
 ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementUint8, MultiplicationBroadcast1ElementUint8Test)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadcast1DVectorUint8Test)
 
 // Batch Norm
 ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
@@ -227,5 +235,8 @@
 // Permute
 ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
 ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/optimizations/Optimization.hpp b/src/armnn/optimizations/Optimization.hpp
index 89e03ff..f810718 100644
--- a/src/armnn/optimizations/Optimization.hpp
+++ b/src/armnn/optimizations/Optimization.hpp
@@ -13,7 +13,7 @@
 class Optimization
 {
 public:
-    virtual void Run(Graph& graph, Graph::Iterator& pos) const = 0;
+    virtual void Run(Graph& graph, Layer& base) const = 0;
 protected:
     ~Optimization() = default;
 };
@@ -23,22 +23,20 @@
 // (curiously recurring template pattern).
 // For details, see https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern
 
-/// Wrapper Optimization base class that calls Wrapped::Run for every layer of type BaseType.
-/// - Wrapped class mustn't remove the base layer.
-/// - Base layer is removed if left unconnected after applying the wrapped optimization.
+/// Wrapper Optimization base class that calls Wrapped::Run() for every layer of type BaseType.
+/// - Wrapped class mustn't remove the base layer. The optimizer will remove it if left unconnected
+///   after applying each optimization.
 template <typename BaseType, typename Wrapped>
 class OptimizeForTypeImpl : public armnn::Optimization, public Wrapped
 {
 public:
     using Wrapped::Wrapped;
 
-    void Run(Graph& graph, Graph::Iterator& pos) const override
+    void Run(Graph& graph, Layer& base) const override
     {
-        Layer* const base = *pos;
-
-        if (base->GetType() == LayerEnumOf<BaseType>())
+        if (base.GetType() == LayerEnumOf<BaseType>())
         {
-            Wrapped::Run(graph, *boost::polymorphic_downcast<BaseType*>(base));
+            Wrapped::Run(graph, *boost::polymorphic_downcast<BaseType*>(&base));
         }
     }
 
@@ -46,16 +44,16 @@
     ~OptimizeForTypeImpl() = default;
 };
 
-/// Specialization that calls Wrapped::Run for any layer type
+/// Specialization that calls Wrapped::Run() for any layer type
 template <typename Wrapped>
 class OptimizeForTypeImpl<Layer, Wrapped> : public armnn::Optimization, public Wrapped
 {
 public:
     using Wrapped::Wrapped;
 
-    void Run(Graph& graph, Graph::Iterator& pos) const override
+    void Run(Graph& graph, Layer& base) const override
     {
-        Wrapped::Run(graph, **pos);
+        Wrapped::Run(graph, base);
     }
 
 protected:
@@ -70,9 +68,10 @@
 };
 
 /// Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType.
-/// - Wrapped class mustn't remove the base layer.
+/// - Wrapped class mustn't remove the base layer. The optimizer will remove it if left unconnected
+///   after applying each optimization.
 /// - Wrapped class mustn't affect existing connections in the same output. It might add new ones.
-/// - Base and children layers are removed if left unconnected after applying the wrapped optimization.
+/// - Children layers are removed if left unconnected after applying the wrapped optimization.
 template <typename BaseType, typename ChildType, typename Wrapped>
 class OptimizeForConnectionImpl : public Wrapped
 {
diff --git a/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp b/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp
index deb49c6..9a926a5 100644
--- a/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp
+++ b/src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp
@@ -18,8 +18,8 @@
     /// Inserts an equivalent ReshapeLayer that bypasses both for that connection.
     void Run(Graph& graph, InputSlot& connection) const
     {
-        auto& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
-        auto& child = connection.GetOwningLayer();
+        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+        Layer& child = connection.GetOwningLayer();
 
         BOOST_ASSERT(base.GetType() == LayerType::Reshape);
         BOOST_ASSERT(child.GetType() == LayerType::Reshape);
diff --git a/src/armnn/optimizations/SquashEqualSiblings.hpp b/src/armnn/optimizations/SquashEqualSiblings.hpp
index 2dfe91f..c5ce28e 100644
--- a/src/armnn/optimizations/SquashEqualSiblings.hpp
+++ b/src/armnn/optimizations/SquashEqualSiblings.hpp
@@ -26,19 +26,29 @@
         if (!child.IsOutputUnconnected())
         {
             OutputSlot& baseOutput = *connection.GetConnectedOutputSlot();
-            auto& comparableChild = *boost::polymorphic_downcast<Comparable*>(&child);
 
-            for (auto&& it : baseOutput.GetConnections())
+            if (baseOutput.GetNumConnections() > 1)
             {
-                Layer& sibling = it->GetOwningLayer();
-                if ((&sibling != &child) && comparableChild.IsEqual(sibling))
+                auto& comparableChild = *boost::polymorphic_downcast<Comparable*>(&child);
+
+                Layer* lowestPriorityChild = &child;
+                for (auto&& it : baseOutput.GetConnections())
                 {
-                    // Bypass sibling. It will be removed as it's left unconnected.
-                    auto siblingOut = sibling.BeginOutputSlots();
-                    for (auto childOut = child.BeginOutputSlots(); childOut != child.EndOutputSlots(); ++childOut)
+                    Layer* sibling = &it->GetOwningLayer();
+                    if ((sibling != lowestPriorityChild) && comparableChild.IsEqual(*sibling))
                     {
-                        siblingOut->MoveAllConnections(*childOut);
-                        ++siblingOut;
+                        if (sibling->GetPriority() < lowestPriorityChild->GetPriority())
+                        {
+                            std::swap(sibling, lowestPriorityChild);
+                        }
+                        // Bypass sibling. It will be removed as it's left unconnected.
+                        auto siblingOut = sibling->BeginOutputSlots();
+                        for (auto lowestPriorityChildOut = lowestPriorityChild->BeginOutputSlots();
+                             lowestPriorityChildOut != lowestPriorityChild->EndOutputSlots(); ++lowestPriorityChildOut)
+                        {
+                            siblingOut->MoveAllConnections(*lowestPriorityChildOut);
+                            ++siblingOut;
+                        }
                     }
                 }
             }
diff --git a/src/armnn/test/Network_test.cpp b/src/armnn/test/Network_test.cpp
index 523d47b..057caa0 100644
--- a/src/armnn/test/Network_test.cpp
+++ b/src/armnn/test/Network_test.cpp
@@ -29,6 +29,64 @@
 
 BOOST_AUTO_TEST_SUITE(Network)
 
+BOOST_AUTO_TEST_CASE(LayerGuids)
+{
+    armnn::Network net;
+    armnn::LayerGuid inputId = net.AddInputLayer(0)->GetGuid();
+    armnn::LayerGuid addId = net.AddAdditionLayer()->GetGuid();
+    armnn::LayerGuid outputId = net.AddOutputLayer(0)->GetGuid();
+
+    BOOST_TEST(inputId != addId);
+    BOOST_TEST(addId != outputId);
+    BOOST_TEST(inputId != outputId);
+}
+
+BOOST_AUTO_TEST_CASE(SerializeToDot)
+{
+    armnn::Network net;
+
+    //define layers
+    auto input = net.AddInputLayer(0);
+    auto add = net.AddAdditionLayer();
+    auto output = net.AddOutputLayer(0);
+
+    // connect layers
+    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    input->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    armnn::TensorShape shape({4});
+    armnn::TensorInfo info(shape, armnn::DataType::Float32);
+    input->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+
+    armnn::DeviceSpec spec;
+    spec.DefaultComputeDevice = armnn::Compute::CpuAcc;
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, spec);
+
+    std::ostringstream ss;
+    optimizedNet->SerializeToDot(ss);
+
+    auto inputId = input->GetGuid();
+    auto addId = add->GetGuid();
+    auto outputId = output->GetGuid();
+
+    std::stringstream expected;
+    expected <<
+        "digraph Optimized {\n"
+        "    node [shape=\"record\"];\n"
+        "    edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
+        "    " << inputId << " [label=\"{Input}\"];\n"
+        "    " << addId << " [label=\"{Addition}\"];\n"
+        "    " << outputId << " [label=\"{Output}\"];\n"
+        "    " << inputId << " -> " << addId << " [label=< [4] >];\n"
+        "    " << inputId << " -> " << addId << " [label=< [4] >];\n"
+        "    " << addId << " -> " << outputId << " [label=< [4] >];\n"
+        "}\n";
+
+    BOOST_TEST(ss.str() == expected.str());
+}
+
 BOOST_AUTO_TEST_CASE(NetworkBasic)
 {
     armnn::Network net;
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
new file mode 100644
index 0000000..da26fba
--- /dev/null
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -0,0 +1,334 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include "armnn/ArmNN.hpp"
+#include "Graph.hpp"
+#include "Optimizer.hpp"
+
+namespace
+{
+template <typename LayerT>
+bool IsLayerOfType(const armnn::Layer* const layer)
+{
+    return (layer->GetType() == armnn::LayerEnumOf<LayerT>());
+}
+
+bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last)
+{
+    return (first == last);
+}
+
+/// Check that each unary function in Us evaluates to true for the corresponding layer in the sequence [first, last)
+template <typename U, typename... Us>
+bool CheckSequence(const armnn::Graph::ConstIterator first,
+                   const armnn::Graph::ConstIterator last,
+                   U&& u,
+                   Us&&... us)
+{
+    return u(*first) && CheckSequence(std::next(first), last, us...);
+}
+}
+
+BOOST_AUTO_TEST_SUITE(Optimizer)
+
+BOOST_AUTO_TEST_CASE(OptimizeInversePermutes)
+{
+    armnn::Graph graph;
+
+    auto output = graph.AddLayer<armnn::OutputLayer>(0, "output");
+
+    graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input");
+
+    // Insert two permutes, one the inverse of the other
+    graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0),
+                                              armnn::PermuteDescriptor({0, 2, 3, 1}),
+                                              "perm0231");
+    graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0),
+                                              armnn::PermuteDescriptor({0, 3, 1, 2}),
+                                              "perm0312");
+
+    BOOST_TEST(CheckSequence(graph.cbegin(),
+                             graph.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>));
+
+    armnn::Optimizer::Optimize(graph);
+
+    // The permutes are removed
+    BOOST_TEST(CheckSequence(graph.cbegin(),
+                             graph.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>));
+}
+
+BOOST_AUTO_TEST_CASE(MovePermuteUp)
+{
+    const armnn::TensorInfo info({ 1, 5, 2, 3 }, armnn::DataType::Float32);
+    const armnn::TensorInfo permuted({ 1, 3, 5, 2 }, armnn::DataType::Float32);
+
+    armnn::Graph graph;
+
+    armnn::LayerBindingId inputId = 0;
+
+    armnn::Layer* head = graph.AddLayer<armnn::OutputLayer>(0, "output");
+
+    // Insert permute
+    head = graph.InsertNewLayer<armnn::PermuteLayer>(head->GetInputSlot(0),
+                                                     armnn::PermuteDescriptor({ 0, 2, 3, 1 }), "");
+    head->GetOutputHandler().SetTensorInfo(permuted);
+
+    // Insert layers that don't care about data format
+    head = graph.InsertNewLayer<armnn::ActivationLayer>(head->GetInputSlot(0),
+                                                        armnn::ActivationDescriptor{}, "");
+    head->GetOutputHandler().SetTensorInfo(info);
+
+    head = graph.InsertNewLayer<armnn::AdditionLayer>(head->GetInputSlot(0), "");
+    head->GetOutputHandler().SetTensorInfo(info);
+
+    // Insert input for 2nd input of Addition
+    graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "")
+        ->GetOutputHandler().SetTensorInfo(info);
+
+    head = graph.InsertNewLayer<armnn::FakeQuantizationLayer>(head->GetInputSlot(0),
+                                                              armnn::FakeQuantizationDescriptor{}, "");
+    head->GetOutputHandler().SetTensorInfo(info);
+
+    head = graph.InsertNewLayer<armnn::FloorLayer>(head->GetInputSlot(0), "");
+    head->GetOutputHandler().SetTensorInfo(info);
+
+    head = graph.InsertNewLayer<armnn::MemCopyLayer>(head->GetInputSlot(0), "");
+    head->GetOutputHandler().SetTensorInfo(info);
+
+    head = graph.InsertNewLayer<armnn::MultiplicationLayer>(head->GetInputSlot(0), "");
+    head->GetOutputHandler().SetTensorInfo(info);
+
+    // Insert input for 2nd input of Multiplication
+    graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "")
+        ->GetOutputHandler().SetTensorInfo(info);
+
+    // Insert input
+    graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(0), inputId++, "")
+        ->GetOutputHandler().SetTensorInfo(info);
+
+    BOOST_TEST(CheckSequence(graph.cbegin(),
+                             graph.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::MultiplicationLayer>,
+                             &IsLayerOfType<armnn::MemCopyLayer>,
+                             &IsLayerOfType<armnn::FloorLayer>,
+                             &IsLayerOfType<armnn::FakeQuantizationLayer>,
+                             &IsLayerOfType<armnn::AdditionLayer>,
+                             &IsLayerOfType<armnn::ActivationLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>));
+
+    armnn::Optimizer::Optimize(graph);
+
+    // The permute is moved to the top. New permutes for layers with multiple inputs
+    BOOST_TEST(CheckSequence(graph.cbegin(),
+                             graph.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::MultiplicationLayer>,
+                             &IsLayerOfType<armnn::MemCopyLayer>,
+                             &IsLayerOfType<armnn::FloorLayer>,
+                             &IsLayerOfType<armnn::FakeQuantizationLayer>,
+                             &IsLayerOfType<armnn::AdditionLayer>,
+                             &IsLayerOfType<armnn::ActivationLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>));
+}
+
+BOOST_AUTO_TEST_CASE(PermuteAsReshape)
+{
+    armnn::Graph graph;
+
+    const armnn::TensorInfo infoIn({ 1, 2, 3, 1 }, armnn::DataType::Float32);
+    const armnn::TensorInfo infoOut({ 1, 1, 2, 3 }, armnn::DataType::Float32);
+
+    auto output = graph.AddLayer<armnn::OutputLayer>(0, "output");
+
+    graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input")
+        ->GetOutputHandler().SetTensorInfo(infoIn);
+
+    // Insert permute
+    graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0),
+                                              armnn::PermuteDescriptor({ 0, 2, 3, 1 }), "")
+        ->GetOutputHandler().SetTensorInfo(infoOut);
+
+    BOOST_TEST(CheckSequence(graph.cbegin(),
+                             graph.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>));
+
+    armnn::Optimizer::Optimize(graph);
+
+    // The permute is replaced by an equivalent reshape.
+
+    auto checkReshape = [&infoOut](const armnn::Layer* const layer) -> bool
+        {
+            const auto reshapeLayer = static_cast<const armnn::ReshapeLayer*>(layer);
+            return IsLayerOfType<armnn::ReshapeLayer>(layer) &&
+                   (reshapeLayer->GetParameters().m_TargetShape == infoOut.GetShape()) &&
+                   (reshapeLayer->GetOutputHandler().GetTensorInfo().GetShape() == infoOut.GetShape());
+        };
+
+    BOOST_TEST(CheckSequence(graph.cbegin(),
+                             graph.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             checkReshape,
+                             &IsLayerOfType<armnn::OutputLayer>));
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes)
+{
+    armnn::Graph graph;
+
+    const armnn::TensorInfo info0({ 1, 2, 3, 5 }, armnn::DataType::Float32);
+
+    auto output = graph.AddLayer<armnn::OutputLayer>(0, "output");
+    auto input = graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input");
+
+    input->GetOutputHandler().SetTensorInfo(info0);
+
+    {
+        // Insert two reshapes
+        const armnn::TensorInfo info1({1, 30, 1, 1}, armnn::DataType::Float32);
+        const armnn::TensorInfo info2({1, 2, 1, 15}, armnn::DataType::Float32);
+
+        auto reshape1 = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0),
+                                                                  armnn::ReshapeDescriptor{ info1.GetShape() },
+                                                                  "reshape1");
+        auto reshape2 = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0),
+                                                                  armnn::ReshapeDescriptor{ info2.GetShape() },
+                                                                  "reshape2");
+
+        reshape1->GetOutputHandler().SetTensorInfo(info1);
+        reshape2->GetOutputHandler().SetTensorInfo(info2);
+
+        BOOST_TEST(CheckSequence(graph.cbegin(),
+                                 graph.cend(),
+                                 &IsLayerOfType<armnn::InputLayer>,
+                                 &IsLayerOfType<armnn::ReshapeLayer>,
+                                 &IsLayerOfType<armnn::ReshapeLayer>,
+                                 &IsLayerOfType<armnn::OutputLayer>));
+
+        armnn::Optimizer::Optimize(graph);
+
+        auto checkReshape = [&info2](const armnn::Layer* const layer) -> bool
+            {
+                const auto reshapeLayer = static_cast<const armnn::ReshapeLayer*>(layer);
+                return IsLayerOfType<armnn::ReshapeLayer>(layer) &&
+                    (reshapeLayer->GetParameters().m_TargetShape == info2.GetShape()) &&
+                    (reshapeLayer->GetOutputHandler().GetTensorInfo().GetShape() == info2.GetShape());
+            };
+
+        // The two reshapes are replaced by a single equivalent reshape
+        BOOST_TEST(CheckSequence(graph.cbegin(),
+                                 graph.cend(),
+                                 &IsLayerOfType<armnn::InputLayer>,
+                                 checkReshape,
+                                 &IsLayerOfType<armnn::OutputLayer>));
+    }
+
+    {
+        // Insert a reshape to the input shape
+        auto reshapeToIn = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0),
+                                                                     armnn::ReshapeDescriptor{ info0.GetShape() },
+                                                                     "reshapeToIn");
+
+        reshapeToIn->GetOutputHandler().SetTensorInfo(info0);
+
+        armnn::Optimizer::Optimize(graph);
+
+        // The two reshapes are removed
+        BOOST_TEST(CheckSequence(graph.cbegin(),
+                                 graph.cend(),
+                                 &IsLayerOfType<armnn::InputLayer>,
+                                 &IsLayerOfType<armnn::OutputLayer>));
+    }
+}
+
+BOOST_AUTO_TEST_CASE(SquashEqualSiblings)
+{
+    armnn::Graph graph;
+
+    armnn::LayerBindingId outputId = 0;
+
+    const armnn::TensorInfo info({ 1, 2, 3, 5 }, armnn::DataType::Float32);
+    const armnn::TensorInfo permuted({ 1, 5, 2, 3 }, armnn::DataType::Float32);
+
+    auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
+    input->GetOutputSlot().SetTensorInfo(info);
+
+    // Insert equal permutes, equal reshapes and something else
+    const armnn::PermuteDescriptor permDesc({ 0, 2, 3, 1 });
+    const armnn::ReshapeDescriptor reshapeDesc{ { 1, 3, 1, 5 } };
+
+    armnn::Layer* layer;
+
+    layer = graph.AddLayer<armnn::PermuteLayer>(permDesc, "");
+    layer->GetOutputSlot().SetTensorInfo(permuted);
+    layer->GetOutputSlot().Connect(graph.AddLayer<armnn::OutputLayer>(outputId++, "")->GetInputSlot(0));
+    input->GetOutputSlot().Connect(layer->GetInputSlot(0));
+
+    layer = graph.AddLayer<armnn::ReshapeLayer>(reshapeDesc, "");
+    layer->GetOutputSlot().Connect(graph.AddLayer<armnn::OutputLayer>(outputId++, "")->GetInputSlot(0));
+    input->GetOutputSlot().Connect(layer->GetInputSlot(0));
+
+    layer = graph.AddLayer<armnn::FloorLayer>("");
+    layer->GetOutputSlot().Connect(graph.AddLayer<armnn::OutputLayer>(outputId++, "")->GetInputSlot(0));
+    input->GetOutputSlot().Connect(layer->GetInputSlot(0));
+
+    layer = graph.AddLayer<armnn::ReshapeLayer>(reshapeDesc, "");
+    layer->GetOutputSlot().Connect(graph.AddLayer<armnn::OutputLayer>(outputId++, "")->GetInputSlot(0));
+    input->GetOutputSlot().Connect(layer->GetInputSlot(0));
+
+    layer = graph.AddLayer<armnn::PermuteLayer>(permDesc, "");
+    layer->GetOutputSlot().SetTensorInfo(permuted);
+    layer->GetOutputSlot().Connect(graph.AddLayer<armnn::OutputLayer>(outputId++, "")->GetInputSlot(0));
+    input->GetOutputSlot().Connect(layer->GetInputSlot(0));
+
+    BOOST_TEST(CheckSequence(graph.cbegin(),
+                             graph.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::ReshapeLayer>,
+                             &IsLayerOfType<armnn::FloorLayer>,
+                             &IsLayerOfType<armnn::ReshapeLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>));
+
+    armnn::Optimizer::Optimize(graph);
+
+    // The permutes and reshapes are squashed.
+
+    BOOST_TEST(CheckSequence(graph.cbegin(),
+                             graph.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<armnn::PermuteLayer>,
+                             &IsLayerOfType<armnn::ReshapeLayer>,
+                             &IsLayerOfType<armnn::FloorLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/RuntimeTests.cpp b/src/armnn/test/RuntimeTests.cpp
index 117df5e..e42d71c 100644
--- a/src/armnn/test/RuntimeTests.cpp
+++ b/src/armnn/test/RuntimeTests.cpp
@@ -115,7 +115,7 @@
     BOOST_TEST(leakedBefore == leakedAfter);
 
     // Add resonable threshold after and before running valgrind with the ACL clear cache function.
-    BOOST_TEST(reachableAfter - reachableBefore < 30000);
+    BOOST_TEST(static_cast<long>(reachableAfter) - static_cast<long>(reachableBefore) < 1024);
 
     // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
     // so they are assigned to, but still considered unused, causing a warning
@@ -178,7 +178,18 @@
 
     // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass
     BOOST_TEST(leakedBefore == leakedAfter);
-    BOOST_TEST(reachableBefore == reachableAfter);
+
+    #if defined(ARMCOMPUTECL_ENABLED)
+        // reachableBefore == reachableAfter should hold, but on OpenCL with Android we are still
+        // not entirely able to control the memory in the OpenCL driver. Testing is showing that
+        // after this test (which clears all OpenCL memory) we are clearing a little bit more than
+        // we expect, probably depending on the order in which other tests are run.
+        BOOST_TEST(reachableBefore - reachableAfter <= 24);
+    #else
+        BOOST_TEST(reachableBefore == reachableAfter);
+    #endif
+
+    BOOST_TEST(reachableBefore >= reachableAfter);
 
     // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
     // so they are assigned to, but still considered unused, causing a warning
diff --git a/src/armnnCaffeParser/CaffeSupport.md b/src/armnnCaffeParser/CaffeSupport.md
new file mode 100644
index 0000000..e772480
--- /dev/null
+++ b/src/armnnCaffeParser/CaffeSupport.md
@@ -0,0 +1,31 @@
+# Caffe layers supported by the Arm NN SDK
+This reference guide provides a list of Caffe layers the Arm NN SDK currently supports. 
+
+Although some other neural networks might work, Arm tests the Arm NN SDK with Caffe implementations of the following neural networks: 
+
+- AlexNet.
+- Cifar10.
+- Inception-BN.
+- Resnet_50, Resnet_101 and Resnet_152.
+- VGG_CNN_S, VGG_16 and VGG_19.
+- Yolov1_tiny.
+- Lenet.
+- MobileNetv1.
+
+The Arm NN SDK supports the following machine learning layers for Caffe networks: 
+
+
+- BatchNorm, in inference mode. 
+- Convolution, excluding the Dilation Size, Weight Filler, Bias Filler, Engine, Force nd_im2col, and Axis parameters.
+- Eltwise, excluding the coeff parameter.
+- Inner Product, excluding the Weight Filler, Bias Filler, Engine, and Axis parameters.
+- Input.
+- LRN, excluding the Engine parameter.
+- Pooling, excluding the Stochastic Pooling and Engine parameters.
+- ReLU.
+- Scale.
+- Softmax, excluding the Axis and Engine parameters.
+- Split.
+- Dropout, in inference mode.
+
+More machine learning layers will be supported in future releases. 
\ No newline at end of file
diff --git a/src/armnnCaffeParser/README.md b/src/armnnCaffeParser/README.md
new file mode 100644
index 0000000..92d7d0a
--- /dev/null
+++ b/src/armnnCaffeParser/README.md
@@ -0,0 +1,5 @@
+# Arm NN Caffe parser
+
+`armnnCaffeParser` is a library for loading neural networks defined in Caffe protobuf files into the Arm NN runtime.
+
+For more information about the Caffe layers that are supported, and the networks that have been tested, see [CaffeSupport.md](./CaffeSupport.md).
\ No newline at end of file
diff --git a/src/armnnTfParser/README.md b/src/armnnTfParser/README.md
new file mode 100644
index 0000000..fe3f2b8
--- /dev/null
+++ b/src/armnnTfParser/README.md
@@ -0,0 +1,5 @@
+# The Arm NN TensorFlow parser
+
+`armnnTfParser` is a library for loading Neural Networks defined by TensorFlow protobuf files into the Arm NN runtime.
+
+For more information about the TensorFlow operators that are supported, and the networks that have been tested, see [TensorFlowSupport.md](./TensorFlowSupport.md)
\ No newline at end of file
diff --git a/src/armnnTfParser/TensorFlowSupport.md b/src/armnnTfParser/TensorFlowSupport.md
new file mode 100644
index 0000000..d052a70
--- /dev/null
+++ b/src/armnnTfParser/TensorFlowSupport.md
@@ -0,0 +1,111 @@
+# TensorFlow operators that the Arm NN SDK supports 
+
+This reference guide provides a list of TensorFlow operators the Arm NN SDK currently supports. 
+
+The Arm NN SDK TensorFlow parser currently only supports fp32 operators. 
+
+These are the TensorFlow operators that the Arm NN SDK currently supports:    
+
+**avg_pool** 
+
+See the TensorFlow [avg_pool documentation](https://www.tensorflow.org/api_docs/python/tf/nn/avg_pool) for more information. 
+
+**bias_add**
+
+ See the TensorFlow [bias_add documentation](https://www.tensorflow.org/api_docs/python/tf/nn/bias_add) for more information. 
+
+**conv2d** 
+
+ See the TensorFlow [conv2d documentation](https://www.tensorflow.org/api_docs/python/tf/nn/conv2d) for more information. 
+
+**identity** 
+
+See the TensorFlow [identity documentation](https://www.tensorflow.org/api_docs/python/tf/identity) for more information. 
+
+**local_response_normalization** 
+
+See the TensorFlow [local_response_normalization documentation](https://www.tensorflow.org/api_docs/python/tf/nn/local_response_normalization)  for more information. 
+
+**max_pool**  
+
+See the TensorFlow [max_pool documentation](https://www.tensorflow.org/api_docs/python/tf/nn/max_pool) for more information. 
+
+**relu** 
+
+ See the TensorFlow [relu documentation](https://www.tensorflow.org/api_docs/python/tf/nn/relu) for more information. 
+
+**relu6** 
+
+ See the TensorFlow [relu6 documentation](https://www.tensorflow.org/api_docs/python/tf/nn/relu6) for more information. 
+
+**shape** 
+
+ See the TensorFlow [shape documentation](https://www.tensorflow.org/api_docs/python/tf/shape) for more information.  
+
+**sigmoid** 
+
+ See the TensorFlow [sigmoid documentation](https://www.tensorflow.org/api_docs/python/tf/sigmoid) for more information. 
+
+**softplus** 
+
+See the TensorFlow [softplus documentation](https://www.tensorflow.org/api_docs/python/tf/nn/softplus) for more information. 
+
+**squeeze** 
+
+See the TensorFlow [squeeze documentation](https://www.tensorflow.org/api_docs/python/tf/squeeze) for more information. 
+
+**tanh** 
+
+See the TensorFlow [tanh documentation](https://www.tensorflow.org/api_docs/python/tf/tanh) for more information. 
+
+The Arm NN SDK TensorFlow parser currently partially supports: 
+
+**add** 
+
+The parser does not support all forms of [broadcast composition](https://www.tensorflow.org/performance/xla/broadcasting), only broadcasting of scalars and 1D tensors. See the TensorFlow [add operator documentation](https://www.tensorflow.org/api_docs/python/tf/add) for more information. 
+
+**depthwise_conv2D_native** 
+
+The parser only supports a dilation rate of (1,1,1,1). See the TensorFlow [depthwise_conv2d_native documentation](https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d_native) for more information. 
+
+**fused_batch_norm** 
+
+The parser does not support training outputs. See the TensorFlow [fused_batch_norm documentation](https://www.tensorflow.org/api_docs/python/tf/nn/fused_batch_norm) for more information. 
+
+**matmul** 
+
+The parser only supports constant weights in a fully connected layer. See the TensorFlow [matmul documentation](https://www.tensorflow.org/api_docs/python/tf/matmul) for more information.  
+
+**multiply** 
+
+The parser does not support all forms of [broadcast composition](https://www.tensorflow.org/performance/xla/broadcasting), only broadcasting of scalars and 1D tensors. See the TensorFlow [multiply documentation](https://www.tensorflow.org/api_docs/python/tf/multiply) for more information. No broadcasting supported on the NEON backend.
+
+**placeholder** 
+
+ The parser only supports the NHWC data format in the input layer. See the TensorFlow [placeholder documentation](https://www.tensorflow.org/api_docs/python/tf/placeholder) for more information. 
+
+**reshape** 
+
+The parser does not support reshaping to or from 4D. See the TensorFlow [reshape documentation](https://www.tensorflow.org/api_docs/python/tf/reshape) for more information. 
+
+**resize_images** 
+
+The parser only supports `ResizeMethod.BILINEAR`. See the TensorFlow [resize_images documentation](https://www.tensorflow.org/api_docs/python/tf/image/resize_images) for more information. 
+ 
+**softmax** 
+
+The parser only supports 2D inputs and does not support selecting the `softmax` dimension. See the TensorFlow [softmax documentation](https://www.tensorflow.org/api_docs/python/tf/nn/softmax) for more information. 
+
+ 
+
+Arm tests these operators with the following TensorFlow fp32 neural networks:  
+
+* Cifar10. 
+
+* Lenet. 
+
+* mobilenet_v1_1.0_224. The Arm NN SDK only supports the non*_quant version of the network. See the [MobileNet_v1 documentation](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md) for more information on _quant networks. 
+
+* inception_v3. The Arm NN SDK only supports the official inception_v3 transformed model, using GPU acceleration only; NEON acceleration is not supported at the moment. See the TensorFlow documentation on [preparing models for mobile deployment](https://www.tensorflow.org/mobile/prepare_models) for more information on how to transform the inception_v3 network.
+
+More machine learning operators will be supported in future releases. 
diff --git a/src/armnnTfParser/TfParser.cpp b/src/armnnTfParser/TfParser.cpp
new file mode 100644
index 0000000..7c8e01b
--- /dev/null
+++ b/src/armnnTfParser/TfParser.cpp
@@ -0,0 +1,2200 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "TfParser.hpp"
+
+#include <armnn/INetwork.hpp>
+#include <armnn/Utils.hpp>
+#include <armnn/TypesUtils.hpp>
+#include <armnn/Exceptions.hpp>
+#include <armnn/Descriptors.hpp>
+
+#include <GraphTopologicalSort.hpp>
+#include <Permute.hpp>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
+
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+
+#include <boost/assert.hpp>
+#include <boost/format.hpp>
+#include <boost/core/ignore_unused.hpp>
+#include <boost/log/trivial.hpp>
+#include <boost/numeric/conversion/cast.hpp>
+#include <boost/polymorphic_cast.hpp>
+
+#include <memory>
+#include <sstream>
+#include <numeric>
+#include <functional>
+
+using namespace armnn;
+
+namespace armnnTfParser
+{
+namespace
+{
+
+const PermutationVector NHWCToArmNN = { 0, 2, 3, 1 };
+const PermutationVector ArmNNToNHWC = { 0, 3, 1, 2 };
+
+IConnectableLayer* AddSwizzleLayer(INetwork& network, IOutputSlot& input, const PermutationVector& mapping,
+    const std::string& name)
+{
+    // Add swizzle layer
+    IConnectableLayer* const layer = network.AddPermuteLayer(mapping, name.c_str());
+
+    // Connect input to swizzle layer
+    input.Connect(layer->GetInputSlot(0));
+
+    // Setup swizzled output
+    const TensorInfo outInfo = armnnUtils::Permuted(input.GetTensorInfo(), mapping);
+    layer->GetOutputSlot(0).SetTensorInfo(outInfo);
+
+    return layer;
+}
+
+IConnectableLayer* SwizzleInDeswizzleOut(INetwork& network, IOutputSlot& input, IConnectableLayer& layer,
+    const std::string& name)
+{
+    // Add swizzle layer
+    IConnectableLayer* const swizzleLayer = AddSwizzleLayer(network, input, NHWCToArmNN, "swizzle_for-" + name);
+
+    // Connect swizzledInput to layer
+    swizzleLayer->GetOutputSlot(0).Connect(layer.GetInputSlot(0));
+
+    // Add deswizzle layer
+    IConnectableLayer* const deswizzleLayer = AddSwizzleLayer(network, layer.GetOutputSlot(0), ArmNNToNHWC,
+        "deswizzle_for-" + name);
+
+    return deswizzleLayer;
+}
+
+template <typename Callable>
+void ReadMandatoryNodeAttributeImpl(const tensorflow::NodeDef& nodeDef,
+    const std::string& attribName,
+    tensorflow::AttrValue::ValueCase expectedValueCase,
+    Callable callable)
+{
+    auto iter = nodeDef.attr().find(attribName);
+    if (iter != nodeDef.attr().end())
+    {
+        const auto& attrValue = iter->second;
+        if (attrValue.value_case() == expectedValueCase)
+        {
+            callable(attrValue);
+        }
+        else
+        {
+            throw ParseException(boost::str(boost::format(
+                "Attribute %1% of node %2% expected to have %3% as tensorflow::AttrValue::ValueCase, "
+                "but found %4% instead")
+                % attribName
+                % nodeDef.name()
+                % static_cast<int>(expectedValueCase)
+                % static_cast<int>(attrValue.value_case())));
+        }
+    }
+    else
+    {
+        throw ParseException(boost::str(boost::format("Could not find required attribute %1% in node %2%")
+            % attribName % nodeDef.name()));
+    }
+}
+
+template <typename Callable>
+void ReadOptionalNodeAttributeImpl(const tensorflow::NodeDef& nodeDef,
+    const std::string& attribName,
+    tensorflow::AttrValue::ValueCase expectedValueCase,
+    Callable callable)
+{
+    auto iter = nodeDef.attr().find(attribName);
+    if (iter != nodeDef.attr().end())
+    {
+        const auto& attrValue = iter->second;
+        if (attrValue.value_case() == expectedValueCase)
+        {
+            callable(attrValue);
+        }
+        else
+        {
+            throw ParseException(boost::str(boost::format(
+                "Attribute %1% of node %2% expected to have %3% as tensorflow::AttrValue::ValueCase, "
+                "but found %4% instead")
+                % attribName
+                % nodeDef.name()
+                % static_cast<int>(expectedValueCase)
+                % static_cast<int>(attrValue.value_case())));
+        }
+    }
+}
+
+float ReadMandatoryNodeFloatAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
+{
+    float attribValue = 0.0f;
+    ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kF,
+        [&attribValue](const tensorflow::AttrValue& attrValue)
+    {
+        attribValue = attrValue.f();
+    });
+    return attribValue;
+}
+
+uint32_t ReadMandatoryNodeUint32Attribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
+{
+    uint32_t attribValue = 0u;
+    ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kI,
+        [&attribValue](const tensorflow::AttrValue& attrValue)
+    {
+        attribValue = static_cast<uint32_t>(attrValue.i());
+    });
+    return attribValue;
+}
+
+std::string ReadMandatoryNodeStringAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
+{
+    std::string attribValue = "";
+    ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kS,
+        [&attribValue](const tensorflow::AttrValue& attrValue)
+    {
+        attribValue = attrValue.s();
+    });
+    return attribValue;
+}
+
+std::vector<uint32_t> ReadMandatoryNodeUint32ListAttribute(const tensorflow::NodeDef& nodeDef,
+    const std::string& name)
+{
+    std::vector<uint32_t> attriList;
+    ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kList,
+        [&attriList](const tensorflow::AttrValue& attrValue)
+    {
+        for (int attriNum = 0; attriNum < attrValue.list().i_size(); ++attriNum)
+        {
+            attriList.push_back(static_cast<uint32_t>(attrValue.list().i(attriNum)));
+        }
+    });
+
+    return attriList;
+}
+
+std::vector<uint32_t> ReadOptionalNodeUint32ListAttribute(const tensorflow::NodeDef& nodeDef,
+    const std::string& name)
+{
+    std::vector<uint32_t> attriList;
+    ReadOptionalNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kList,
+        [&attriList](const tensorflow::AttrValue& attrValue)
+    {
+        for (int attriNum = 0; attriNum < attrValue.list().i_size(); ++attriNum)
+        {
+            attriList.push_back(static_cast<uint32_t>(attrValue.list().i(attriNum)));
+        }
+    });
+
+    return attriList;
+}
+
+bool ReadOptionalNodeBoolAttribute(const tensorflow::NodeDef& nodeDef,
+    const std::string& name,
+    bool defaultValue = false)
+{
+    bool attribValue = defaultValue;
+    ReadOptionalNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kB,
+        [&attribValue](const tensorflow::AttrValue& attrValue)
+    {
+        attribValue = attrValue.b();
+    });
+    return attribValue;
+}
+
+/// Reads a mandatory dtype ("type") attribute from a tensorflow node.
+tensorflow::DataType ReadMandatoryNodeTypeAttribute(const tensorflow::NodeDef& nodeDef, const std::string& name)
+{
+    // DT_INVALID is a sentinel that the callback below is expected to overwrite.
+    tensorflow::DataType result = tensorflow::DT_INVALID;
+    ReadMandatoryNodeAttributeImpl(nodeDef, name, tensorflow::AttrValue::kType,
+        [&result](const tensorflow::AttrValue& attrValue)
+    {
+        result = attrValue.type();
+    });
+    return result;
+}
+
+/// Computes the TensorInfo produced by reshaping 'input' to 'targetDims',
+/// resolving at most one wildcard dimension (-1) from the input's element count
+/// (TensorFlow reshape semantics). Throws ParseException if more than one -1 is present.
+TensorInfo PrepareReshape(const TensorInfo& input, const std::vector<int32_t>& targetDims)
+{
+    std::vector<unsigned int> outDims(targetDims.begin(), targetDims.end());
+    const auto stretchDim = std::find(targetDims.begin(), targetDims.end(), -1);
+
+    if (stretchDim != targetDims.end())
+    {
+        // Only a single wildcard is meaningful; reject shapes with two or more.
+        if (std::find(std::next(stretchDim), targetDims.end(), -1) != targetDims.end())
+        {
+            throw ParseException("At most one component of shape can be -1");
+        }
+
+        // Initialising the product with -1 cancels out the single -1 entry in
+        // targetDims, so this yields the product of the explicitly-sized dims.
+        auto targetNumElements = boost::numeric_cast<unsigned int>(std::accumulate(targetDims.begin(), targetDims.end(),
+            -1, std::multiplies<int32_t>()));
+        auto stretchIndex = static_cast<size_t>(std::distance(targetDims.begin(), stretchDim));
+        // The wildcard dimension absorbs whatever is left of the input's elements.
+        outDims[stretchIndex] = input.GetNumElements() / targetNumElements;
+    }
+
+    TensorInfo reshapeInfo = input;
+    reshapeInfo.SetShape(TensorShape{ static_cast<unsigned int>(outDims.size()), outDims.data() });
+
+    return reshapeInfo;
+}
+
+// We need the input0Slot to guide the reshape for input1Slot.
+// Makes the tensor on input1Slot broadcast-compatible with input0Slot by
+// inserting a Reshape layer: input1's leading dimension is placed at the
+// channel position (last dim for NHWC, otherwise third from the end) and every
+// other dimension is set to 1. Returns the reshape layer's output slot.
+IOutputSlot* BroadcastForAddandMul(IOutputSlot* input0Slot, IOutputSlot* input1Slot, bool isNHWC, INetwork& m_Network,
+                                   const tensorflow::NodeDef& nodeDef)
+{
+    const TensorInfo& input1Info = input1Slot->GetTensorInfo();
+    const TensorInfo inputTensorInfo = input0Slot->GetTensorInfo();
+    // Index of input0's dimension that input1 should line up with.
+    const unsigned int matchDim = inputTensorInfo.GetNumDimensions() - (isNHWC ? 1 : 3);
+    std::array<unsigned int, MaxNumOfTensorDimensions> reshapedDimensions;
+    std::fill_n(reshapedDimensions.begin(), inputTensorInfo.GetNumDimensions(), 1);
+    reshapedDimensions[matchDim] = input1Info.GetShape()[0];   // assumes input1 is effectively 1D — TODO confirm at call sites
+
+    armnn::TensorInfo reshapedInfo = input1Info;
+    reshapedInfo.SetShape(TensorShape{ inputTensorInfo.GetNumDimensions(), reshapedDimensions.data() });
+
+    const std::string reshapeLayerName = "reshape_for-" + nodeDef.name();
+    ReshapeDescriptor reshapeDesc;
+    reshapeDesc.m_TargetShape = reshapedInfo.GetShape();
+    IConnectableLayer* const reshapeLayer = m_Network.AddReshapeLayer(reshapeDesc, reshapeLayerName.c_str());
+
+    input1Slot->Connect(reshapeLayer->GetInputSlot(0));
+    reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo);
+
+    // Hand the caller the reshaped slot so it can be wired in place of the original.
+    input1Slot = &reshapeLayer->GetOutputSlot(0);
+
+    return input1Slot;
+}
+
+/// Splits a TensorFlow input name of the form "node:index" into the node name
+/// and output index. Names without a ':' refer to output 0.
+/// Throws ParseException for indices that are malformed, negative or > 100,
+/// so callers only ever see the parser's own exception type (std::stoi would
+/// otherwise leak std::invalid_argument for suffixes like "foo:" or "foo:bar").
+OutputId ParseOutputId(const std::string & name)
+{
+    unsigned int outputNum = 0;
+    size_t colonPos = name.find_last_of(":");
+    if (colonPos != std::string::npos)
+    {
+        int n = 0;
+        try
+        {
+            n = std::stoi(name.substr(colonPos+1));
+        }
+        catch (const std::exception&)
+        {
+            // Covers std::invalid_argument (non-numeric) and std::out_of_range.
+            throw ParseException("Output tensor id is not a valid number for "+name);
+        }
+        if (n<0 || n>100)
+        {
+            throw ParseException("Output tensor id is out of range for "+name);
+        }
+        outputNum = static_cast<unsigned int>(n);
+    }
+    return OutputId(name.substr(0,colonPos),outputNum);
+}
+
+} // namespace
+
+// Dispatch table mapping a TensorFlow op name to the TfParser member function
+// that converts it to armnn.
+const std::map<std::string, TfParser::OperationParsingFunction> TfParser::ms_OperationNameToParsingFunctions = {
+    { "Const",                 &TfParser::ParseConst },
+    { "Add",                   &TfParser::ParseAdd },
+    { "BiasAdd",               &TfParser::ParseBiasAdd },
+    { "Identity",              &TfParser::ParseIdentity },
+    { "Conv2D",                &TfParser::ParseConv2D },
+    { "DepthwiseConv2dNative", &TfParser::ParseDepthwiseConv2D },
+    { "FusedBatchNorm",        &TfParser::ParseFusedBatchNorm },
+    { "ConcatV2",              &TfParser::ParseConcat },
+    { "LRN",                   &TfParser::ParseLrn },
+    { "MatMul",                &TfParser::ParseMatMul },
+    { "Mul",                   &TfParser::ParseMul },
+    { "Placeholder",           &TfParser::ParsePlaceholder },
+    { "Relu",                  &TfParser::ParseRelu },
+    { "Relu6",                 &TfParser::ParseRelu6 },
+    { "Reshape",               &TfParser::ParseReshape },
+    { "ResizeBilinear",        &TfParser::ParseResizeBilinear },
+    { "Shape",                 &TfParser::ParseShape },
+    { "Squeeze",               &TfParser::ParseSqueeze },
+    { "Sigmoid",               &TfParser::ParseSigmoid },
+    { "Softmax",               &TfParser::ParseSoftmax },
+    { "Softplus",              &TfParser::ParseSoftplus },
+    { "Tanh",                  &TfParser::ParseTanh },
+    { "MaxPool",               &TfParser::ParseMaxPool },
+    { "AvgPool",               &TfParser::ParseAvgPool },
+};
+
+/// Creates a TfParser on the heap; the caller takes ownership of the returned
+/// pointer and must release it via ITfParser::Destroy().
+ITfParser* ITfParser::CreateRaw()
+{
+    return new TfParser();
+}
+
+/// Creates a TfParser wrapped in a smart pointer that releases it through
+/// ITfParser::Destroy (keeps allocation/deallocation inside this library).
+ITfParserPtr ITfParser::Create()
+{
+    return ITfParserPtr(CreateRaw(), &ITfParser::Destroy);
+}
+
+/// Deletes a parser previously obtained from CreateRaw()/Create().
+void ITfParser::Destroy(ITfParser* parser)
+{
+    delete parser;
+}
+
+/// Computes the front/back padding needed for one spatial dimension under
+/// TensorFlow's 'SAME' padding scheme. With samePadding == false ('VALID'),
+/// both outputs are zero.
+inline void CalculateSamePadding(uint32_t inputSize, uint32_t stride,
+                                 uint32_t filterSize, bool samePadding,
+                                 uint32_t* paddingFront, uint32_t* paddingBack) {
+    *paddingFront = 0;
+    *paddingBack = 0;
+
+    if (!samePadding) {
+        return;
+    }
+
+    // SAME produces ceil(inputSize / stride) output elements.
+    const uint32_t outputSize = (inputSize + stride - 1) / stride;
+    // Extent the filter sweeps over to generate that many outputs.
+    const uint32_t requiredInput = (outputSize - 1) * stride + filterSize;
+    if (requiredInput > inputSize) {
+        const uint32_t totalPadding = requiredInput - inputSize;
+        // Split as evenly as possible; any odd element goes at the back.
+        *paddingFront = totalPadding / 2;
+        *paddingBack = totalPadding - *paddingFront;
+    }
+}
+
+/// Convenience wrapper around CalculateSamePadding() taking the padding results
+/// by reference. Note the parameter order differs: this takes (input, kernel,
+/// stride) while CalculateSamePadding takes (input, stride, kernel).
+void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t& outPadHead, uint32_t& outPadTail,
+                 bool samePadding)
+{
+    CalculateSamePadding(input, stride, kernel, samePadding, &outPadHead, &outPadTail);
+}
+
+/// An Abstract base class which represents a single tensorflow operation (node)
+/// that has been (potentially partially) converted to Armnn.
+/// It may not yet have been fully converted into actual Armnn layers.
+class ParsedTfOperation
+{
+public:
+    ParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
+    : m_Parser(parser)
+    , m_Node(node)
+    {
+    }
+
+    virtual ~ParsedTfOperation() {};
+
+    /// Returns the tensorflow node this operation was created from.
+    const tensorflow::NodeDef& GetNode() const { return m_Node; }
+
+    /// Gets the ArmNN IOutputSlot corresponding to the given output index of the Tensorflow operation.
+    /// This may result in the creation of Armnn layers if this was deferred (e.g. see ParsedConstTfOperation).
+    virtual IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) = 0;
+
+    /// If this operation is an Identity then this will return the 'parent' operation,
+    /// following the Identity chain recursively.
+    virtual ParsedTfOperation* ResolveIdentityOperations()
+    {
+        return this;
+    }
+
+protected:
+    /// Parser that created this operation (not owned); gives subclasses access to its network.
+    TfParser* m_Parser;
+    /// The tensorflow node this operation corresponds to; must outlive this object.
+    const tensorflow::NodeDef& m_Node;
+};
+
+/// An ParsedTfOperation where the Armnn equivalent is a single layer,
+/// with output slots that correspond directly to the Tf node outputs.
+class SingleLayerParsedTfOperation : public ParsedTfOperation
+{
+public:
+    SingleLayerParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node, IConnectableLayer* layer)
+    : ParsedTfOperation(parser, node)
+    , m_Layer(layer)
+    {
+    }
+
+    /// Maps the Tf output index straight onto the layer's armnn output slot.
+    /// Throws ParseException when the index exceeds the layer's slot count.
+    IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
+    {
+        BOOST_ASSERT(m_Layer);
+        // Assume one-to-one mapping between Tf and armnn output slots.
+        unsigned int armnnOutputSlotIdx = tfOutputIndex;
+        if (armnnOutputSlotIdx >= m_Layer->GetNumOutputSlots())
+        {
+            throw ParseException(
+                boost::str(boost::format("The requested output slot #%1% "
+                    "for %2% does not exist") % armnnOutputSlotIdx % m_Layer->GetName()));
+        }
+        return m_Layer->GetOutputSlot(armnnOutputSlotIdx);
+    }
+
+protected:
+    /// The armnn layer this operation maps to; may be null in subclasses that defer creation.
+    IConnectableLayer* m_Layer;
+};
+
+/// A SingleLayerParsedTfOperation for deferred layer creation:
+/// the armnn layer is only built (via CreateLayerDeferred) the first time one
+/// of its output slots is actually requested.
+class DeferredSingleLayerParsedTfOperation : public SingleLayerParsedTfOperation
+{
+public:
+    DeferredSingleLayerParsedTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
+    : SingleLayerParsedTfOperation(parser, node, nullptr)
+    {
+    }
+
+    IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
+    {
+        // Lazily create the layer on first use, then defer to the base class lookup.
+        if (!m_Layer)
+        {
+            CreateLayerDeferred();
+        }
+        return SingleLayerParsedTfOperation::ResolveArmnnOutputSlot(tfOutputIndex);
+    }
+
+private:
+    /// Subclasses must create the armnn layer and assign it to m_Layer.
+    virtual void CreateLayerDeferred() = 0;
+};
+
+
+/// Constructs a parser with no network; m_Network starts as an empty smart
+/// pointer (null pointer, null deleter) — presumably populated during parsing.
+TfParser::TfParser()
+    : m_Network(nullptr, nullptr)
+{
+}
+
+
+/// Follows chains of Identity nodes until a non-Identity node is reached and
+/// returns it. Throws ParseException for malformed or dangling Identity nodes.
+const tensorflow::NodeDef* TfParser::ResolveIdentityNode(const tensorflow::NodeDef* nodeDef)
+{
+    // Iterative walk — each Identity node forwards to exactly one input.
+    while (nodeDef->op() == "Identity")
+    {
+        if (nodeDef->input_size() != 1)
+        {
+            throw ParseException("Identity node does not have correct amount of inputs!");
+        }
+
+        auto it = m_NodesByName.find(nodeDef->input(0));
+        if (it == m_NodesByName.end())
+        {
+            throw ParseException("Cannot find what the Identity node is linked to!");
+        }
+        nodeDef = it->second;
+    }
+
+    return nodeDef;
+}
+
+/// Resolves every input name of 'nodeDef' (of the form "node:index") to the
+/// NodeDef it refers to, paired with the referenced output index.
+/// Throws ParseException if an input names an unknown node.
+std::vector<OutputOfConstNodeDef>
+TfParser::GetTfInputNodes(const tensorflow::NodeDef& nodeDef) const
+{
+    const int numInputs = nodeDef.input_size();
+
+    std::vector<OutputOfConstNodeDef> inputNodes;
+    inputNodes.reserve(boost::numeric_cast<size_t>(numInputs));
+    for (int j = 0; j < numInputs; ++j)
+    {
+        const OutputId outputId = ParseOutputId(nodeDef.input(j));
+        auto inputIt = m_NodesByName.find(outputId.m_IndexedValue);
+        if (inputIt == m_NodesByName.end())
+        {
+            throw ParseException(
+                "Can't find node '" + nodeDef.input(j) +
+                "', which is listed as an input of '" + nodeDef.name() + "'");
+        }
+        inputNodes.emplace_back(inputIt->second, outputId.m_Index);
+    }
+
+    return inputNodes;
+}
+
+/// Looks up the inputs of 'nodeDef', checks their count matches
+/// 'expectedNumInputs', and returns the already-parsed operations they
+/// correspond to, with Identity operations transparently resolved.
+/// Throws ParseException on a count mismatch or an unparsed input.
+std::vector<OutputOfParsedTfOperation>
+TfParser::GetInputParsedTfOperationsChecked(const tensorflow::NodeDef& nodeDef,
+                                            std::size_t expectedNumInputs)
+{
+    // Fetch the tensorflow nodes connected as inputs and validate the size.
+    std::vector<OutputOfConstNodeDef> nodes = GetTfInputNodes(nodeDef);
+    const std::size_t numInputs = nodes.size();
+    if (numInputs != expectedNumInputs)
+    {
+        throw ParseException(boost::str(boost::format("Unexpected number of inputs for node %1%. "
+            "Expected %2%, found %3%") % nodeDef.name() % expectedNumInputs % numInputs));
+    }
+    // Fetch the corresponding ParsedTfOperation operations
+    std::vector<OutputOfParsedTfOperation> result;
+    for (auto&& node : nodes)
+    {
+        auto it = m_ParsedTfOperations.find(node.m_IndexedValue->name());
+        if (it == m_ParsedTfOperations.end())
+        {
+            throw ParseException("Node with name '" + node.m_IndexedValue->name() + "' has not been parsed");
+        }
+        ParsedTfOperation* parsedOp = it->second.get();
+        // Transparently 'skip' any Identity operations. This simplifies the logic inside the ParseXXX() functions.
+        parsedOp = parsedOp->ResolveIdentityOperations();
+        result.push_back(OutputOfParsedTfOperation(parsedOp,node.m_Index));
+    }
+    return result;
+}
+
+/// Parses an Add node. Add(MatMul, Const) — with the operands in either order —
+/// is fused into a single armnn FullyConnected layer; any other Add becomes a
+/// plain addition layer.
+ParsedTfOperationPtr TfParser::ParseAdd(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
+
+    // If one of the inputs is a MatMul and the other is a const, then we handle both nodes together as FullyConnected
+    if (inputs[0].m_IndexedValue->GetNode().op() == "MatMul" &&
+        HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
+    {
+        IConnectableLayer* layer =
+            AddFullyConnectedLayer(inputs[0].m_IndexedValue->GetNode(),
+                                   &nodeDef,nodeDef.name().c_str());
+        return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+    }
+    else if (HasParsedConstTensor<float>(inputs[0].m_IndexedValue->GetNode().name()) &&
+                                         inputs[1].m_IndexedValue->GetNode().op() == "MatMul")
+    {
+        // Mirror case: const bias first, MatMul second.
+        IConnectableLayer* layer =
+            AddFullyConnectedLayer(inputs[1].m_IndexedValue->GetNode(),
+                                   &nodeDef,nodeDef.name().c_str());
+        return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+    }
+    else
+    {
+        // Otherwise it's just a regular addition
+        return AddAdditionLayer(nodeDef);
+    }
+}
+
+/// Parses a BiasAdd node by forwarding to AddAdditionLayer with the bias flag
+/// set (presumably enabling bias-style broadcasting — see AddAdditionLayer).
+ParsedTfOperationPtr TfParser::ParseBiasAdd(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    return AddAdditionLayer(nodeDef, true);
+}
+
+/// An ParsedTfOperation which forwards to another (used for Identity nodes).
+class ParsedIdentityTfOperation : public ParsedTfOperation
+{
+public:
+    ParsedIdentityTfOperation(TfParser* parser, const tensorflow::NodeDef& node, ParsedTfOperation* representative)
+        : ParsedTfOperation(parser, node)
+        , m_Representative(representative)
+    {
+    }
+
+    /// Delegates slot resolution to the operation this Identity forwards to.
+    virtual IOutputSlot& ResolveArmnnOutputSlot(unsigned int tfOutputIndex) override
+    {
+        BOOST_ASSERT(m_Representative);
+        return m_Representative->ResolveArmnnOutputSlot(tfOutputIndex);
+    }
+
+    /// Collapses chains of Identity operations down to the first non-Identity one.
+    virtual ParsedTfOperation* ResolveIdentityOperations() override
+    {
+        return m_Representative->ResolveIdentityOperations();
+    }
+
+private:
+    /// The operation this Identity stands in for (not owned).
+    ParsedTfOperation* m_Representative;
+};
+
+/// Parses an Identity node into a forwarding operation with no armnn layer.
+ParsedTfOperationPtr TfParser::ParseIdentity(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
+    // Any requests for the output slots of this node should be forwarded to the node connected as input.
+    return std::make_unique<ParsedIdentityTfOperation>(this, nodeDef, inputs[0].m_IndexedValue);
+}
+
+/// An ParsedTfOperation for a Const node.
+/// Creation of the armnn ConstLayer is deferred until it is actually needed, because Const nodes are mostly used
+/// for weight inputs to MatMul/Conv2D nodes and in these cases armnn doesn't need a ConstLayer.
+template <typename T>
+class ParsedConstTfOperation : public DeferredSingleLayerParsedTfOperation
+{
+public:
+    /// Copies tensorInfo.GetNumElements() values from tensorData into owned storage.
+    ParsedConstTfOperation(TfParser* parser, const tensorflow::NodeDef& node,
+        const T* tensorData, const TensorInfo& tensorInfo)
+        : DeferredSingleLayerParsedTfOperation(parser, node),
+        m_Storage(tensorData, tensorData + tensorInfo.GetNumElements()),
+        m_TensorInfo(tensorInfo)
+    {
+        BOOST_ASSERT(tensorInfo.GetDataType() == GetDataType<T>());
+    }
+
+    /// Creates the armnn ConstantLayer on first use (see DeferredSingleLayerParsedTfOperation).
+    void CreateLayerDeferred() override
+    {
+        BOOST_ASSERT(m_Layer == nullptr);
+        m_Layer = m_Parser->m_Network->AddConstantLayer(ConstTensor(m_TensorInfo, m_Storage), m_Node.name().c_str());
+        m_Layer->GetOutputSlot(0).SetTensorInfo(m_TensorInfo);
+    }
+
+    /// Returns the constant data as a ConstTensor backed by outputTensorData,
+    /// optionally swizzled from TensorFlow's HWIO weight layout to armnn's OIHW.
+    ConstTensor GetConstTensor(bool swizzleForConvolutionWeights, std::vector<T>& outputTensorData) const
+    {
+        // Mappings from TensorFlow filter tensors to the ArmNN filter tensors.
+        // Tensorflow weights are [H, W, In, Out]
+        // ArmNN weights are [Out, In, H, W]
+        static const PermutationVector HWIOToOIHW = {2, 3, 1, 0};
+
+        const TensorInfo outInfo = swizzleForConvolutionWeights
+                                   ? armnnUtils::Permuted(m_TensorInfo, HWIOToOIHW)
+                                   : m_TensorInfo;
+
+        outputTensorData.resize(m_TensorInfo.GetNumElements());
+
+        // Copy or swizzle from the permanent storage into the storage the caller provided.
+        if (swizzleForConvolutionWeights)
+        {
+            armnnUtils::Permute(outInfo.GetShape(), HWIOToOIHW, m_Storage.data(), outputTensorData.data());
+        }
+        else
+        {
+            memcpy(outputTensorData.data(), m_Storage.data(), m_TensorInfo.GetNumBytes());
+        }
+        // Update the result to point to the user provided storage
+        ConstTensor constTensor(outInfo, outputTensorData);
+        return constTensor;
+    }
+
+private:
+    ///< Manages the lifetime of the tensor data.
+    std::vector<T> m_Storage;
+    ///< Describes the layout of the tensor and points to the data in m_Storage.
+    TensorInfo m_TensorInfo;
+};
+
+/// Maps a TensorFlow tensor DataType onto the equivalent armnn DataType.
+/// Only DT_FLOAT and DT_INT32 are supported; anything else throws ParseException.
+DataType ConvertTfTensorDataType(const tensorflow::DataType tfDataType)
+{
+    switch (tfDataType)
+    {
+    case tensorflow::DT_FLOAT:
+        return DataType::Float32;
+    case tensorflow::DT_INT32:
+        return DataType::Signed32;
+    default:
+        throw ParseException(boost::str(
+            boost::format("Unknown DataType %1% for node")
+            % tensorflow::DataType_Name(tfDataType)));
+    }
+}
+
+/// Helper that extracts a tensor's value list (float_val/int_val) from a
+/// TensorProto into a raw byte buffer, padding short lists by repeating the
+/// last element (TensorFlow's value-list semantics).
+struct ParseTfTensorValueList
+{
+    /// Specialized below for float and int32_t.
+    template<typename DataType>
+    static void Parse(
+        const tensorflow::TensorProto& tfTensor,
+        unsigned int dstElements,
+        std::vector<int8_t>& outputData);
+
+    /// Copies numSrcElements values of DataType from srcData into dstData
+    /// (resized in bytes), filling up to numDstElements by repeating the last
+    /// source element. numDstElements == 0 means "use the source length".
+    template <typename DataType>
+    static void ReadData(const void* srcData, unsigned int numSrcElements,
+        std::vector<int8_t>& dstData, unsigned int numDstElements)
+    {
+        // If there are no entries in the list, perform no action
+        if (numSrcElements == 0)
+        {
+            return;
+        }
+
+        // If no size was provided, use the length of the value list
+        if (numDstElements == 0)
+        {
+            numDstElements = numSrcElements;
+        }
+
+        // Allocate memory
+        dstData.resize(std::max(numSrcElements, numDstElements) * sizeof(DataType));
+
+        const DataType* srcTensor = reinterpret_cast<const DataType*>(srcData);
+        DataType* dstTensor = reinterpret_cast<DataType*>(dstData.data());
+
+        // Copy the value list entries into the destination
+        std::copy(srcTensor, srcTensor + numSrcElements, dstTensor);
+
+        if (numDstElements > numSrcElements)
+        {
+            // Use the last element in the list to fill the remaining entries
+            std::fill(dstTensor + numSrcElements, dstTensor + numDstElements, srcTensor[numSrcElements - 1]);
+        }
+    }
+
+};
+
+// Extracts the float_val list; see ReadData for the fill-to-size semantics.
+template <>
+void ParseTfTensorValueList::Parse<float>(const tensorflow::TensorProto& tfTensor,
+    unsigned int dstElements, std::vector<int8_t>& outputData)
+{
+    ReadData<float>(tfTensor.float_val().data(), static_cast<unsigned int>(tfTensor.float_val_size()),
+        outputData, dstElements);
+}
+
+// Extracts the int_val list; see ReadData for the fill-to-size semantics.
+template <>
+void ParseTfTensorValueList::Parse<int32_t>(const tensorflow::TensorProto& tfTensor,
+    unsigned int dstElements, std::vector<int8_t>& outputData)
+{
+    ReadData<int32_t>(tfTensor.int_val().data(), static_cast<unsigned int>(tfTensor.int_val_size()),
+        outputData, dstElements);
+}
+
+/// Factory functor constructing an OperatorType<DataType>; used together with
+/// InvokeParseFunction to select the element type at runtime.
+template <template<typename> class OperatorType, typename T = int8_t>
+struct MakeTfOperation
+{
+    template<typename DataType, class... Args>
+    inline static std::unique_ptr<OperatorType<DataType>> Parse(TfParser* parser, const tensorflow::NodeDef& node,
+        Args&&... args)
+    {
+        return std::make_unique<OperatorType<DataType>>(parser, node, std::forward<Args>(args)...);
+    }
+};
+
+/// Specialization for ParsedConstTfOperation: reinterprets the raw byte buffer
+/// as the target element type before constructing the operation.
+template <>
+struct MakeTfOperation<ParsedConstTfOperation>
+{
+    template<typename DataType, class... Args>
+    inline static std::unique_ptr<ParsedConstTfOperation<DataType>> Parse(TfParser* parser,
+        const tensorflow::NodeDef& node, const std::vector<int8_t>& tensorData, const TensorInfo& tensorInfo)
+    {
+        return std::make_unique<ParsedConstTfOperation<DataType>>(parser, node,
+            reinterpret_cast<const DataType*>(tensorData.data()), tensorInfo);
+    }
+};
+
+/// Dispatches FuncType::Parse<T>() with T chosen from a runtime armnn DataType
+/// (float for Float32, int32_t for Signed32).
+template <class FuncType>
+struct InvokeParseFunction
+{
+    /// Value-returning overload; note an unsupported dataType silently yields a
+    /// default-constructed ResType rather than throwing.
+    template<class ResType, class... Args>
+    inline static ResType Result(DataType dataType, Args&&... args)
+    {
+        if (dataType == DataType::Float32)
+        {
+            return FuncType::template Parse<float>(std::forward<Args>(args)...);
+        }
+        else if (dataType == DataType::Signed32)
+        {
+            return FuncType::template Parse<int32_t>(std::forward<Args>(args)...);
+        }
+
+        return ResType();
+    }
+
+    /// Void overload; an unsupported dataType is a silent no-op.
+    template<class... Args>
+    inline static void Result(DataType dataType, Args&&... args)
+    {
+        if (dataType == DataType::Float32)
+        {
+            FuncType::template Parse<float>(std::forward<Args>(args)...);
+        }
+        else if (dataType == DataType::Signed32)
+        {
+            FuncType::template Parse<int32_t>(std::forward<Args>(args)...);
+        }
+    }
+};
+
+/// Parses a Const node into a (deferred) ParsedConstTfOperation, validating
+/// that the dtype, shape and data attributes are mutually consistent.
+/// Throws ParseException on any missing or inconsistent attribute.
+ParsedTfOperationPtr TfParser::ParseConst(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    BOOST_ASSERT(nodeDef.op() == "Const");
+
+    if (nodeDef.attr().count("value") == 0)
+    {
+        throw ParseException(boost::str(
+            boost::format("Value not found for Const node - %1%")
+            % nodeDef.name()));
+    }
+
+    const tensorflow::TensorProto& tfTensor = nodeDef.attr().at("value").tensor();
+    const tensorflow::TensorShapeProto& tfTensorShape = tfTensor.tensor_shape();
+    const tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "dtype");
+
+    // Each proto dim entry carries its extent in size().
+    const auto GetDimensionSize = [](auto& d) { return d.size(); };
+
+    std::vector<unsigned int> dimensionSizes;
+    std::transform(tfTensorShape.dim().begin(), tfTensorShape.dim().end(),
+        std::back_inserter(dimensionSizes), GetDimensionSize);
+
+    // Calculate number of elements
+    const DataType dataType = ConvertTfTensorDataType(tfDataType);
+    unsigned int numElements = 0U;
+
+    if (!dimensionSizes.empty())
+    {
+        numElements = std::accumulate(dimensionSizes.begin(), dimensionSizes.end(),
+                                      1U, std::multiplies<unsigned int>());
+    }
+
+    std::vector<int8_t> tensorData;
+
+    // Get tensor data from the list of values attribute
+    if (tfTensor.tensor_content().empty())
+    {
+        InvokeParseFunction<ParseTfTensorValueList>::Result<void>(dataType, tfTensor, numElements, tensorData);
+
+        // If the tensor shape is not defined, but there is a value list, then interpret the data as a 1D
+        // tensor of the provided number of elements
+        if (numElements == 0)
+        {
+            const unsigned int tfNumElements = static_cast<unsigned int>(tensorData.size()) / GetDataTypeSize(dataType);
+            dimensionSizes.push_back(tfNumElements);
+        }
+    }
+    // Get tensor data from tensor content attribute
+    else
+    {
+        tensorData.assign(tfTensor.tensor_content().begin(), tfTensor.tensor_content().end());
+
+        // Check if a tensor shape is defined for the tensor content
+        if (numElements == 0)
+        {
+            throw ParseException(boost::str(
+                boost::format("No tensor shape found for Const node - %1%")
+                % nodeDef.name()));
+        }
+    }
+
+    // Const node requires at least a list of values or a content attribute
+    if (tensorData.empty())
+    {
+        throw ParseException(boost::str(
+            boost::format("No tensor data found for Const node - %1%")
+            % nodeDef.name()));
+    }
+
+    const TensorInfo tensorInfo(static_cast<unsigned int>(dimensionSizes.size()), dimensionSizes.data(), dataType);
+
+    // If we have a list of values, then the length of the list must be
+    // less than or equal to the number of elements implied by the shape argument
+    if (tensorData.size() > tensorInfo.GetNumBytes())
+    {
+        throw ParseException(boost::str(
+            boost::format("Number of elements (%1%) should be less than or equal \
+            to the number of elements implied by the shape argument (%2%) for Const node - %3%")
+            % (tensorData.size() / GetDataTypeSize(dataType))
+            % tensorInfo.GetNumElements()
+            % nodeDef.name()));
+    }
+
+    // Build the typed ParsedConstTfOperation matching the runtime dataType.
+    return InvokeParseFunction<MakeTfOperation<ParsedConstTfOperation>>::Result<ParsedTfOperationPtr>(
+        dataType, this, nodeDef, tensorData, tensorInfo);
+}
+
+/// Returns true when the named node has already been parsed AND is a constant
+/// tensor of element type 'Type'.
+template<typename Type>
+bool TfParser::HasParsedConstTensor(const std::string & nodeName) const
+{
+    auto found = m_ParsedTfOperations.find(nodeName);
+    return found != m_ParsedTfOperations.end() &&
+           dynamic_cast<ParsedConstTfOperation<Type>*>(found->second.get()) != nullptr;
+}
+
+/// Parses a Conv2D node into an armnn Convolution2d layer.
+/// Requires constant float weights; supports NHWC and NCHW data formats and
+/// TensorFlow's SAME/VALID padding schemes. Dilations other than 1 are rejected.
+ParsedTfOperationPtr TfParser::ParseConv2D(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
+    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
+
+    if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
+    {
+        throw ParseException("ArmNN only supports Convolution layers with constant weights");
+    }
+    ParsedConstTfOperation<float>* weightNode =
+        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
+
+    std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
+    std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
+    std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");
+
+    // read the dilations, if present - only [1,1,1,1] (the default) is supported
+    std::vector<uint32_t> dilations = ReadOptionalNodeUint32ListAttribute(nodeDef, "dilations");
+    if (!dilations.empty())
+    {
+        for (auto dilation : dilations)
+        {
+            if (dilation != 1u)
+            {
+                throw ParseException("ArmNN only supports Convolution layers with dilations [1,1,1,1]");
+            }
+        }
+    }
+
+    Convolution2dDescriptor desc;
+    desc.m_BiasEnabled = false;
+
+    if (dataFormat == "NHWC")
+    {
+        // TF strides are per input dimension: for NHWC that is [N, H, W, C].
+        desc.m_StrideX = strides[2];
+        desc.m_StrideY = strides[1];
+        // Swizzle input to supported memory layout
+        inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
+    }
+    else if (dataFormat == "NCHW")
+    {
+        desc.m_StrideX = strides[3];
+        desc.m_StrideY = strides[2];
+    }
+    else
+    {
+        throw ParseException("Unsupported data format passed for Conv2D. Only NHWC and NCHW supported");
+    }
+
+    // From here on inputTensorInfo is NCHW: [N, C, H, W].
+    uint32_t inputHeight = inputTensorInfo.GetShape()[2];
+    uint32_t inputWidth = inputTensorInfo.GetShape()[3];
+
+    std::vector<float> outputTensorData;
+
+    // 'true' swizzles the TF [H,W,In,Out] weights into armnn's [Out,In,H,W].
+    ConstTensor weightTensor = weightNode->GetConstTensor(true, outputTensorData);
+
+    uint32_t weightHeight = weightTensor.GetShape()[2];
+    uint32_t weightWidth = weightTensor.GetShape()[3];
+
+    bool padding = false;
+    TensorInfo outputInfo;
+    if (paddingString == "SAME")
+    {
+        // SAME: output spatial size = ceil(input / stride); channels = weight dim 0 (Out).
+        padding = true;
+        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
+                                  weightTensor.GetShape()[0],
+                                  static_cast<uint32_t>(ceil(
+                                      static_cast<float>(inputHeight) /
+                                      static_cast<float>(desc.m_StrideY))),
+                                  static_cast<uint32_t>(ceil(
+                                      static_cast<float>(inputWidth) /
+                                      static_cast<float>(desc.m_StrideX)))
+                                }, DataType::Float32);
+    }
+    else if (paddingString == "VALID")
+    {
+        // VALID: output spatial size = ceil((input - kernel + 1) / stride).
+        padding = false;
+        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
+                                  weightTensor.GetShape()[0],
+                                  static_cast<uint32_t>(ceil(
+                                      static_cast<float>(inputHeight - weightHeight + 1) /
+                                      static_cast<float>(desc.m_StrideY))),
+                                  static_cast<uint32_t>(ceil(
+                                      static_cast<float>(inputWidth - weightWidth + 1) /
+                                      static_cast<float>(desc.m_StrideX)))
+                                }, DataType::Float32);
+    }
+    else
+    {
+        throw ParseException("Only 'SAME' and 'VALID' padding supported");
+    }
+
+    CalcPadding(inputHeight, weightHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, padding);
+    CalcPadding(inputWidth, weightWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, padding);
+
+    IConnectableLayer* layer = m_Network->AddConvolution2dLayer(desc, weightTensor, nodeDef.name().c_str());
+    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    if (dataFormat == "NHWC")
+    {
+        // Presumably wraps the conv with permute layers so the graph stays NHWC
+        // at the boundaries — see SwizzleInDeswizzleOut.
+        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
+    }
+    else
+    {
+        inputSlot.Connect(layer->GetInputSlot(0));
+    }
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+/// Parses a DepthwiseConv2dNative node into an armnn DepthwiseConvolution2d layer.
+/// Requires constant float weights; supports NHWC and NCHW data formats and
+/// TensorFlow's SAME/VALID padding schemes.
+ParsedTfOperationPtr TfParser::ParseDepthwiseConv2D(const tensorflow::NodeDef& nodeDef,
+                                                   const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
+    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
+
+    if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
+    {
+        throw ParseException("ArmNN only supports Depthwise Convolution layers with constant weights");
+    }
+    ParsedConstTfOperation<float>* weightNode =
+        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
+
+
+    std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
+    std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
+    std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");
+
+    DepthwiseConvolution2dDescriptor desc;
+    desc.m_BiasEnabled = false;
+
+    if (dataFormat == "NHWC")
+    {
+        // TF strides are per input dimension: for NHWC that is [N, H, W, C].
+        desc.m_StrideX = strides[2];
+        desc.m_StrideY = strides[1];
+        // Swizzle input to supported memory layout
+        inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
+    }
+    else if (dataFormat == "NCHW")
+    {
+        desc.m_StrideX = strides[3];
+        desc.m_StrideY = strides[2];
+    }
+    else
+    {
+        throw ParseException("Unsupported data format passed for DepthwiseConv2dNative. Only NHWC and NCHW supported");
+    }
+
+    // From here on inputTensorInfo is NCHW: [N, C, H, W].
+    uint32_t inputHeight = inputTensorInfo.GetShape()[2];
+    uint32_t inputWidth = inputTensorInfo.GetShape()[3];
+
+    std::vector<float> outputTensorData;
+
+    // 'true' swizzles the TF weight layout into armnn's layer-expected layout.
+    ConstTensor weightTensor = weightNode->GetConstTensor(true, outputTensorData);
+
+    uint32_t weightHeight = weightTensor.GetShape()[2];
+    uint32_t weightWidth = weightTensor.GetShape()[3];
+
+    bool padding = false;
+    TensorInfo outputInfo;
+    if (paddingString == "SAME")
+    {
+        // SAME: output spatial size = ceil(input / stride). Output channels are
+        // weightShape[0] * weightShape[1] — presumably multiplier * inChannels; confirm
+        // against the depthwise weight layout used by GetConstTensor's swizzle.
+        padding = true;
+        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
+                                weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
+                                static_cast<uint32_t>(ceil(
+                                    static_cast<float>(inputHeight) /
+                                    static_cast<float>(desc.m_StrideY))),
+                                static_cast<uint32_t>(ceil(
+                                    static_cast<float>(inputWidth) /
+                                    static_cast<float>(desc.m_StrideX)))
+                                }, DataType::Float32);
+    }
+    else if (paddingString == "VALID")
+    {
+        // VALID: output spatial size = ceil((input - kernel + 1) / stride).
+        padding = false;
+        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
+                                weightTensor.GetShape()[0] * weightTensor.GetShape()[1],
+                                static_cast<uint32_t>(ceil(
+                                    static_cast<float>(inputHeight - weightHeight + 1) /
+                                    static_cast<float>(desc.m_StrideY))),
+                                static_cast<uint32_t>(ceil(
+                                    static_cast<float>(inputWidth - weightWidth + 1) /
+                                    static_cast<float>(desc.m_StrideX)))
+                                }, DataType::Float32);
+    }
+    else
+    {
+        throw ParseException("Only 'SAME' and 'VALID' padding supported");
+    }
+
+    CalcPadding(inputHeight, weightHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, padding);
+    CalcPadding(inputWidth, weightWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, padding);
+
+    IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(desc, weightTensor, nodeDef.name().c_str());
+    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    if (dataFormat == "NHWC")
+    {
+        // Presumably wraps the conv with permute layers so the graph stays NHWC
+        // at the boundaries — see SwizzleInDeswizzleOut.
+        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
+    }
+    else
+    {
+        inputSlot.Connect(layer->GetInputSlot(0));
+    }
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+ParsedTfOperationPtr TfParser::ParseFusedBatchNorm(const tensorflow::NodeDef& nodeDef,
+                                                   const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 5);
+
+    if (!HasParsedConstTensor<float>(inputs[1].m_IndexedValue->GetNode().name()))
+    {
+        throw ParseException("ArmNN only supports FusedBatchNormalization layers with constant scale");
+    }
+    ParsedConstTfOperation<float>* scaleNode =
+        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[1].m_IndexedValue);
+
+    if (!HasParsedConstTensor<float>(inputs[2].m_IndexedValue->GetNode().name()))
+    {
+        throw ParseException("ArmNN only supports FusedBatchNormalization layers with constant offset");
+    }
+    ParsedConstTfOperation<float>* offsetNode =
+        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[2].m_IndexedValue);
+
+    if (!HasParsedConstTensor<float>(inputs[3].m_IndexedValue->GetNode().name()))
+    {
+        throw ParseException("ArmNN only supports FusedBatchNormalization layers with constant mean");
+    }
+    ParsedConstTfOperation<float>* meanNode =
+        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[3].m_IndexedValue);
+
+    if (!HasParsedConstTensor<float>(inputs[4].m_IndexedValue->GetNode().name()))
+    {
+        throw ParseException("ArmNN only supports FusedBatchNormalization layers with constant variance");
+    }
+    ParsedConstTfOperation<float>* varianceNode =
+        boost::polymorphic_downcast<ParsedConstTfOperation<float> *>(inputs[4].m_IndexedValue);
+
+    // The descriptor only has the epsilon attribute
+    BatchNormalizationDescriptor desc;
+    desc.m_Eps = ReadMandatoryNodeFloatAttribute(nodeDef, "epsilon");
+
+    // data for the parsed tensor args (scale, offset, mean, variance) must be stored locally until the layer is added
+    std::vector<float> scaleTensorData;
+    ConstTensor scaleTensor = scaleNode->GetConstTensor(false, scaleTensorData);
+
+    std::vector<float> offsetTensorData;
+    ConstTensor offsetTensor = offsetNode->GetConstTensor(false, offsetTensorData);
+
+    std::vector<float> meanTensorData;
+    ConstTensor meanTensor = meanNode->GetConstTensor(false, meanTensorData);
+
+    std::vector<float> varianceTensorData;
+    ConstTensor varianceTensor = varianceNode->GetConstTensor(false, varianceTensorData);
+
+    IConnectableLayer* layer = m_Network->AddBatchNormalizationLayer(desc,
+                                                                     meanTensor,
+                                                                     varianceTensor,
+                                                                     offsetTensor,
+                                                                     scaleTensor,
+                                                                     nodeDef.name().c_str());
+
+    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+
+    const std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
+
+    if (dataFormat == "NHWC")
+    {
+        const TensorInfo outputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
+        layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
+    }
+    else
+    {
+        layer->GetOutputSlot(0).SetTensorInfo(inputSlot.GetTensorInfo());
+        inputSlot.Connect(layer->GetInputSlot(0));
+    }
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
// Parses a TF concat node into an ArmNN Merger layer.
// The last TF input holds the concatenation axis and must be a constant; ArmNN
// only supports concatenation along the channel dimension (axis 1 in NCHW,
// axis 3 in NHWC — the latter is handled by swizzling to NCHW first).
ParsedTfOperationPtr TfParser::ParseConcat(const tensorflow::NodeDef& nodeDef,
                                           const tensorflow::GraphDef& graphDef)
{
    std::vector<OutputOfConstNodeDef> nodes = GetTfInputNodes(nodeDef);
    // In tensorflow, we have the last input of the Concat layer as the axis for concatenation
    unsigned int numInputs = static_cast<unsigned int>(nodes.size());
    unsigned int numConcatView = numInputs - 1;

    // One view per concatenated input; every view is described with MaxNumOfTensorDimensions coords.
    OriginsDescriptor concatDescriptor(static_cast<uint32_t>(numConcatView), MaxNumOfTensorDimensions);
    std::vector<unsigned int>mergeDimSizes(MaxNumOfTensorDimensions, 0u);

    // Running offset along the concatenation dimension; becomes the merged size at the end.
    unsigned int mergeDim = 0;
    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, numInputs);

    // The last input is the axis for concatenation
    if (!HasParsedConstTensor<int32_t>(inputs[numInputs - 1].m_IndexedValue->GetNode().name()))
    {
        throw ParseException("ArmNN only supports Concat with constant axis");
    }
    ParsedConstTfOperation<int32_t>* shapeNode =
            boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[numInputs - 1].m_IndexedValue);

    std::vector<int32_t> axisTensorData;
    ConstTensor axisTensor = shapeNode->GetConstTensor(false, axisTensorData);

    // This concatDim indicates the data format: 3 is the NHWC, 1 is the NCHW
    const unsigned int concatDimInput = static_cast<unsigned int>(axisTensorData[0]);

    // Armnn supports concatenation along the channel dimension for data format NHWC and NCHW
    if (concatDimInput == 0 || concatDimInput == 2)
    {
        throw ParseException("The dimension for concatenation is not supported by Armnn");
    }

    // This is the only concatDim we support in Armnn
    // (after the NHWC->NCHW swizzle below, axis 3 also maps onto channel dimension 1).
    const unsigned int concatDim = 1;
    for (unsigned int viewIndex = 0; viewIndex < numConcatView; ++viewIndex)
    {
        // need to double check whether it should be
        IOutputSlot& inputSlot =
            inputs[viewIndex].m_IndexedValue->ResolveArmnnOutputSlot(inputs[viewIndex].m_Index);
        TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();

        if (inputTensorInfo.GetNumDimensions() != MaxNumOfTensorDimensions)
        {
            throw ParseException("The number of dimensions for input tensors of the concatenation op should be 4");
        }

        // NHWC inputs are viewed through the NHWC->ArmNN permutation so the merge
        // coordinates below are always expressed in the ArmNN (NCHW) layout.
        if (concatDimInput == 3)
        {
            inputTensorInfo = armnnUtils::Permuted(inputTensorInfo, NHWCToArmNN);
        }

        // Record this view's sizes; non-concat dims are assumed equal across views.
        for (unsigned int dim = 0; dim < MaxNumOfTensorDimensions; ++dim)
        {
            mergeDimSizes[dim] = inputTensorInfo.GetShape()[dim];
        }

        // The view starts at origin 0 on every dimension except the concat dimension,
        // where it starts at the running offset accumulated from earlier views.
        for (unsigned int j = 0; j < concatDim; ++j)
        {
            concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
        }

        concatDescriptor.SetViewOriginCoord(viewIndex, concatDim, mergeDim);
        mergeDim += mergeDimSizes[concatDim];

        for (unsigned int j = concatDim+1; j < MaxNumOfTensorDimensions; ++j)
        {
            concatDescriptor.SetViewOriginCoord(viewIndex, j, 0);
        }
    }

    // The merged output has the accumulated size along the concat dimension.
    mergeDimSizes[concatDim] = mergeDim;
    armnn::IConnectableLayer *layer = m_Network->AddMergerLayer(concatDescriptor, nodeDef.name().c_str());

    layer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo(MaxNumOfTensorDimensions, mergeDimSizes.data(),
                                                            DataType::Float32));

    // Wire each input up, inserting a swizzle per NHWC input.
    for (unsigned int v = 0; v < numConcatView; ++v)
    {
        IOutputSlot& inputSlot = inputs[v].m_IndexedValue->ResolveArmnnOutputSlot(inputs[v].m_Index);
        if (concatDimInput == 3)
        {
            IConnectableLayer* const swizzleLayer = AddSwizzleLayer(*m_Network, inputSlot, NHWCToArmNN,
                                                                    "swizzle_for-" + nodeDef.name());
            swizzleLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(v));
        }
        else
        {
            inputSlot.Connect(layer->GetInputSlot(v));
        }
    }

    // For NHWC graphs, convert the merged result back to NHWC before handing it on.
    if (concatDimInput == 3)
    {
        IConnectableLayer* const deswizzleLayer = AddSwizzleLayer(*m_Network, layer->GetOutputSlot(0), ArmNNToNHWC,
                                                                  "deswizzle_for-" + nodeDef.name());
        layer = deswizzleLayer;
    }

    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
}
+
+ParsedTfOperationPtr TfParser::ParseShape(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    // Note: The Shape layer is handled in a special way, because:
+    //        1. ARMNN doesn't support int32 tensors which it outputs
+    //        2. ARMNN works with statically shaped tensors which are known at parse time
+    //        3. because of 1. and 2. we treat the output of Shape as a temporary const int32
+    //           tensor which may be used as an input to other ops, most likely a Reshape
+
+    const tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "out_type");
+    if (tfDataType != tensorflow::DT_INT32)
+    {
+        throw ParseException("Armnn only supports DT_INT32 as out_type");
+    }
+
+    const std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
+    IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    const TensorInfo& prevLayerTensorInfo = prevLayerOutputSlot.GetTensorInfo();
+    unsigned int prevLayerDimensions = prevLayerTensorInfo.GetNumDimensions();
+
+    std::vector<int32_t> shapeTensorData;
+    shapeTensorData.reserve(prevLayerDimensions);
+
+    for (unsigned int i=0; i<prevLayerDimensions; ++i)
+    {
+        shapeTensorData.push_back(static_cast<int32_t>(prevLayerTensorInfo.GetShape()[i]));
+    }
+
+    TensorInfo shapeTensorInfo(1, &prevLayerDimensions, DataType::Signed32);
+
+    return std::make_unique<ParsedConstTfOperation<int32_t>>(this,
+                                                             nodeDef,
+                                                             &shapeTensorData[0],
+                                                             shapeTensorInfo);
+}
+
+ParsedTfOperationPtr TfParser::ParseReshape(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
+    ParsedTfOperation* inputNode = inputs[0].m_IndexedValue;
+
+    if (!HasParsedConstTensor<int32_t>(inputs[1].m_IndexedValue->GetNode().name()))
+    {
+        throw ParseException("ArmNN only supports Reshape layers with constant shapes");
+    }
+    ParsedConstTfOperation<int32_t>* shapeNode =
+        boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
+
+    armnn::IOutputSlot& prevLayerOutputSlot = inputNode->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    TensorInfo inputTensorInfo = prevLayerOutputSlot.GetTensorInfo();
+
+    std::vector<int32_t> shapeTensorData;
+    ConstTensor shapeTensor = shapeNode->GetConstTensor(false, shapeTensorData);
+    const TensorInfo outputTensorInfo = PrepareReshape(inputTensorInfo, shapeTensorData);
+
+    TensorShape targetShape = outputTensorInfo.GetShape();
+    ReshapeDescriptor reshapeDesc;
+    reshapeDesc.m_TargetShape = targetShape;
+
+    IConnectableLayer* layer = m_Network->AddReshapeLayer(reshapeDesc, nodeDef.name().c_str());
+    prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+ParsedTfOperationPtr TfParser::ParseResizeBilinear(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
+
+    if (!HasParsedConstTensor<int32_t>(inputs[1].m_IndexedValue->GetNode().name()))
+    {
+        throw ParseException("ArmNN only supports ResizeBilinear layers with constant sizes");
+    }
+    ParsedConstTfOperation<int32_t>* sizeNode =
+        boost::polymorphic_downcast<ParsedConstTfOperation<int32_t>*>(inputs[1].m_IndexedValue);
+
+    // Check the align_corners attribute is not set
+    if (ReadOptionalNodeBoolAttribute(nodeDef, "align_corners", false))
+    {
+        throw ParseException("ArmNN only supports ResizeBilinear layers with align_corners set to false");
+    }
+
+    // data for the parsed tensor args (size) must be stored locally
+    std::vector<int32_t> sizeTensorData;
+    ConstTensor sizeTensor = sizeNode->GetConstTensor(false, sizeTensorData);
+
+    // The descriptor only has target height and width attributes, which we get from the size tensor
+    ResizeBilinearDescriptor desc;
+    desc.m_TargetHeight = static_cast<uint32_t> (sizeTensorData[0]);
+    desc.m_TargetWidth = static_cast<uint32_t> (sizeTensorData[1]);
+
+    IConnectableLayer* layer = m_Network->AddResizeBilinearLayer(desc, nodeDef.name().c_str());
+
+    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
+    // the input shape is always in BHWC format, this will be swizzled below; for now,
+    // get the batch and channels to make up the ArmNN output shape with the target size
+    unsigned int outBatch = inputTensorInfo.GetShape()[0];
+    unsigned int outChannels = inputTensorInfo.GetShape()[3];
+    unsigned int outHeight = desc.m_TargetHeight;
+    unsigned int outWidth = desc.m_TargetWidth;
+    TensorShape outShape({outBatch, outChannels, outHeight, outWidth});
+    // The output DataType is always Float32, regardless of the input DataType
+    const TensorInfo outputTensorInfo(outShape, armnn::DataType::Float32);
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // TensorFlow ResizeBilinear input is always in BHWC format, so add swizzle and deswizzle layers
+    layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+TensorInfo OutputShapeOfSqueeze(const tensorflow::NodeDef& nodeDef, TensorInfo inputTensorInfo)
+{
+    BOOST_ASSERT(nodeDef.op() == "Squeeze");
+    tensorflow::DataType tfDataType = ReadMandatoryNodeTypeAttribute(nodeDef, "T");
+
+    DataType type;
+    if (tfDataType == tensorflow::DT_FLOAT)
+    {
+        type = DataType::Float32;
+    }
+    else if (tfDataType == tensorflow::DT_INT32)
+    {
+        type = DataType::Signed32;
+    }
+    else
+    {
+        throw ParseException(boost::str(
+                boost::format("Unsupported DataType %1% for Squeeze operation")
+                % tensorflow::DataType_Name(tfDataType)));
+    }
+
+    std::vector<uint32_t> squeezeDims = ReadOptionalNodeUint32ListAttribute(nodeDef, "squeeze_dims");
+    if (squeezeDims.empty())
+    {
+        for(unsigned int i = 0; i < inputTensorInfo.GetNumDimensions(); i++)
+        {
+            if (inputTensorInfo.GetShape()[i] == 1)
+            {
+                squeezeDims.push_back(i);
+            }
+        }
+    }
+
+    std::vector<uint32_t> outputDims;
+    for(unsigned int i = 0; i < inputTensorInfo.GetNumDimensions(); i++)
+    {
+        bool includeDimension = (std::find(squeezeDims.begin(), squeezeDims.end(), i) == squeezeDims.end());
+        if (includeDimension)
+        {
+            outputDims.push_back(inputTensorInfo.GetShape()[i]);
+        }
+    }
+
+    if (outputDims.size() > 4)
+    {
+        throw ParseException("Unsupported shape for Squeeze");
+    }
+
+    TensorInfo outTensorInfo = TensorInfo(boost::numeric_cast<unsigned int>(outputDims.size()),
+                                          outputDims.data(),
+                                          type);
+
+    return outTensorInfo;
+}
+
+ParsedTfOperationPtr TfParser::ParseSqueeze(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
+
+    IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    TensorInfo inputTensorInfo = prevLayerOutputSlot.GetTensorInfo();
+
+    TensorInfo outputInfo;
+    outputInfo = OutputShapeOfSqueeze(nodeDef, inputTensorInfo);
+
+    ReshapeDescriptor reshapeDesc;
+    reshapeDesc.m_TargetShape = outputInfo.GetShape();
+    IConnectableLayer* layer = m_Network->AddReshapeLayer(reshapeDesc, nodeDef.name().c_str());
+    prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
+    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+ParsedTfOperationPtr TfParser::ParseLrn(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
+
+    NormalizationDescriptor normalizationDescriptor;
+    normalizationDescriptor.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
+    normalizationDescriptor.m_NormChannelType = NormalizationAlgorithmChannel::Across;
+    normalizationDescriptor.m_Alpha = ReadMandatoryNodeFloatAttribute(nodeDef, "alpha");
+    normalizationDescriptor.m_Beta = ReadMandatoryNodeFloatAttribute(nodeDef, "beta");
+    normalizationDescriptor.m_K = ReadMandatoryNodeFloatAttribute(nodeDef, "bias");
+    normalizationDescriptor.m_NormSize = ReadMandatoryNodeUint32Attribute(nodeDef, "depth_radius");
+
+    // The window size must be an odd value. For a window size of (2 * n + 1), TensorFlow defines depth_radius = n.
+    normalizationDescriptor.m_NormSize = normalizationDescriptor.m_NormSize * 2 + 1;
+
+    IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+
+    IConnectableLayer* layer = m_Network->AddNormalizationLayer(normalizationDescriptor,
+        nodeDef.name().c_str());
+
+    const TensorInfo permutedInfo = armnnUtils::Permuted(prevLayerOutputSlot.GetTensorInfo(), NHWCToArmNN);
+    layer->GetOutputSlot(0).SetTensorInfo(permutedInfo);
+
+    layer = SwizzleInDeswizzleOut(*m_Network, prevLayerOutputSlot, *layer, nodeDef.name());
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
/// An ParsedTfOperation for a MatMul node.
/// Creation of the armnn FullyConnected layer is deferred until it is actually needed, because MatMul nodes are
/// often used for the first part of a biased FullyConnected (MatMul followed by Add) and in these cases armnn doesn't
/// need a separate layer for the MatMul.
class ParsedMatMulTfOperation : public DeferredSingleLayerParsedTfOperation
{
public:
    ParsedMatMulTfOperation(TfParser* parser, const tensorflow::NodeDef& node)
        : DeferredSingleLayerParsedTfOperation(parser, node)
    {
    }

    /// Invoked when the MatMul's output is needed as a standalone layer, i.e. it was not
    /// absorbed into a biased FullyConnected. Builds the layer via AddFullyConnectedLayer,
    /// passing nullptr as the second argument (presumably "no Add node to fuse a bias
    /// from" — matches the class comment above; confirm against AddFullyConnectedLayer).
    void CreateLayerDeferred() override
    {
        BOOST_ASSERT(m_Layer == nullptr);
        m_Layer = m_Parser->AddFullyConnectedLayer(m_Node, nullptr, m_Node.name().c_str());
    }
};
+
+ParsedTfOperationPtr TfParser::ParseMatMul(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    // Defer the creation of the layer (see ParsedMatMulTfOperation).
+    return std::make_unique<ParsedMatMulTfOperation>(this, nodeDef);
+}
+
+ParsedTfOperationPtr TfParser::ParseMul(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    boost::ignore_unused(graphDef);
+
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
+
+    IConnectableLayer* const layer = m_Network->AddMultiplicationLayer(nodeDef.name().c_str());
+    IOutputSlot* input0Slot = &inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    IOutputSlot* input1Slot = &inputs[1].m_IndexedValue->ResolveArmnnOutputSlot(inputs[1].m_Index);
+
+    auto const input0NumDims = input0Slot->GetTensorInfo().GetNumDimensions();
+    auto const input1NumDims = input1Slot->GetTensorInfo().GetNumDimensions();
+
+    if (input0NumDims < input1NumDims)
+    {
+        const bool isNHWC = true;
+        input0Slot = BroadcastForAddandMul(input1Slot, input0Slot, isNHWC, *m_Network, nodeDef);
+    }
+    if (input1NumDims < input0NumDims)
+    {
+        const bool isNHWC = true;
+        input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, isNHWC, *m_Network, nodeDef);
+    }
+
+    input0Slot->Connect(layer->GetInputSlot(0));
+    input1Slot->Connect(layer->GetInputSlot(1));
+
+    if (input0NumDims < input1NumDims)
+    {
+        layer->GetOutputSlot(0).SetTensorInfo(input1Slot->GetTensorInfo());
+    }
+    else
+    {
+        layer->GetOutputSlot(0).SetTensorInfo(input0Slot->GetTensorInfo());
+    }
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+ParsedTfOperationPtr TfParser::ParsePlaceholder(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    boost::ignore_unused(graphDef);
+
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 0);
+
+    const LayerBindingId layerId = boost::numeric_cast<LayerBindingId>(m_NetworkInputsBindingInfo.size());
+
+    auto it = m_InputShapes.find(nodeDef.name());
+    if (it == m_InputShapes.end())
+    {
+        throw ParseException("Missing input shape for Placeholder '" + nodeDef.name() + "'");
+    }
+    TensorInfo tensorInfo(it->second, DataType::Float32);
+
+    IConnectableLayer* const layer = m_Network->AddInputLayer(layerId, nodeDef.name().c_str());
+
+    layer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+
+    TrackInputBinding(layer, layerId, tensorInfo);
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+ParsedTfOperationPtr TfParser::ParseRelu(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    boost::ignore_unused(graphDef);
+
+    ActivationDescriptor activationDesc;
+    activationDesc.m_Function = ActivationFunction::ReLu;
+    return AddActivationLayer(nodeDef, activationDesc);
+}
+
+ParsedTfOperationPtr TfParser::ParseRelu6(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    boost::ignore_unused(graphDef);
+
+    ActivationDescriptor activationDesc;
+    activationDesc.m_Function = ActivationFunction::BoundedReLu;
+    activationDesc.m_A = 6.0f;
+    activationDesc.m_B = 0.0f;
+
+    return AddActivationLayer(nodeDef, activationDesc);
+}
+
+ParsedTfOperationPtr TfParser::ParseSigmoid(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    boost::ignore_unused(graphDef);
+
+    ActivationDescriptor activationDesc;
+    activationDesc.m_Function = ActivationFunction::Sigmoid;
+
+    return AddActivationLayer(nodeDef, activationDesc);
+}
+
+ParsedTfOperationPtr TfParser::ParseSoftmax(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    boost::ignore_unused(graphDef);
+
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
+
+    SoftmaxDescriptor softmaxDescriptor;
+    IConnectableLayer* const layer = m_Network->AddSoftmaxLayer(softmaxDescriptor, nodeDef.name().c_str());
+
+    IOutputSlot& prevLayerSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    prevLayerSlot.Connect(layer->GetInputSlot(0));
+    layer->GetOutputSlot(0).SetTensorInfo(prevLayerSlot.GetTensorInfo());
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+ParsedTfOperationPtr TfParser::ParseSoftplus(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    boost::ignore_unused(graphDef);
+
+    ActivationDescriptor activationDesc;
+    activationDesc.m_Function = ActivationFunction::SoftReLu;
+
+    return AddActivationLayer(nodeDef, activationDesc);
+}
+
+ParsedTfOperationPtr TfParser::ParseTanh(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    boost::ignore_unused(graphDef);
+
+    ActivationDescriptor activationDesc;
+    activationDesc.m_Function = ActivationFunction::TanH;
+    activationDesc.m_A = 1.0f;
+    activationDesc.m_B = 1.0f;
+
+    return AddActivationLayer(nodeDef, activationDesc);
+}
+
+ParsedTfOperationPtr TfParser::AddActivationLayer(const tensorflow::NodeDef& nodeDef,
+    ActivationDescriptor& activationDesc)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
+
+    IConnectableLayer* const layer = m_Network->AddActivationLayer(activationDesc, nodeDef.name().c_str());
+
+    IOutputSlot& prevLayerOutputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    prevLayerOutputSlot.Connect(layer->GetInputSlot(0));
+    layer->GetOutputSlot(0).SetTensorInfo(prevLayerOutputSlot.GetTensorInfo());
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+ParsedTfOperationPtr TfParser::ParseMaxPool(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    return ParsePooling2d(nodeDef, graphDef, PoolingAlgorithm::Max);
+}
+
+ParsedTfOperationPtr TfParser::ParseAvgPool(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef)
+{
+    return ParsePooling2d(nodeDef, graphDef, PoolingAlgorithm::Average);
+}
+
+ParsedTfOperationPtr TfParser::ParsePooling2d(const tensorflow::NodeDef& nodeDef,
+    const tensorflow::GraphDef& graphDef, PoolingAlgorithm pooltype)
+{
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 1);
+    IOutputSlot& inputSlot = inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    TensorInfo inputTensorInfo = inputSlot.GetTensorInfo();
+
+    if (inputs.size() != 1)
+    {
+        throw ParseException("2D Pooling expects one input!");
+    }
+
+    std::string paddingString = ReadMandatoryNodeStringAttribute(nodeDef, "padding");
+    std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
+    std::vector<uint32_t> strides = ReadMandatoryNodeUint32ListAttribute(nodeDef, "strides");
+    std::vector<uint32_t> ksize = ReadMandatoryNodeUint32ListAttribute(nodeDef, "ksize"); // size of pool windows
+
+    Pooling2dDescriptor pooling2dDescriptor;
+    pooling2dDescriptor.m_PoolType = pooltype;
+    pooling2dDescriptor.m_PaddingMethod = PaddingMethod::Exclude;
+    pooling2dDescriptor.m_OutputShapeRounding = OutputShapeRounding::Floor;
+
+    if (dataFormat == "NHWC")
+    {
+        pooling2dDescriptor.m_StrideX    = strides[2];
+        pooling2dDescriptor.m_StrideY    = strides[1];
+        pooling2dDescriptor.m_PoolWidth  = ksize[2];
+        pooling2dDescriptor.m_PoolHeight = ksize[1];
+        // Swizzle input to supported memory layout
+        inputTensorInfo = armnnUtils::Permuted(inputSlot.GetTensorInfo(), NHWCToArmNN);
+    }
+    else if (dataFormat == "NCHW")
+    {
+        pooling2dDescriptor.m_StrideX    = strides[3];
+        pooling2dDescriptor.m_StrideY    = strides[2];
+        pooling2dDescriptor.m_PoolWidth  = ksize[3];
+        pooling2dDescriptor.m_PoolHeight = ksize[2];
+    }
+    else
+    {
+        throw ParseException("Only NHWC or NCHW supported for Pooling2d");
+    }
+
+    uint32_t inputHeight = inputTensorInfo.GetShape()[2];
+    uint32_t inputWidth = inputTensorInfo.GetShape()[3];
+
+    bool padding = false;
+    TensorInfo outputInfo;
+    if (paddingString == "SAME")
+    {
+        padding = true;
+        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
+                                  inputTensorInfo.GetShape()[1],
+                                  static_cast<uint32_t>(ceil(
+                                      static_cast<float>(inputHeight) /
+                                      static_cast<float>(pooling2dDescriptor.m_StrideY))),
+                                  static_cast<uint32_t>(ceil(
+                                      static_cast<float>(inputWidth) /
+                                      static_cast<float>(pooling2dDescriptor.m_StrideX)))
+                                }, DataType::Float32);
+    }
+    else if (paddingString == "VALID")
+    {
+        padding = false;
+        outputInfo = TensorInfo({ inputTensorInfo.GetShape()[0],
+                                  inputTensorInfo.GetShape()[1],
+                                  static_cast<uint32_t>(ceil(
+                                      static_cast<float>(inputHeight - pooling2dDescriptor.m_PoolHeight + 1) /
+                                      static_cast<float>(pooling2dDescriptor.m_StrideY))),
+                                  static_cast<uint32_t>(ceil(
+                                      static_cast<float>(inputWidth - pooling2dDescriptor.m_PoolWidth + 1) /
+                                      static_cast<float>(pooling2dDescriptor.m_StrideX)))
+                                }, DataType::Float32);
+    }
+    else
+    {
+        throw ParseException("Only 'SAME' and 'VALID' padding supported");
+    }
+
+    CalcPadding(inputWidth, pooling2dDescriptor.m_PoolWidth, pooling2dDescriptor.m_StrideX,
+                    pooling2dDescriptor.m_PadLeft, pooling2dDescriptor.m_PadRight, padding);
+    CalcPadding(inputHeight, pooling2dDescriptor.m_PoolHeight, pooling2dDescriptor.m_StrideY,
+                    pooling2dDescriptor.m_PadTop, pooling2dDescriptor.m_PadBottom, padding);
+
+
+    IConnectableLayer* layer = m_Network->AddPooling2dLayer(pooling2dDescriptor, nodeDef.name().c_str());
+    if (layer == nullptr)
+    {
+        throw ParseException("Failed to add pooling2d layer");
+    }
+
+    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    if (dataFormat == "NHWC")
+    {
+        layer = SwizzleInDeswizzleOut(*m_Network, inputSlot, *layer, nodeDef.name());
+    }
+    else
+    {
+        inputSlot.Connect(layer->GetInputSlot(0));
+    }
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+ParsedTfOperationPtr TfParser::AddAdditionLayer(const tensorflow::NodeDef& nodeDef, bool isBiasAdd)
+{
+    // Creates an ArmNN AdditionLayer for a TF "Add" or "BiasAdd" node.
+    // For BiasAdd the second input is a 1D bias which is reshaped for broadcast
+    // against the first input; for plain Add, either input that is 1D is
+    // reshaped for broadcast against the other.
+    std::vector<OutputOfParsedTfOperation> inputs = GetInputParsedTfOperationsChecked(nodeDef, 2);
+
+    IOutputSlot* input0Slot = &inputs[0].m_IndexedValue->ResolveArmnnOutputSlot(inputs[0].m_Index);
+    IOutputSlot* input1Slot = &inputs[1].m_IndexedValue->ResolveArmnnOutputSlot(inputs[1].m_Index);
+
+    const TensorInfo& input0Info = input0Slot->GetTensorInfo();
+    const TensorInfo& input1Info = input1Slot->GetTensorInfo();
+
+    if (isBiasAdd)
+    {
+        // BiasAdd takes bias as a 1D tensor. We need to add a reshape layer to create a 4D tensor
+        // with the same data in the correct dimension for broadcast in addition.
+        if(input1Info.GetNumDimensions() != 1)
+        {
+            throw ParseException("Unsupported bias for BiasAdd. It should be a 1D vector.");
+        }
+
+        const std::string dataFormat = ReadMandatoryNodeStringAttribute(nodeDef, "data_format");
+        const bool isNHWC = (dataFormat == "NHWC");
+        const bool isNCHW = (dataFormat == "NCHW");
+
+        if (!isNHWC && ! isNCHW)
+        {
+            throw ParseException("Only NHWC or NCHW supported for BiasAdd");
+        }
+
+        input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, isNHWC, *m_Network, nodeDef);
+    }
+    else
+    {
+        // Plain Add carries no data_format attribute, so NHWC is assumed when
+        // broadcasting a 1D operand.
+        if (input0Info.GetNumDimensions() == 1)
+        {
+            const bool isNHWC = true;
+            input0Slot = BroadcastForAddandMul(input1Slot, input0Slot, isNHWC, *m_Network, nodeDef);
+        }
+
+        if (input1Info.GetNumDimensions() == 1)
+        {
+            const bool isNHWC = true;
+            input1Slot = BroadcastForAddandMul(input0Slot, input1Slot, isNHWC, *m_Network, nodeDef);
+        }
+    }
+
+    IConnectableLayer* const layer = m_Network->AddAdditionLayer(nodeDef.name().c_str());
+
+    input0Slot->Connect(layer->GetInputSlot(0));
+    input1Slot->Connect(layer->GetInputSlot(1));
+
+    // The output shape follows the non-broadcast operand: if input0 was the 1D
+    // one (and this is not a BiasAdd), take the shape from input1 instead.
+    if (input0Info.GetNumDimensions() == 1 && isBiasAdd == false)
+    {
+        layer->GetOutputSlot(0).SetTensorInfo(input1Slot->GetTensorInfo());
+    }
+    else
+    {
+        layer->GetOutputSlot(0).SetTensorInfo(input0Slot->GetTensorInfo());
+    }
+
+    return std::make_unique<SingleLayerParsedTfOperation>(this, nodeDef, layer);
+}
+
+IConnectableLayer* TfParser::AddFullyConnectedLayer(const tensorflow::NodeDef& matMulNodeDef,
+    const tensorflow::NodeDef* addNodeDef, const char* armnnLayerName)
+{
+    // Creates an ArmNN FullyConnected layer from a TF MatMul node and an optional
+    // Add node supplying a constant bias. Both weights and bias must be constants.
+
+    // Find the bias const (if applicable). The bias may appear on either side of the Add.
+    ParsedConstTfOperation<float>* biasNode = nullptr;
+    if (addNodeDef != nullptr)
+    {
+        std::vector<OutputOfParsedTfOperation> addInputs = GetInputParsedTfOperationsChecked(*addNodeDef, 2);
+        if (HasParsedConstTensor<float>(addInputs[0].m_IndexedValue->GetNode().name()))
+        {
+            biasNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(addInputs[0].m_IndexedValue);
+        }
+        else if (HasParsedConstTensor<float>(addInputs[1].m_IndexedValue->GetNode().name()))
+        {
+            biasNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(addInputs[1].m_IndexedValue);
+        }
+        else
+        {
+            throw ParseException("ArmNN only supports fully connected layers with constant bias");
+        }
+    }
+
+    // Find the matmul inputs: one must be a constant weight tensor, the other is the data input.
+    ParsedConstTfOperation<float>* weightNode = nullptr;
+    ParsedTfOperation* inputNode  = nullptr;
+    unsigned int inputIdx = 0;
+    std::vector<OutputOfParsedTfOperation> mulInputs = GetInputParsedTfOperationsChecked(matMulNodeDef, 2);
+    if (HasParsedConstTensor<float>(mulInputs[0].m_IndexedValue->GetNode().name()))
+    {
+        weightNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(mulInputs[0].m_IndexedValue);
+        inputNode = mulInputs[1].m_IndexedValue;
+        inputIdx = mulInputs[1].m_Index;
+    }
+    else if (HasParsedConstTensor<float>(mulInputs[1].m_IndexedValue->GetNode().name()))
+    {
+        weightNode = boost::polymorphic_downcast<ParsedConstTfOperation<float>*>(mulInputs[1].m_IndexedValue);
+        inputNode = mulInputs[0].m_IndexedValue;
+        inputIdx = mulInputs[0].m_Index;
+    }
+    else
+    {
+        throw ParseException("ArmNN only supports fully connected layers with constant weights");
+    }
+
+    std::vector<float> weightTensorData;
+    // Handle the weight.
+    ConstTensor weights = weightNode->GetConstTensor(false, weightTensorData);
+
+    FullyConnectedDescriptor desc;
+    desc.m_BiasEnabled = addNodeDef != nullptr;
+
+    IConnectableLayer* layer = nullptr;
+    // Make the layer, with or without bias.
+    if (addNodeDef != nullptr)
+    {
+        std::vector<float> biasTensorData;
+        ConstTensor biases = biasNode->GetConstTensor(false, biasTensorData);
+
+        // The bias must have one entry per output channel of the matmul.
+        if (weights.GetShape()[1] != biases.GetShape()[0])
+        {
+            throw ParseException("shape of matmul and bias do not match");
+        }
+
+        layer = m_Network->AddFullyConnectedLayer(desc, weights, biases, armnnLayerName);
+    }
+    else
+    {
+        layer = m_Network->AddFullyConnectedLayer(desc, weights, armnnLayerName);
+    }
+
+    BOOST_ASSERT(layer != nullptr);
+
+    // Resolve the input slot once and reuse it for both the connection and the shape query
+    // (the original resolved it twice).
+    IOutputSlot& inputSlot = inputNode->ResolveArmnnOutputSlot(inputIdx);
+    inputSlot.Connect(layer->GetInputSlot(0));
+    unsigned int batches = inputSlot.GetTensorInfo().GetShape()[0];
+
+    // Handle the output: shape is [batches, outputChannels].
+    TensorInfo outputInfo({ batches, weights.GetShape()[1] }, DataType::Float32);
+    layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+    return layer;
+}
+
+void TfParser::LoadNodeDef(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef)
+{
+    // Parses a single node, assuming all of its inputs have already been parsed.
+    // Determine the data type of the node (assume float unless an attribute says otherwise).
+    tensorflow::DataType type = tensorflow::DT_FLOAT;
+    if (nodeDef.attr().count("T") != 0)
+    {
+        auto attr = nodeDef.attr().at("T");
+        type      = attr.type();
+    }
+    else if (nodeDef.attr().count("dtype") != 0)
+    {
+        auto attr = nodeDef.attr().at("dtype");
+        type      = attr.type();
+    }
+
+    if (type != tensorflow::DT_FLOAT && nodeDef.op() != "Const")
+    {
+        throw ParseException("Currently only FLOAT is supported for tensorflow nodes (apart from Const)");
+    }
+
+    // Dispatch to the parsing member function registered for this operation.
+    const std::string& operation = nodeDef.op();
+    auto it = ms_OperationNameToParsingFunctions.find(operation);
+    if (it != ms_OperationNameToParsingFunctions.end())
+    {
+        auto func = it->second;
+        ParsedTfOperationPtr parsedTfOperation = (this->*func)(nodeDef, graphDef);
+        ParsedTfOperation* parsedTfOperationRaw = parsedTfOperation.get();
+
+        // Store the parsed operation so that dependent layers can connect to it.
+        // (Renamed from 'it' to avoid shadowing the iterator above.)
+        auto parsedIt = m_ParsedTfOperations.find(nodeDef.name());
+        if (parsedIt != m_ParsedTfOperations.end())
+        {
+            throw ParseException(boost::str(boost::format("Name %1% used by more than one node") % nodeDef.name()));
+        }
+        m_ParsedTfOperations[nodeDef.name()] = std::move(parsedTfOperation);
+
+        // If this node was requested as an output from the network then add an ArmNN output layer
+        if (std::find(m_RequestedOutputs.begin(), m_RequestedOutputs.end(), nodeDef.name()) !=
+            m_RequestedOutputs.end())
+        {
+            auto outId = ParseOutputId(nodeDef.name());
+            const LayerBindingId layerId = boost::numeric_cast<LayerBindingId>(m_NetworkOutputsBindingInfo.size());
+            IOutputSlot& prevSlot = parsedTfOperationRaw->ResolveArmnnOutputSlot(outId.m_Index);
+
+            TensorInfo tensorInfo = prevSlot.GetTensorInfo();
+
+            IConnectableLayer* outputLayer = m_Network->AddOutputLayer(layerId, nodeDef.name().c_str());
+
+            prevSlot.Connect(outputLayer->GetInputSlot(0));
+
+            TrackOutputBinding(outputLayer, layerId, tensorInfo);
+        }
+    }
+    else
+    {
+        throw ParseException(boost::str(
+            boost::format("Unsupported operation %1% in tensorflow::GraphDef") % operation));
+    }
+}
+
+void TfParser::LoadGraphDef(const tensorflow::GraphDef& graphDef)
+{
+    // Reset state and index every node in the graph by name.
+    m_NodesByName.clear();
+    m_NetworkInputsBindingInfo.clear();
+    m_NetworkOutputsBindingInfo.clear();
+
+    for (int nodeIndex = 0; nodeIndex < graphDef.node_size(); ++nodeIndex)
+    {
+        const tensorflow::NodeDef& node = graphDef.node(nodeIndex);
+        m_NodesByName[node.name()] = &node;
+    }
+
+    // Look up each output node the user requested.
+    std::vector<const tensorflow::NodeDef*> targetNodes;
+    for (const std::string& requestedOutputName : m_RequestedOutputs)
+    {
+        auto nodeIt = m_NodesByName.find(requestedOutputName);
+        if (nodeIt == m_NodesByName.end())
+        {
+            throw ParseException("Couldn't find requested output node '" + requestedOutputName + "' in graph");
+        }
+        targetNodes.push_back(nodeIt->second);
+    }
+
+    // Collects the input nodes of a given node (dropping the output-index part).
+    auto getNodeInputs = [this](const tensorflow::NodeDef* node)
+    {
+        std::vector<const tensorflow::NodeDef*> inputNodes;
+        for (const auto& inputWithIndex : GetTfInputNodes(*node))
+        {
+            inputNodes.push_back(inputWithIndex.m_IndexedValue);
+        }
+        return inputNodes;
+    };
+
+    // Sort into a linear ordering such that all inputs of a node come before the node itself.
+    std::vector<const tensorflow::NodeDef*> sortedNodes;
+    if (!armnnUtils::GraphTopologicalSort<const tensorflow::NodeDef*>(targetNodes, getNodeInputs, sortedNodes))
+    {
+        throw ParseException("Cycle detected in graph");
+    }
+
+    // Parse in order; every input of a node has been processed before the node itself.
+    for (const tensorflow::NodeDef* node : sortedNodes)
+    {
+        LoadNodeDef(*node, graphDef);
+    }
+}
+
+INetworkPtr TfParser::CreateNetworkFromTextFile(const char* graphFile,
+    const std::map<std::string, TensorShape>& inputShapes,
+    const std::vector<std::string>& requestedOutputs)
+{
+    // Opens a protobuf text file, parses it into a GraphDef and builds the network.
+    FILE* fd = fopen(graphFile, "r");
+
+    if (fd == nullptr)
+    {
+        std::stringstream error;
+        error << "Graph file " << graphFile << " failed to open";
+        throw FileNotFoundException(error.str());
+    }
+
+    // Parse the file into a message. Use a scoped stack-allocated stream instead of
+    // raw new/delete: exception-safe and consistent with CreateNetworkFromBinaryFile.
+    tensorflow::GraphDef graphDef;
+    bool success = false;
+    {
+        google::protobuf::io::FileInputStream inStream(fileno(fd));
+        success = google::protobuf::TextFormat::Parse(&inStream, &graphDef);
+    }
+    fclose(fd);
+
+    if (!success)
+    {
+        std::stringstream error;
+        error << "Failed to parse graph file";
+        throw ParseException(error.str());
+    }
+
+    return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
+}
+
+INetworkPtr TfParser::CreateNetworkFromString(const char* protoText,
+    const std::map<std::string, TensorShape>& inputShapes,
+    const std::vector<std::string>& requestedOutputs)
+{
+    // Parses protobuf text supplied directly as a string. Useful for debugging/testing.
+    tensorflow::GraphDef graphDef;
+    bool success = google::protobuf::TextFormat::ParseFromString(protoText, &graphDef);
+
+    if (!success)
+    {
+        // The input here is an in-memory string, not a file (the original message
+        // incorrectly said "graph file").
+        throw ParseException("Failed to parse graph string");
+    }
+
+    return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
+}
+
+INetworkPtr TfParser::CreateNetworkFromBinaryFile(const char* graphFile,
+    const std::map<std::string, TensorShape>& inputShapes,
+    const std::vector<std::string>& requestedOutputs)
+{
+    // Opens a binary protobuf file, parses it into a GraphDef and builds the network.
+    FILE* fd = fopen(graphFile, "rb");
+
+    if (fd == nullptr)
+    {
+        std::stringstream error;
+        error << "Graph file " << graphFile << " failed to open";
+        throw FileNotFoundException(error.str());
+    }
+
+    // Parse the file into a message
+    tensorflow::GraphDef graphDef;
+
+    google::protobuf::io::FileInputStream  inStream(fileno(fd));
+    google::protobuf::io::CodedInputStream codedStream(&inStream);
+    // Lift protobuf's default 64MB message size limit so large models can load.
+    codedStream.SetTotalBytesLimit(INT_MAX, INT_MAX);
+    bool success = graphDef.ParseFromCodedStream(&codedStream);
+    fclose(fd);
+
+    if (!success)
+    {
+        std::stringstream error;
+        // Note the trailing space: the original concatenated "file" with the filename.
+        error << "Failed to parse protobuf file " << graphFile;
+        throw ParseException(error.str());
+    }
+
+    return CreateNetworkFromGraphDef(graphDef, inputShapes, requestedOutputs);
+}
+
+INetworkPtr TfParser::CreateNetworkFromGraphDef(const tensorflow::GraphDef& graphDef,
+    const std::map<std::string, TensorShape>& inputShapes,
+    const std::vector<std::string>& requestedOutputs)
+{
+    // Builds an ArmNN network from an in-memory GraphDef. Ownership of the built
+    // network is transferred to the caller; parser state is cleared afterwards.
+    m_Network = INetwork::Create();
+
+    m_InputShapes = inputShapes;
+    if (requestedOutputs.size() == 0)
+    {
+        throw ParseException("requestedOutputs must have at least one entry");
+    }
+    m_RequestedOutputs = requestedOutputs;
+
+    try
+    {
+        LoadGraphDef(graphDef);
+    }
+    catch (const ParseException&)
+    {
+        Cleanup();
+        // Rethrow with 'throw;' to preserve the dynamic exception type;
+        // 'throw e;' would copy and potentially slice derived exceptions.
+        throw;
+    }
+
+    Cleanup();
+
+    return std::move(m_Network);
+}
+
+void TfParser::Cleanup()
+{
+    // Reset all per-parse state so this parser instance can be reused.
+    m_ParsedTfOperations.clear();
+    m_NodesByName.clear();
+    m_RequestedOutputs.clear();
+    m_InputShapes.clear();
+}
+
+BindingPointInfo TfParser::GetNetworkInputBindingInfo(const std::string& name) const
+{
+    // Returns the (layer id, tensor info) pair recorded for the named network input.
+    return GetBindingInfo(name, "input", m_NetworkInputsBindingInfo);
+}
+
+BindingPointInfo TfParser::GetNetworkOutputBindingInfo(const std::string& name) const
+{
+    // Returns the (layer id, tensor info) pair recorded for the named network output.
+    return GetBindingInfo(name, "output", m_NetworkOutputsBindingInfo);
+}
+
+std::pair<LayerBindingId, TensorInfo> TfParser::GetBindingInfo(const std::string& layerName,
+    const char* bindingPointDesc,
+    const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
+{
+    // Shared lookup for input/output binding points; throws if the name is unknown.
+    const auto bindingIt = nameToBindingInfo.find(layerName);
+    if (bindingIt != nameToBindingInfo.end())
+    {
+        return bindingIt->second;
+    }
+    throw InvalidArgumentException(boost::str(boost::format("Unknown %1% '%2%'") % bindingPointDesc % layerName));
+}
+
+void TfParser::TrackInputBinding(IConnectableLayer* layer, LayerBindingId id, const TensorInfo& tensorInfo)
+{
+    // Records an input binding point under the layer's name.
+    return TrackBindingPoint(layer, id, tensorInfo, "input", m_NetworkInputsBindingInfo);
+}
+
+void TfParser::TrackOutputBinding(IConnectableLayer* layer, LayerBindingId id, const TensorInfo& tensorInfo)
+{
+    // Records an output binding point under the layer's name.
+    return TrackBindingPoint(layer, id, tensorInfo, "output", m_NetworkOutputsBindingInfo);
+}
+
+void TfParser::TrackBindingPoint(IConnectableLayer* layer,
+    LayerBindingId id,
+    const TensorInfo& tensorInfo,
+    const char* bindingPointDesc,
+    std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo)
+{
+    // Records the (id, tensorInfo) pair under the layer's name; each name may only
+    // be bound once, so a failed insertion means a duplicate binding.
+    const std::string layerName = layer->GetName();
+    const auto insertion = nameToBindingInfo.emplace(layerName, std::make_pair(id, tensorInfo));
+    if (!insertion.second)
+    {
+        throw ParseException(boost::str(
+            boost::format("Id %1% used by more than one %2% layer") % id % bindingPointDesc));
+    }
+}
+
+} // namespace armnnTfParser
diff --git a/src/armnnTfParser/TfParser.hpp b/src/armnnTfParser/TfParser.hpp
new file mode 100644
index 0000000..c5b4bce
--- /dev/null
+++ b/src/armnnTfParser/TfParser.hpp
@@ -0,0 +1,199 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "armnnTfParser/ITfParser.hpp"
+
+#include "armnn/Types.hpp"
+#include "armnn/Tensor.hpp"
+#include "armnn/INetwork.hpp"
+
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace armnn
+{
+class TensorInfo;
+}
+
+namespace tensorflow
+{
+class GraphDef;
+class NodeDef;
+}
+
+namespace armnnTfParser
+{
+
+using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;
+
+class ParsedTfOperation;
+using ParsedTfOperationPtr = std::unique_ptr<ParsedTfOperation>;
+
+///
+/// WithOutputTensorIndex wraps a value and an index. The purpose of
+/// this template is to signify that, in Tensorflow, the input name of
+/// a layer follows the convention 'inputTensorName:#index', where the
+/// #index can be omitted and then implicitly refers to output 0 of
+/// the referenced layer. By supporting this notation we can handle
+/// layers with multiple outputs, such as Split.
+///
+template <typename T>
+struct WithOutputTensorIndex
+{
+    T                m_IndexedValue;   // the wrapped value (e.g. a node or parsed operation)
+    unsigned int     m_Index;          // which output of the wrapped value is referenced
+
+    WithOutputTensorIndex(const T & value, unsigned int index)
+    : m_IndexedValue{value}
+    , m_Index{index} {}
+
+    // Forward with std::move so movable types (e.g. std::string in OutputId)
+    // are moved rather than copied; the original bound the rvalue as an lvalue.
+    WithOutputTensorIndex(T && value, unsigned int index)
+    : m_IndexedValue{std::move(value)}
+    , m_Index{index} {}
+};
+
+using OutputOfParsedTfOperation = WithOutputTensorIndex<ParsedTfOperation *>;
+using OutputOfConstNodeDef = WithOutputTensorIndex<const tensorflow::NodeDef*>;
+using OutputId = WithOutputTensorIndex<std::string>;
+
+class TfParser : public ITfParser
+{
+public:
+    /// Create the network from a protobuf text file on disk
+    virtual armnn::INetworkPtr CreateNetworkFromTextFile(
+        const char* graphFile,
+        const std::map<std::string, armnn::TensorShape>& inputShapes,
+        const std::vector<std::string>& requestedOutputs) override;
+
+    /// Create the network from a protobuf binary file on disk
+    virtual armnn::INetworkPtr CreateNetworkFromBinaryFile(
+        const char* graphFile,
+        const std::map<std::string, armnn::TensorShape>& inputShapes,
+        const std::vector<std::string>& requestedOutputs) override;
+
+    /// Create the network directly from protobuf text in a string. Useful for debugging/testing
+    virtual armnn::INetworkPtr CreateNetworkFromString(
+        const char* protoText,
+        const std::map<std::string, armnn::TensorShape>& inputShapes,
+        const std::vector<std::string>& requestedOutputs) override;
+
+    /// Retrieve binding info (layer id and tensor info) for the network input identified by the given layer name
+    virtual BindingPointInfo GetNetworkInputBindingInfo(const std::string& name) const override;
+
+    /// Retrieve binding info (layer id and tensor info) for the network output identified by the given layer name
+    virtual BindingPointInfo GetNetworkOutputBindingInfo(const std::string& name) const override;
+
+public:
+    TfParser();
+
+private:
+    template <typename T>
+    friend class ParsedConstTfOperation;
+    friend class ParsedMatMulTfOperation;
+
+    /// Parses a GraphDef loaded into memory from one of the other CreateNetwork*
+    armnn::INetworkPtr CreateNetworkFromGraphDef(const tensorflow::GraphDef& graphDef,
+        const std::map<std::string, armnn::TensorShape>& inputShapes,
+        const std::vector<std::string>& requestedOutputs);
+
+    /// Sets up variables and then parses all nodes in topological order (inputs before their consumers)
+    void LoadGraphDef(const tensorflow::GraphDef& graphDef);
+
+    /// Parses a given node, assuming all nodes before it in the graph have already been parsed
+    void LoadNodeDef(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+
+    /// Handles Identity nodes found as the input of another layer (e.g. a Conv2D layer)
+    const tensorflow::NodeDef* ResolveIdentityNode(const tensorflow::NodeDef* nodeDef);
+    /// Finds the nodes connected as inputs of the given node in the graph.
+    std::vector<OutputOfConstNodeDef> GetTfInputNodes(const tensorflow::NodeDef& nodeDef) const;
+    /// Finds the IParsedTfOperations for the nodes connected as inputs of the given node in the graph,
+    /// and throws an exception if the number of inputs does not match the expected one.
+    /// This will automatically resolve any identity nodes. The result vector contains the parsed operation
+    /// together with the output tensor index to make the connection unambiguous.
+    std::vector<OutputOfParsedTfOperation> GetInputParsedTfOperationsChecked(const tensorflow::NodeDef& nodeDef,
+                                                                             std::size_t expectedNumInputs);
+
+    ParsedTfOperationPtr ParseConst(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+
+    /// Checks whether a pre-parsed const tensor is available with the given name and Type
+    template<typename Type>
+    bool HasParsedConstTensor(const std::string & nodeName) const;
+
+    ParsedTfOperationPtr ParseAdd(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseBiasAdd(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseConv2D(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseDepthwiseConv2D(const tensorflow::NodeDef& nodeDef,const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseFusedBatchNorm(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseConcat(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseIdentity(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseLrn(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseMatMul(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseMul(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParsePlaceholder(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseRelu(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseRelu6(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseReshape(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseResizeBilinear(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseShape(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseSqueeze(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseSigmoid(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseSoftmax(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseSoftplus(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseTanh(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseMaxPool(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParseAvgPool(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef);
+    ParsedTfOperationPtr ParsePooling2d(const tensorflow::NodeDef& nodeDef, const tensorflow::GraphDef& graphDef,
+        armnn::PoolingAlgorithm pooltype);
+    ParsedTfOperationPtr AddActivationLayer(const tensorflow::NodeDef& nodeDef, armnn::ActivationDescriptor& desc);
+    ParsedTfOperationPtr AddAdditionLayer(const tensorflow::NodeDef& nodeDef, bool isBiasAdd = false);
+    armnn::IConnectableLayer* AddFullyConnectedLayer(const tensorflow::NodeDef& matMulNodeDef,
+        const tensorflow::NodeDef* addNodeDef, const char* armnnLayerName);
+
+    /// Shared lookup used by GetNetworkInput/OutputBindingInfo; throws if the name is unknown
+    static std::pair<armnn::LayerBindingId, armnn::TensorInfo> GetBindingInfo(const std::string& layerName,
+        const char* bindingPointDesc,
+        const std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo);
+
+    void TrackInputBinding(armnn::IConnectableLayer* layer,
+        armnn::LayerBindingId id,
+        const armnn::TensorInfo& tensorInfo);
+
+    void TrackOutputBinding(armnn::IConnectableLayer* layer,
+        armnn::LayerBindingId id,
+        const armnn::TensorInfo& tensorInfo);
+
+    static void TrackBindingPoint(armnn::IConnectableLayer* layer, armnn::LayerBindingId id,
+        const armnn::TensorInfo& tensorInfo,
+        const char* bindingPointDesc,
+        std::unordered_map<std::string, BindingPointInfo>& nameToBindingInfo);
+
+    /// Clears all per-parse state so the parser instance can be reused
+    void Cleanup();
+
+    /// The network we're building. Gets cleared after it is passed to the user
+    armnn::INetworkPtr m_Network;
+
+    using OperationParsingFunction = ParsedTfOperationPtr(TfParser::*)(const tensorflow::NodeDef& nodeDef,
+                                                                 const tensorflow::GraphDef& graphDef);
+
+    /// Map of TensorFlow operation names to parsing member functions
+    static const std::map<std::string, OperationParsingFunction> ms_OperationNameToParsingFunctions;
+
+    std::map<std::string, armnn::TensorShape> m_InputShapes;
+    std::vector<std::string> m_RequestedOutputs;
+
+    /// Map of nodes extracted from the GraphDef to speed up parsing
+    std::unordered_map<std::string, const tensorflow::NodeDef*> m_NodesByName;
+
+    /// Parsed operations keyed by node name; dependent layers resolve their inputs through this
+    std::unordered_map<std::string, ParsedTfOperationPtr> m_ParsedTfOperations;
+
+    /// maps input layer names to their corresponding ids and tensor infos
+    std::unordered_map<std::string, BindingPointInfo> m_NetworkInputsBindingInfo;
+
+    /// maps output layer names to their corresponding ids and tensor infos
+    std::unordered_map<std::string, BindingPointInfo> m_NetworkOutputsBindingInfo;
+};
+}
diff --git a/src/armnnTfParser/test/Activations.cpp b/src/armnnTfParser/test/Activations.cpp
new file mode 100644
index 0000000..72ed64d
--- /dev/null
+++ b/src/armnnTfParser/test/Activations.cpp
@@ -0,0 +1,113 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+
+// Fixture that builds a two-node graph: a DT_FLOAT Placeholder feeding a single
+// activation node whose op (and node name) is the given activationFunction string.
+struct ActivationFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    explicit ActivationFixture(const char* activationFunction)
+    {
+        m_Prototext = "node {\n"
+            "  name: \"Placeholder\"\n"
+            "  op: \"Placeholder\"\n"
+            "  attr {\n"
+            "    key: \"dtype\"\n"
+            "    value {\n"
+            "      type: DT_FLOAT\n"
+            "    }\n"
+            "  }\n"
+            "  attr {\n"
+            "    key: \"shape\"\n"
+            "    value {\n"
+            "      shape {\n"
+            "        unknown_rank: true\n"
+            "      }\n"
+            "    }\n"
+            "  }\n"
+            "}\n"
+            "node {\n"
+            "  name: \"";
+        m_Prototext.append(activationFunction);
+        m_Prototext.append("\"\n"
+                               "  op: \"");
+        m_Prototext.append(activationFunction);
+        m_Prototext.append("\"\n"
+                               "  input: \"Placeholder\"\n"
+                               "  attr {\n"
+                               "    key: \"T\"\n"
+                               "    value {\n"
+                               "      type: DT_FLOAT\n"
+                               "    }\n"
+                               "  }\n"
+                               "}\n");
+
+        // 1x7 input; the requested network output is the activation node itself.
+        SetupSingleInputSingleOutput({ 1, 7 }, "Placeholder", activationFunction);
+    }
+};
+
+
+struct ReLuFixture : ActivationFixture
+{
+    ReLuFixture() : ActivationFixture("Relu") {}
+};
+// Relu: negative inputs clamp to 0, non-negative inputs pass through unchanged.
+BOOST_FIXTURE_TEST_CASE(ParseReLu, ReLuFixture)
+{
+    RunTest<2>({ -1.0f, -0.5f, 1.25f, -3.0f, 0.0f, 0.5f, -0.75f },
+               { 0.0f, 0.0f, 1.25f, 0.0f, 0.0f, 0.5f, 0.0f });
+}
+
+
+struct ReLu6Fixture : ActivationFixture
+{
+    ReLu6Fixture() : ActivationFixture("Relu6") {}
+};
+// Relu6 additionally caps values at 6.0 (hence 7.25 -> 6.0).
+BOOST_FIXTURE_TEST_CASE(ParseReLu6, ReLu6Fixture)
+{
+    RunTest<2>({ -1.0f, -0.5f, 7.25f, -3.0f, 0.0f, 0.5f, -0.75f },
+               { 0.0f, 0.0f, 6.0f, 0.0f, 0.0f, 0.5f, 0.0f });
+}
+
+
+struct SigmoidFixture : ActivationFixture
+{
+    SigmoidFixture() : ActivationFixture("Sigmoid") {}
+};
+// Sigmoid: 1/(1+e^-x); expected values are precomputed references.
+BOOST_FIXTURE_TEST_CASE(ParseSigmoid, SigmoidFixture)
+{
+    RunTest<2>({ -0.1f, -0.2f, -0.3f, -0.4f, 0.1f, 0.2f, 0.3f },
+               { 0.4750208f, 0.45016602f, 0.42555749f, 0.40131235f, 0.52497917f, 0.54983395f, 0.57444251f });
+}
+
+
+struct SoftplusFixture : ActivationFixture
+{
+    SoftplusFixture() : ActivationFixture("Softplus") {}
+};
+// Softplus: ln(1+e^x); expected values are precomputed references.
+BOOST_FIXTURE_TEST_CASE(ParseSoftplus, SoftplusFixture)
+{
+    RunTest<2>({ -0.1f, -0.2f, -0.3f, -0.4f, 0.1f, 0.2f, 0.3f },
+               { 0.64439666f, 0.59813893f, 0.55435526f, 0.51301527f, 0.74439669f, 0.7981388f, 0.85435522f });
+}
+
+
+struct TanhFixture : ActivationFixture
+{
+    TanhFixture() : ActivationFixture("Tanh") {}
+};
+// Tanh is odd-symmetric, so outputs for +/-x mirror each other.
+BOOST_FIXTURE_TEST_CASE(ParseTanh, TanhFixture)
+{
+    RunTest<2>({ -0.1f, -0.2f, -0.3f, -0.4f, 0.1f, 0.2f, 0.3f },
+               { -0.09966799f, -0.19737528f, -0.29131261f, -0.379949f, 0.09966799f, 0.19737528f, 0.29131261f });
+}
+
+
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Addition.cpp b/src/armnnTfParser/test/Addition.cpp
new file mode 100644
index 0000000..c9e6926
--- /dev/null
+++ b/src/armnnTfParser/test/Addition.cpp
@@ -0,0 +1,78 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+// Fixture building: graphInput (Placeholder) -> two parallel Softmax nodes -> Add.
+struct AdditionFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    AdditionFixture()
+    {
+        m_Prototext = "node { \n"
+            "    name: \"graphInput\" \n"
+            "    op: \"Placeholder\" \n"
+            "    attr { \n"
+            "      key: \"dtype\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "    attr { \n"
+            "      key: \"shape\" \n"
+            "      value { \n"
+            "        shape { \n"
+            "        } \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "  node { \n"
+            "    name: \"softmax1\" \n"
+            "    op: \"Softmax\" \n"
+            "    input: \"graphInput\" \n"
+            "    attr { \n"
+            "      key: \"T\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "  }\n"
+            "  node {\n"
+            "    name: \"softmax2\"\n"
+            "    op : \"Softmax\"\n"
+            "    input: \"graphInput\"\n"
+            "    attr { \n"
+            "      key: \"T\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "  }\n"
+            "  node {\n"
+            "    name: \"addition\"\n"
+            "    op : \"Add\"\n"
+            "    input: \"softmax1\"\n"
+            "    input: \"softmax2\"\n"
+            "    attr { \n"
+            "      key: \"T\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "  }\n";
+
+        // 1x7 input; the requested network output is the "addition" node.
+        SetupSingleInputSingleOutput({ 1, 7 }, "graphInput", "addition");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseAddition, AdditionFixture)
+{
+    // Softmax of a strongly dominant logit saturates to ~1 at that position and ~0
+    // elsewhere; adding the two identical softmax outputs therefore yields ~2 there.
+    RunTest<2>({ 0, 0, 10000, 0, 0, 0, 0 }, { 0, 0, 2, 0, 0, 0, 0 });
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/BiasAdd.cpp b/src/armnnTfParser/test/BiasAdd.cpp
new file mode 100644
index 0000000..e29aeb1
--- /dev/null
+++ b/src/armnnTfParser/test/BiasAdd.cpp
@@ -0,0 +1,104 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct BiasAddFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    explicit BiasAddFixture(const std::string& dataFormat)
+    {
+        m_Prototext = R"(
+node {
+  name: "graphInput"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "bias"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 3
+          }
+        }
+        float_val: 1
+        float_val: 2
+        float_val: 3
+      }
+    }
+  }
+}
+node {
+  name: "biasAdd"
+  op : "BiasAdd"
+  input: "graphInput"
+  input: "bias"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "data_format"
+    value {
+      s: ")" + dataFormat + R"("
+    }
+  }
+}
+)";
+
+        SetupSingleInputSingleOutput({ 1, 3, 1, 3 }, "graphInput", "biasAdd");
+    }
+};
+
+struct BiasAddFixtureNCHW : BiasAddFixture
+{
+    BiasAddFixtureNCHW() : BiasAddFixture("NCHW") {}
+};
+
+struct BiasAddFixtureNHWC : BiasAddFixture
+{
+    BiasAddFixtureNHWC() : BiasAddFixture("NHWC") {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseBiasAddNCHW, BiasAddFixtureNCHW)
+{
+    RunTest<4>(std::vector<float>(9), { 1, 1, 1, 2, 2, 2, 3, 3, 3 });
+}
+
+BOOST_FIXTURE_TEST_CASE(ParseBiasAddNHWC, BiasAddFixtureNHWC)
+{
+    RunTest<4>(std::vector<float>(9), { 1, 2, 3, 1, 2, 3, 1, 2, 3 });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/BroadcastForAdd.cpp b/src/armnnTfParser/test/BroadcastForAdd.cpp
new file mode 100644
index 0000000..4c9731d
--- /dev/null
+++ b/src/armnnTfParser/test/BroadcastForAdd.cpp
@@ -0,0 +1,149 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+// This is a special case for add, which supports broadcasting
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct BroadcastForAddFixtureSlot1 : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    BroadcastForAddFixtureSlot1()
+    {
+        m_Prototext = R"(
+        node {
+          name: "graphInput"
+          op: "Placeholder"
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "shape"
+            value {
+               shape {
+               }
+             }
+           }
+        }
+        node {
+          name: "Const_1"
+          op: "Const"
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_FLOAT
+                tensor_shape {
+                }
+                float_val: 4.0
+                float_val: 5.0
+              }
+            }
+          }
+        }
+        node {
+          name: "Add"
+          op: "Add"
+          input: "graphInput"
+          input: "Const_1"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+        }
+        )";
+
+        SetupSingleInputSingleOutput({ 1, 2, 2, 2 }, "graphInput", "Add");
+    }
+};
+
+struct BroadcastForAddFixtureSlot0 : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    BroadcastForAddFixtureSlot0()
+    {
+        m_Prototext = R"(
+    node {
+      name: "graphInput"
+      op: "Placeholder"
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+           shape {
+           }
+         }
+       }
+    }
+    node {
+      name: "Const_1"
+      op: "Const"
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_FLOAT
+            tensor_shape {
+            }
+            float_val: 4.0
+            float_val: 5.0
+          }
+        }
+      }
+    }
+    node {
+      name: "Add"
+      op: "Add"
+      input: "Const_1"
+      input: "graphInput"
+      attr {
+        key: "T"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    )";
+
+        SetupSingleInputSingleOutput({ 1, 2, 2, 2 }, "graphInput", "Add");
+    }
+};
+
+
+BOOST_FIXTURE_TEST_CASE(ParseBroadcastForAddition1, BroadcastForAddFixtureSlot1)
+{
+    RunTest<4>({ 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0 }, { 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0 });
+}
+
+BOOST_FIXTURE_TEST_CASE(ParseBroadcastForAddition0, BroadcastForAddFixtureSlot0)
+{
+    RunTest<4>({ 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0 }, { 5.0, 6.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0 });
+}
+
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Concat.cpp b/src/armnnTfParser/test/Concat.cpp
new file mode 100644
index 0000000..a7d5ea0
--- /dev/null
+++ b/src/armnnTfParser/test/Concat.cpp
@@ -0,0 +1,183 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct ConcatFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    explicit ConcatFixture(const armnn::TensorShape& inputShape0, const armnn::TensorShape& inputShape1,
+                           unsigned int concatDim)
+    {
+        m_Prototext = R"(
+        node {
+          name: "graphInput0"
+          op: "Placeholder"
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "shape"
+            value {
+              shape {
+              }
+            }
+          }
+        }
+        node {
+          name: "graphInput1"
+          op: "Placeholder"
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "shape"
+            value {
+              shape {
+              }
+            }
+          }
+        }
+        node {
+          name: "concat/axis"
+          op: "Const"
+          attr {
+            key: "dtype"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_INT32
+                tensor_shape {
+                }
+                int_val: )";
+
+        m_Prototext += std::to_string(concatDim);
+
+        m_Prototext += R"(
+              }
+            }
+          }
+        }
+        node {
+          name: "concat"
+          op: "ConcatV2"
+          input: "graphInput0"
+          input: "graphInput1"
+          input: "concat/axis"
+          attr {
+            key: "N"
+            value {
+              i: 2
+            }
+          }
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "Tidx"
+            value {
+              type: DT_INT32
+            }
+          }
+        }
+        )";
+
+        Setup({{"graphInput0", inputShape0 },
+               {"graphInput1", inputShape1 }}, {"concat"});
+    }
+};
+
+struct ConcatFixtureNCHW : ConcatFixture
+{
+    ConcatFixtureNCHW() : ConcatFixture({ 1, 1, 2, 2 }, { 1, 1, 2, 2 }, 1 ) {}
+};
+
+struct ConcatFixtureNHWC : ConcatFixture
+{
+    ConcatFixtureNHWC() : ConcatFixture({ 1, 1, 2, 2 }, { 1, 1, 2, 2 }, 3 ) {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseConcatNCHW, ConcatFixtureNCHW)
+{
+    RunTest<4>({{"graphInput0", {0.0, 1.0, 2.0, 3.0}},
+                {"graphInput1", {4.0, 5.0, 6.0, 7.0}}},
+               {{"concat", { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 }}});
+}
+
+BOOST_FIXTURE_TEST_CASE(ParseConcatNHWC, ConcatFixtureNHWC)
+{
+    RunTest<4>({{"graphInput0", {0.0, 1.0, 2.0, 3.0}},
+                {"graphInput1", {4.0, 5.0, 6.0, 7.0}}},
+               {{"concat", { 0.0, 1.0, 4.0, 5.0, 2.0, 3.0, 6.0, 7.0 }}});
+}
+
+struct ConcatFixtureDim1 : ConcatFixture
+{
+    ConcatFixtureDim1() : ConcatFixture({ 1, 2, 3, 4 }, { 1, 2, 3, 4 }, 1) {}
+};
+
+struct ConcatFixtureDim3 : ConcatFixture
+{
+    ConcatFixtureDim3() : ConcatFixture({ 1, 2, 3, 4 }, { 1, 2, 3, 4 }, 3) {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseConcatDim1, ConcatFixtureDim1)
+{
+    RunTest<4>({ { "graphInput0", {  0.0,  1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0, 10.0, 11.0,
+                                     12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0 } },
+                 { "graphInput1", {  50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0,
+                                     62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0 } } },
+               { { "concat",      {  0.0,  1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0, 10.0, 11.0,
+                                     12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
+                                     50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0,
+                                     62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 72.0, 73.0 } } });
+}
+
+BOOST_FIXTURE_TEST_CASE(ParseConcatDim3, ConcatFixtureDim3)
+{
+    RunTest<4>({ { "graphInput0", {  0.0, 1.0, 2.0, 3.0,
+                                     4.0, 5.0, 6.0, 7.0,
+                                     8.0, 9.0, 10.0, 11.0,
+                                     12.0, 13.0, 14.0, 15.0,
+                                     16.0, 17.0, 18.0, 19.0,
+                                     20.0, 21.0, 22.0, 23.0 } },
+                 { "graphInput1", {  50.0, 51.0, 52.0, 53.0,
+                                     54.0, 55.0, 56.0, 57.0,
+                                     58.0, 59.0, 60.0, 61.0,
+                                     62.0, 63.0, 64.0, 65.0,
+                                     66.0, 67.0, 68.0, 69.0,
+                                     70.0, 71.0, 72.0, 73.0 } } },
+               { { "concat",      {  0.0,  1.0,  2.0,  3.0,
+                                     50.0, 51.0, 52.0, 53.0,
+                                     4.0,  5.0,  6.0,  7.0,
+                                     54.0, 55.0, 56.0, 57.0,
+                                     8.0,  9.0,  10.0, 11.0,
+                                     58.0, 59.0, 60.0, 61.0,
+                                     12.0, 13.0, 14.0, 15.0,
+                                     62.0, 63.0, 64.0, 65.0,
+                                     16.0, 17.0, 18.0, 19.0,
+                                     66.0, 67.0, 68.0, 69.0,
+                                     20.0, 21.0, 22.0, 23.0,
+                                     70.0, 71.0, 72.0, 73.0 } } });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/ConcatOfConcats.cpp b/src/armnnTfParser/test/ConcatOfConcats.cpp
new file mode 100644
index 0000000..7316b9f
--- /dev/null
+++ b/src/armnnTfParser/test/ConcatOfConcats.cpp
@@ -0,0 +1,316 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct ConcatOfConcatsFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    explicit ConcatOfConcatsFixture(const armnn::TensorShape& inputShape0, const armnn::TensorShape& inputShape1,
+                                    const armnn::TensorShape& inputShape2, const armnn::TensorShape& inputShape3,
+                                    unsigned int concatDim)
+    {
+        m_Prototext = R"(
+            node {
+              name: "graphInput0"
+              op: "Placeholder"
+              attr {
+                key: "dtype"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+              attr {
+                key: "shape"
+                value {
+                  shape {
+                  }
+                }
+              }
+            }
+            node {
+              name: "graphInput1"
+              op: "Placeholder"
+              attr {
+                key: "dtype"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+              attr {
+                key: "shape"
+                value {
+                  shape {
+                  }
+                }
+              }
+            }
+            node {
+              name: "graphInput2"
+              op: "Placeholder"
+              attr {
+                key: "dtype"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+              attr {
+                key: "shape"
+                value {
+                  shape {
+                  }
+                }
+              }
+            }
+            node {
+              name: "graphInput3"
+              op: "Placeholder"
+              attr {
+                key: "dtype"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+              attr {
+                key: "shape"
+                value {
+                  shape {
+                  }
+                }
+              }
+            }
+            node {
+              name: "Relu"
+              op: "Relu"
+              input: "graphInput0"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+            node {
+              name: "Relu_1"
+              op: "Relu"
+              input: "graphInput1"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+            node {
+              name: "Relu_2"
+              op: "Relu"
+              input: "graphInput2"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+            node {
+              name: "Relu_3"
+              op: "Relu"
+              input: "graphInput3"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+            node {
+              name: "concat/axis"
+              op: "Const"
+              attr {
+                key: "dtype"
+                value {
+                  type: DT_INT32
+                }
+              }
+              attr {
+                key: "value"
+                value {
+                  tensor {
+                    dtype: DT_INT32
+                    tensor_shape {
+                    }
+                    int_val: )";
+                m_Prototext += std::to_string(concatDim);
+                m_Prototext += R"(
+                  }
+                }
+              }
+            }
+            node {
+              name: "concat"
+              op: "ConcatV2"
+              input: "Relu"
+              input: "Relu_1"
+              input: "concat/axis"
+              attr {
+                key: "N"
+                value {
+                  i: 2
+                }
+              }
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+              attr {
+                key: "Tidx"
+                value {
+                  type: DT_INT32
+                }
+              }
+            }
+            node {
+              name: "concat_1/axis"
+              op: "Const"
+              attr {
+                key: "dtype"
+                value {
+                  type: DT_INT32
+                }
+              }
+              attr {
+                key: "value"
+                value {
+                  tensor {
+                    dtype: DT_INT32
+                    tensor_shape {
+                    }
+                    int_val: )";
+                m_Prototext += std::to_string(concatDim);
+                m_Prototext += R"(
+                  }
+                }
+              }
+            }
+            node {
+              name: "concat_1"
+              op: "ConcatV2"
+              input: "Relu_2"
+              input: "Relu_3"
+              input: "concat_1/axis"
+              attr {
+                key: "N"
+                value {
+                  i: 2
+                }
+              }
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+              attr {
+                key: "Tidx"
+                value {
+                  type: DT_INT32
+                }
+              }
+            }
+            node {
+              name: "concat_2/axis"
+              op: "Const"
+              attr {
+                key: "dtype"
+                value {
+                  type: DT_INT32
+                }
+              }
+              attr {
+                key: "value"
+                value {
+                  tensor {
+                    dtype: DT_INT32
+                    tensor_shape {
+                    }
+                    int_val: )";
+                m_Prototext += std::to_string(concatDim);
+                m_Prototext += R"(
+                  }
+                }
+              }
+            }
+            node {
+              name: "concat_2"
+              op: "ConcatV2"
+              input: "concat"
+              input: "concat_1"
+              input: "concat_2/axis"
+              attr {
+                key: "N"
+                value {
+                  i: 2
+                }
+              }
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+              attr {
+                key: "Tidx"
+                value {
+                  type: DT_INT32
+                }
+              }
+            }
+            )";
+
+        Setup({{ "graphInput0", inputShape0 },
+               { "graphInput1", inputShape1 },
+               { "graphInput2", inputShape2 },
+               { "graphInput3", inputShape3}}, {"concat_2"});
+    }
+};
+
+struct ConcatOfConcatsFixtureNCHW : ConcatOfConcatsFixture
+{
+    ConcatOfConcatsFixtureNCHW() : ConcatOfConcatsFixture({ 1, 1, 2, 2 }, { 1, 1, 2, 2 }, { 1, 1, 2, 2 },
+                                                          { 1, 1, 2, 2 }, 1 ) {}
+};
+
+struct ConcatOfConcatsFixtureNHWC : ConcatOfConcatsFixture
+{
+    ConcatOfConcatsFixtureNHWC() : ConcatOfConcatsFixture({ 1, 1, 2, 2 }, { 1, 1, 2, 2 }, { 1, 1, 2, 2 },
+                                                          { 1, 1, 2, 2 }, 3 ) {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseConcatOfConcatsNCHW, ConcatOfConcatsFixtureNCHW)
+{
+    RunTest<4>({{"graphInput0", {0.0, 1.0, 2.0, 3.0}},
+                {"graphInput1", {4.0, 5.0, 6.0, 7.0}},
+                {"graphInput2", {8.0, 9.0, 10.0, 11.0}},
+                {"graphInput3", {12.0, 13.0, 14.0, 15.0}}},
+               {{"concat_2", { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
+                                     8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0 }}});
+}
+
+BOOST_FIXTURE_TEST_CASE(ParseConcatOfConcatsNHWC, ConcatOfConcatsFixtureNHWC)
+{
+    RunTest<4>({{"graphInput0", {0.0, 1.0, 2.0, 3.0}},
+                {"graphInput1", {4.0, 5.0, 6.0, 7.0}},
+                {"graphInput2", {8.0, 9.0, 10.0, 11.0}},
+                {"graphInput3", {12.0, 13.0, 14.0, 15.0}}},
+               {{"concat_2", { 0.0, 1.0, 4.0, 5.0, 8.0, 9.0, 12.0, 13.0,
+                                     2.0, 3.0, 6.0, 7.0, 10.0, 11.0, 14.0, 15.0 }}});
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Constant.cpp b/src/armnnTfParser/test/Constant.cpp
new file mode 100644
index 0000000..09587fc
--- /dev/null
+++ b/src/armnnTfParser/test/Constant.cpp
@@ -0,0 +1,321 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+
+#include "armnnTfParser/ITfParser.hpp"
+
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+// Tests that a Const node in Tensorflow can be converted to a ConstLayer in armnn (as opposed to most
+// Const nodes which are used as weight inputs for convolutions etc. and are therefore not converted to
+// armnn ConstLayers).
+struct ConstantFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    ConstantFixture()
+    {
+        // input = tf.placeholder(tf.float32, name = "input")
+        // const = tf.constant([17], tf.float32, [1])
+        // output = tf.add(input, const, name = "output")
+        m_Prototext =
+            R"(
+node {
+  name: "input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        unknown_rank: true
+      }
+    }
+  }
+}
+node {
+  name: "Const"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        float_val: 17.0
+      }
+    }
+  }
+}
+node {
+  name: "output"
+  op: "Add"
+  input: "input"
+  input: "Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+            )";
+        SetupSingleInputSingleOutput({ 1 }, "input", "output");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(Constant, ConstantFixture)
+{
+    RunTest<1>({1}, {18});
+}
+
+
+// Tests that a single Const node in Tensorflow can be used twice by a dependant node. This should result in only
+// a single armnn ConstLayer being created.
+struct ConstantReusedFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    ConstantReusedFixture()
+    {
+        // const = tf.constant([17], tf.float32, [1])
+        // output = tf.add(const, const, name = "output")
+        m_Prototext =
+            R"(
+node {
+  name: "Const"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        float_val: 17.0
+      }
+    }
+  }
+}
+node {
+  name: "output"
+  op: "Add"
+  input: "Const"
+  input: "Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+            )";
+        Setup({}, { "output" });
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ConstantReused, ConstantReusedFixture)
+{
+    RunTest<1>({}, { { "output", { 34 } } });
+}
+
+template <int ListSize>
+struct ConstantValueListFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    ConstantValueListFixture()
+    {
+        m_Prototext =
+            R"(
+node {
+  name: "output"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 2
+          }
+          dim {
+            size: 3
+          }
+        })";
+
+        double value = 0.75;
+        for (int i = 0; i < ListSize; i++, value += 0.25)
+        {
+            m_Prototext += std::string("float_val : ") + std::to_string(value) + "\n";
+        }
+
+        m_Prototext += 
+            R"(
+      }
+    }
+  }
+}
+            )";
+        Setup({}, { "output" });
+    }
+};
+
+using ConstantSingleValueListFixture = ConstantValueListFixture<1>;
+using ConstantMultipleValueListFixture = ConstantValueListFixture<4>;
+using ConstantMaxValueListFixture = ConstantValueListFixture<6>;
+
+BOOST_FIXTURE_TEST_CASE(ConstantSingleValueList, ConstantSingleValueListFixture)
+{
+    RunTest<2>({}, { { "output", { 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f } } });
+}
+BOOST_FIXTURE_TEST_CASE(ConstantMultipleValueList, ConstantMultipleValueListFixture)
+{
+    RunTest<2>({}, { { "output", { 0.75f, 1.f, 1.25f, 1.5f,  1.5f,  1.5f } } });
+}
+BOOST_FIXTURE_TEST_CASE(ConstantMaxValueList, ConstantMaxValueListFixture)
+{
+    RunTest<2>({}, { { "output", { 0.75f, 1.f, 1.25f, 1.50f, 1.75f, 2.f } } });
+}
+
+template <bool WithShape, bool WithContent, bool WithValueList>
+struct ConstantCreateFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    ConstantCreateFixture()
+    {
+        m_Prototext =
+            R"(
+node {
+    name: "output"
+    op: "Const"
+    attr {
+    key: "dtype"
+    value {
+        type: DT_FLOAT
+    }
+    }
+    attr {
+    key: "value"
+    value {
+        tensor {
+        dtype: DT_FLOAT
+            )";
+
+        if (WithShape)
+        {
+            m_Prototext +=
+                R"(
+tensor_shape {
+    dim {
+    size: 2
+    }
+    dim {
+    size: 2
+    }
+}
+                )";
+        }
+        else
+        {
+            m_Prototext +=
+                R"(
+tensor_shape {
+}
+                )";
+        }
+
+        if (WithContent)
+        {
+            m_Prototext +=
+                R"(
+tensor_content: "\000\000\200?\000\000\200?\000\000\200?\000\000\200?\000\000\200?"
+                )";
+        }
+
+        if (WithValueList)
+        {
+            m_Prototext +=
+                R"(
+float_val: 1.0
+float_val: 1.0
+float_val: 1.0
+float_val: 1.0
+float_val: 1.0
+                )";
+        }
+
+        m_Prototext +=
+            R"(
+            }
+        }
+    }
+}
+            )";
+    }
+};
+
+using ConstantCreateNoValueListFixture = ConstantCreateFixture<true, false, true>;
+using ConstantCreateNoValueList2Fixture = ConstantCreateFixture<true, false, false>;
+using ConstantCreateNoContentFixture = ConstantCreateFixture<true, true, false>;
+using ConstantCreateNoContent2Fixture = ConstantCreateFixture<true, false, false>;
+using ConstantCreateNoShapeFixture = ConstantCreateFixture<false, false, false>;
+using ConstantCreateNoShape2Fixture = ConstantCreateFixture<false, true, false>;
+using ConstantCreateNoShape3Fixture = ConstantCreateFixture<false, false, true>;
+
+BOOST_FIXTURE_TEST_CASE(ConstantCreateInvalidValueList, ConstantCreateNoValueListFixture)
+{
+    BOOST_REQUIRE_THROW(Setup({}, { "output" }), armnn::ParseException);
+}
+BOOST_FIXTURE_TEST_CASE(ConstantCreateInvalidValueList2, ConstantCreateNoValueList2Fixture)
+{
+    BOOST_REQUIRE_THROW(Setup({}, { "output" }), armnn::ParseException);
+}
+BOOST_FIXTURE_TEST_CASE(ConstantCreateInvalidContent, ConstantCreateNoContentFixture)
+{
+    BOOST_REQUIRE_THROW(Setup({}, { "output" }), armnn::ParseException);
+}
+BOOST_FIXTURE_TEST_CASE(ConstantCreateInvalidShape, ConstantCreateNoShapeFixture)
+{
+    BOOST_REQUIRE_THROW(Setup({}, { "output" }), armnn::ParseException);
+}
+BOOST_FIXTURE_TEST_CASE(ConstantCreateNoShape2, ConstantCreateNoShape2Fixture)
+{
+    BOOST_REQUIRE_THROW(Setup({}, { "output" }), armnn::ParseException);
+}
+BOOST_FIXTURE_TEST_CASE(ConstantCreateNoShape3, ConstantCreateNoShape3Fixture)
+{
+    Setup({}, { "output" });
+    RunTest<1>({}, { { "output", { 1.f, 1.f, 1.f, 1.f, 1.f } } });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Convolution2d.cpp b/src/armnnTfParser/test/Convolution2d.cpp
new file mode 100644
index 0000000..a7c7648
--- /dev/null
+++ b/src/armnnTfParser/test/Convolution2d.cpp
@@ -0,0 +1,322 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+#include <string>
+#include <iostream>
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct Convolution2dFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    explicit Convolution2dFixture(const char* paddingType)
+    : Convolution2dFixture(paddingType, 1)
+    {}
+
+    // dilation: 0 - dilations attribute is not included;
+    // dilation: >0 - dilations attribute set to [1,v,v,1], where v is the value of the dilation arg
+    explicit Convolution2dFixture(const char* paddingType, int stride, int dilation = 0)
+    {
+        std::string strideString = std::to_string(stride);
+        std::string dilationString = std::to_string(dilation);
+        m_Prototext = "node { \n"
+            "    name: \"graphInput\" \n"
+            "    op: \"Placeholder\" \n"
+            "    attr { \n"
+            "      key: \"dtype\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "    attr { \n"
+            "      key: \"shape\" \n"
+            "      value { \n"
+            "        shape { \n"
+            "        } \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "  node { \n"
+            "  name: \"Const_1\" \n"
+            "  op: \"Const\" \n"
+            "  attr { \n"
+            "    key: \"dtype\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"value\" \n"
+            "    value { \n"
+            "      tensor { \n"
+            "        dtype: DT_FLOAT \n"
+            "        tensor_shape { \n"
+            "          dim { \n"
+            "            size: 1 \n"
+            "          } \n"
+            "          dim { \n"
+            "            size: 3 \n"
+            "          } \n"
+            "          dim { \n"
+            "            size: 1 \n"
+            "          } \n"
+            "          dim { \n"
+            "            size: 1 \n"
+            "          } \n"
+            "        } \n"
+            "        tensor_content: \"\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?\" \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"potato\" \n"
+            "  op: \"Conv2D\" \n"
+            "  input: \"graphInput\" \n"
+            "  input: \"Const_1\" \n"
+            "  attr { \n"
+            "    key: \"T\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"data_format\" \n"
+            "    value { \n"
+            "      s: \"NHWC\" \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"padding\" \n"
+            "    value { \n"
+            "      s: \"";
+        m_Prototext.append(paddingType);
+        m_Prototext.append("\"\n"
+                           "    } \n"
+                           "  } \n"
+                           "  attr { \n"
+                           "    key: \"strides\" \n"
+                           "    value { \n"
+                           "      list { \n"
+                           "        i: 1 \n"
+                           "        i: 1 \n"
+                           "        i: ");
+        m_Prototext.append(strideString);
+        m_Prototext.append(" \n"
+                           "        i: 1 \n"
+                           "      } \n"
+                           "    } \n"
+                           "  } \n");
+
+        if (dilation > 0)
+        {
+            m_Prototext.append("  attr { \n"
+                               "    key: \"dilations\" \n"
+                               "    value { \n"
+                               "      list { \n"
+                               "        i: 1 \n"
+                               "        i: ");
+            m_Prototext.append(dilationString);
+            m_Prototext.append(" \n"
+                               "        i: ");
+            m_Prototext.append(dilationString);
+            m_Prototext.append(" \n"
+                               "        i: 1 \n"
+                               "      } \n"
+                               "    } \n"
+                               "  } \n");
+        }
+        m_Prototext.append("  attr { \n"
+                           "    key: \"use_cudnn_on_gpu\" \n"
+                           "    value { \n"
+                           "      b: false \n"
+                           "    } \n"
+                           "  } \n"
+                           "} \n");
+
+        // Manual height computation based on stride parameter.
+        BOOST_ASSERT_MSG(stride == 1 || stride==2, "Add support for strides other than 1 or 2.");
+        unsigned int dims[] = {1,2,3,1};
+        if (stride == 2)
+        {
+            dims[1]=3;
+        }
+
+        SetupSingleInputSingleOutput(armnn::TensorShape(4, dims), "graphInput", "potato");
+    }
+};
+
+
+struct Convolution2dSameFixture : Convolution2dFixture
+{
+    Convolution2dSameFixture() : Convolution2dFixture("SAME", 1){}
+};
+BOOST_FIXTURE_TEST_CASE(ParseConv2DSame, Convolution2dSameFixture)
+{
+    RunTest<4>({1, 2, 3, 4, 5, 6}, {2, 4, 4, 6.5f, 10 , 8.5f});
+}
+
+struct Convolution2dValidFixture : Convolution2dFixture
+{
+    Convolution2dValidFixture() : Convolution2dFixture("VALID", 1){}
+};
+BOOST_FIXTURE_TEST_CASE(ParseConv2DValid, Convolution2dValidFixture)
+{
+    RunTest<4>({1, 2, 3, 4, 5, 6}, {4, 10});
+}
+
+
+struct Convolution2dStride2SameFixture : Convolution2dFixture
+{
+    Convolution2dStride2SameFixture() : Convolution2dFixture("SAME", 2){}
+};
+BOOST_FIXTURE_TEST_CASE(ParseConv2DStride2Same, Convolution2dStride2SameFixture)
+{
+    RunTest<4>({1, 2, 3, 4, 5, 6, 7, 8, 9}, {2, 4, 6.5, 8.5, 11, 13});
+}
+
+
+struct Convolution2dStride2ValidFixture : Convolution2dFixture
+{
+    Convolution2dStride2ValidFixture() : Convolution2dFixture("VALID", 2){}
+};
+BOOST_FIXTURE_TEST_CASE(ParseConv2DStride2Valid, Convolution2dStride2ValidFixture)
+{
+    RunTest<4>({1, 2, 3, 4, 5, 6, 7, 8, 9}, {4, 10, 16});
+}
+
+
+struct Convolution2dDilation1Fixture : Convolution2dFixture
+{
+    Convolution2dDilation1Fixture() : Convolution2dFixture("SAME", 1, 1){}
+};
+BOOST_FIXTURE_TEST_CASE(ParseConv2DDilation1, Convolution2dDilation1Fixture)
+{
+    RunTest<4>({1, 2, 3, 4, 5, 6}, {2, 4, 4, 6.5f, 10 , 8.5f});
+}
+
+BOOST_AUTO_TEST_CASE(ParseConv2DDilation2)
+{
+    const char* prototext = ""
+        "node {\n"
+        "  name: \"graphInput\"\n"
+        "  op: \"Placeholder\"\n"
+        "  attr {\n"
+        "    key: \"dtype\"\n"
+        "    value {\n"
+        "      type: DT_FLOAT\n"
+        "    }\n"
+        "  }\n"
+        "  attr {\n"
+        "    key: \"shape\"\n"
+        "    value {\n"
+        "      shape {\n"
+        "      }\n"
+        "    }\n"
+        "  }\n"
+        "}\n"
+        "node {\n"
+        "  name: \"Const_1\"\n"
+        "  op: \"Const\"\n"
+        "  attr {\n"
+        "    key: \"dtype\"\n"
+        "    value {\n"
+        "      type: DT_FLOAT\n"
+        "    }\n"
+        "  }\n"
+        "  attr {\n"
+        "    key: \"value\"\n"
+        "    value {\n"
+        "      tensor {\n"
+        "        dtype: DT_FLOAT\n"
+        "        tensor_shape {\n"
+        "          dim {\n"
+        "            size: 1\n"
+        "          }\n"
+        "          dim {\n"
+        "            size: 3\n"
+        "          }\n"
+        "          dim {\n"
+        "            size: 1\n"
+        "          }\n"
+        "          dim {\n"
+        "            size: 1\n"
+        "          }\n"
+        "        }\n"
+        "        tensor_content: \"\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?\"\n"
+        "      }\n"
+        "    }\n"
+        "  }\n"
+        "}\n"
+        "node {\n"
+        "  name: \"potato\"\n"
+        "  op: \"Conv2D\"\n"
+        "  input: \"graphInput\"\n"
+        "  input: \"Const_1\"\n"
+        "  attr {\n"
+        "    key: \"T\"\n"
+        "    value {\n"
+        "      type: DT_FLOAT\n"
+        "    }\n"
+        "  }\n"
+        "  attr {\n"
+        "    key: \"data_format\"\n"
+        "    value {\n"
+        "      s: \"NHWC\"\n"
+        "    }\n"
+        "  }\n"
+        "  attr {\n"
+        "    key: \"padding\"\n"
+        "    value {\n"
+        "      s: \"SAME\"\n"
+        "    }\n"
+        "  }\n"
+        "  attr {\n"
+        "    key: \"strides\"\n"
+        "    value {\n"
+        "      list {\n"
+        "        i: 1\n"
+        "        i: 1\n"
+        "        i: 1\n"
+        "        i: 1\n"
+        "      }\n"
+        "    }\n"
+        "  }\n"
+        "  attr {\n"
+        "    key: \"dilations\"\n"
+        "    value {\n"
+        "      list {\n"
+        "        i: 1\n"
+        "        i: 2\n"
+        "        i: 2\n"
+        "        i: 1\n"
+        "      }\n"
+        "    }\n"
+        "  }\n"
+        "  attr {\n"
+        "    key: \"use_cudnn_on_gpu\"\n"
+        "    value {\n"
+        "      b: false\n"
+        "    }\n"
+        "  }\n"
+        "}\n";
+
+    std::map<std::string, armnn::TensorShape> inputShapes;
+    armnn::TensorShape tensorShape = { 1, 3, 3, 1 };
+    inputShapes["graphInput"] = tensorShape;
+    armnnTfParser::ITfParserPtr parser = armnnTfParser::ITfParser::Create();
+    BOOST_CHECK_EXCEPTION(parser->CreateNetworkFromString(prototext, inputShapes, { "potato" }),
+                          armnn::ParseException,
+                          [] (armnn::ParseException const& ex)->bool
+                          {
+                                return strcmp(ex.what(),
+                                              "ArmNN only supports Convolution layers with dilations [1,1,1,1]") == 0;
+                          });
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/DepthwiseConvolution2d.cpp b/src/armnnTfParser/test/DepthwiseConvolution2d.cpp
new file mode 100644
index 0000000..84e7a7e
--- /dev/null
+++ b/src/armnnTfParser/test/DepthwiseConvolution2d.cpp
@@ -0,0 +1,166 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+#include <string>
+#include <iostream>
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct DepthwiseConvolution2dFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    explicit DepthwiseConvolution2dFixture(const char* paddingType)
+    {
+        m_Prototext = "node { \n"
+                      "    name: \"graphInput\" \n"
+                      "    op: \"Placeholder\" \n"
+                      "    attr { \n"
+                      "      key: \"dtype\" \n"
+                      "      value { \n"
+                      "        type: DT_FLOAT \n"
+                      "      } \n"
+                      "    } \n"
+                      "    attr { \n"
+                      "      key: \"value\" \n"
+                      "      value { \n"
+                      "        tensor { \n"
+                      "          dtype: DT_FLOAT \n"
+                      "          tensor_shape { \n"
+                      "            dim { \n"
+                      "              size: 1 \n"
+                      "            } \n"
+                      "            dim { \n"
+                      "              size: 1 \n"
+                      "            } \n"
+                      "            dim { \n"
+                      "              size: 3 \n"
+                      "            } \n"
+                      "            dim { \n"
+                      "              size: 3 \n"
+                      "            } \n"
+                      "          } \n"
+                      "          tensor_content: \"\\000\\000\\200?\\000\\000\\000@\\000\\000@@\\000\\000\\200@"
+                      "\\000\\000\\240@\\000\\000\\300@\\000\\000\\340@\\000\\000\\000A\\000\\000\\020A\" \n"
+                      "        } \n"
+                      "      } \n"
+                      "    } \n"
+                      "  } \n"
+                      "  node { \n"
+                      "  name: \"Const_1\" \n"
+                      "  op: \"Const\" \n"
+                      "  attr { \n"
+                      "    key: \"dtype\" \n"
+                      "    value { \n"
+                      "      type: DT_FLOAT \n"
+                      "    } \n"
+                      "  } \n"
+                      "  attr { \n"
+                      "    key: \"value\" \n"
+                      "    value { \n"
+                      "      tensor { \n"
+                      "        dtype: DT_FLOAT \n"
+                      "        tensor_shape { \n"
+                      "          dim { \n"
+                      "            size: 1 \n"
+                      "          } \n"
+                      "          dim { \n"
+                      "            size: 3 \n"
+                      "          } \n"
+                      "          dim { \n"
+                      "            size: 3 \n"
+                      "          } \n"
+                      "          dim { \n"
+                      "            size: 3 \n"
+                      "          } \n"
+                      "        } \n"
+                      "        tensor_content: \"\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?"
+                      "\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?"
+                      "\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?"
+                      "\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?"
+                      "\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?"
+                      "\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?"
+                      "\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?"
+                      "\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?"
+                      "\\000\\000\\000?\\000\\000\\200?\\000\\000\\000?\" \n"
+                      "      } \n"
+                      "    } \n"
+                      "  } \n"
+                      "} \n"
+                      "node { \n"
+                      "  name: \"potato\" \n"
+                      "  op: \"DepthwiseConv2dNative\" \n"
+                      "  input: \"graphInput\" \n"
+                      "  input: \"Const_1\" \n"
+                      "  attr { \n"
+                      "    key: \"T\" \n"
+                      "    value { \n"
+                      "      type: DT_FLOAT \n"
+                      "    } \n"
+                      "  } \n"
+                      "  attr { \n"
+                      "    key: \"data_format\" \n"
+                      "    value { \n"
+                      "      s: \"NHWC\" \n"
+                      "    } \n"
+                      "  } \n"
+                      "  attr { \n"
+                      "    key: \"padding\" \n"
+                      "    value { \n"
+                      "      s: \"";
+        m_Prototext.append(paddingType);
+        m_Prototext.append("\"\n"
+                      "    } \n"
+                      "  } \n"
+                      "  attr { \n"
+                      "    key: \"strides\" \n"
+                      "    value { \n"
+                      "      list { \n"
+                      "        i: 1 \n"
+                      "        i: 1 \n"
+                      "        i: 1 \n"
+                      "        i: 1 \n"
+                      "      } \n"
+                      "    } \n"
+                      "  } \n"
+                      "  attr { \n"
+                      "    key: \"use_cudnn_on_gpu\" \n"
+                      "    value { \n"
+                      "      b: false \n"
+                      "    } \n"
+                      "  } \n"
+                      "} \n");
+
+        SetupSingleInputSingleOutput({ 1, 1, 3, 3 }, "graphInput", "potato");
+    }
+};
+
+struct DepthwiseConvolution2dSameFixture : DepthwiseConvolution2dFixture
+{
+    DepthwiseConvolution2dSameFixture() : DepthwiseConvolution2dFixture("SAME") { }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DSame, DepthwiseConvolution2dSameFixture)
+{
+    RunTest<4>({ 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+               { 2.5f, 5.f,  2.5f, 3.5f, 7.f,  3.5f, 4.5f, 9.f,  4.5f,
+                 6.f,  12.f, 6.f,  7.5f, 15.f, 7.5f, 9.f,  18.f, 9.f,
+                 5.5f, 11.f, 5.5f, 6.5f, 13.f, 6.5f, 7.5f, 15.f, 7.5f});
+}
+
+struct DepthwiseConvolution2dValidFixture : DepthwiseConvolution2dFixture
+{
+    DepthwiseConvolution2dValidFixture() : DepthwiseConvolution2dFixture("VALID") { }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseDepthwiseConv2DValid, DepthwiseConvolution2dValidFixture)
+{
+    RunTest<4>({ 1, 2, 3, 4, 5, 6, 7, 8, 9 }, // input data
+               { 6.f,  12.f, 6.f,  7.5f, 15.f, 7.5f, 9.f,  18.f, 9.f });  // output expected data
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/FullyConnected.cpp b/src/armnnTfParser/test/FullyConnected.cpp
new file mode 100644
index 0000000..2a7b495
--- /dev/null
+++ b/src/armnnTfParser/test/FullyConnected.cpp
@@ -0,0 +1,579 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+#include "Runtime.hpp"
+#include "Network.hpp"
+#include "Graph.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+// In Tensorflow fully connected layers are expressed as a MatMul followed by an Add.
+// The TfParser must detect this case and convert them to a FullyConnected layer.
+struct FullyConnectedFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    FullyConnectedFixture()
+    {
+        // input = tf.placeholder(tf.float32, [1, 1], "input")
+        // weights = tf.constant([2], tf.float32, [1, 1])
+        // matmul = tf.matmul(input, weights)
+        // bias = tf.constant([1], tf.float32)
+        // output = tf.add(matmul, bias, name="output")
+        m_Prototext = R"(
+node {
+  name: "input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Const"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+        float_val: 2.0
+      }
+    }
+  }
+}
+node {
+  name: "MatMul"
+  op: "MatMul"
+  input: "input"
+  input: "Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Const_1"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        float_val: 1.0
+      }
+    }
+  }
+}
+node {
+  name: "output"
+  op: "Add"
+  input: "MatMul"
+  input: "Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+        )";
+        SetupSingleInputSingleOutput({ 1, 1 }, "input", "output");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(FullyConnected, FullyConnectedFixture)
+{
+    RunTest<1>({ 3 }, { 7 });
+}
+
+// Similar to FullyConnectedFixture, but this time the MatMul's output is used by two Adds. This should result
+// in two FullyConnected layers being created.
+//      I
+//      |
+//      M -- C
+//     / \'
+// C-- A  A -- C
+//     \ /
+//      A
+struct MatMulUsedInTwoFcFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MatMulUsedInTwoFcFixture()
+    {
+        m_Prototext = R"(
+node {
+  name: "input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Const"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+        float_val: 2.0
+      }
+    }
+  }
+}
+node {
+  name: "MatMul"
+  op: "MatMul"
+  input: "input"
+  input: "Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Const_1"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        float_val: 5.0
+      }
+    }
+  }
+}
+node {
+  name: "Const_2"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        float_val: 15.0
+      }
+    }
+  }
+}
+node {
+  name: "Add"
+  op: "Add"
+  input: "MatMul"
+  input: "Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "Add_1"
+  op: "Add"
+  input: "MatMul"
+  input: "Const_2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "output"
+  op: "Add"
+  input: "Add"
+  input: "Add_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+        )";
+        SetupSingleInputSingleOutput({ 1, 1 }, "input", "output");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(MatMulUsedInTwoFc, MatMulUsedInTwoFcFixture)
+{
+    RunTest<1>({ 3 }, { 32 });
+    // Ideally we would check here that the armnn network has 5 layers:
+    //  Input, 2 x FullyConnected (biased), Add and Output.
+    // This would make sure the parser hasn't incorrectly added some unconnected layers corresponding to the MatMul
+}
+
+// Similar to MatMulUsedInTwoFc, but this time the Adds are 'staggered' (see diagram), which means that only one
+// FullyConnected layer can be created (the other should just be an Add).
+//        I
+//        |
+//        M -- C1
+//       / \'
+// C2 -- A  |
+//       \ /
+//        A
+struct MatMulUsedInTwoFcStaggeredFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MatMulUsedInTwoFcStaggeredFixture()
+    {
+        // input = tf.placeholder(tf.float32, shape=[1,1], name = "input")
+        // const1 = tf.constant([17], tf.float32, [1,1])
+        // mul = tf.matmul(input, const1)
+        // const2 = tf.constant([7], tf.float32, [1])
+        // fc = tf.add(mul, const2)
+        // output = tf.add(mul, fc, name="output")
+        m_Prototext = R"(
+node {
+  name: "input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Const"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+        float_val: 17.0
+      }
+    }
+  }
+}
+node {
+  name: "MatMul"
+  op: "MatMul"
+  input: "input"
+  input: "Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+node {
+  name: "Const_1"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+        }
+        float_val: 7.0
+      }
+    }
+  }
+}
+node {
+  name: "Add"
+  op: "Add"
+  input: "MatMul"
+  input: "Const_1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "output"
+  op: "Add"
+  input: "MatMul"
+  input: "Add"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+        )";
+        SetupSingleInputSingleOutput({ 1, 1 }, "input", "output");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(MatMulUsedInTwoFcStaggered, MatMulUsedInTwoFcStaggeredFixture)
+{
+    RunTest<1>({ 2 }, { 75 });
+    // Ideally we would check here that the armnn network has 5 layers:
+    //   Input, FullyConnected (biased), FullyConnected (non biased), Add and Output.
+}
+
+// A MatMul in isolation, not connected to an add. Should result in a non-biased FullyConnected layer.
+struct MatMulFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MatMulFixture()
+    {
+        // input = tf.placeholder(tf.float32, shape = [1, 1], name = "input")
+        // const = tf.constant([17], tf.float32, [1, 1])
+        //  output = tf.matmul(input, const, name = "output")
+        m_Prototext = R"(
+node {
+  name: "input"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+        dim {
+          size: 1
+        }
+        dim {
+          size: 1
+        }
+      }
+    }
+  }
+}
+node {
+  name: "Const"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 1
+          }
+        }
+        float_val: 17.0
+      }
+    }
+  }
+}
+node {
+  name: "output"
+  op: "MatMul"
+  input: "input"
+  input: "Const"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "transpose_a"
+    value {
+      b: false
+    }
+  }
+  attr {
+    key: "transpose_b"
+    value {
+      b: false
+    }
+  }
+}
+        )";
+        SetupSingleInputSingleOutput({ 1, 1 }, "input", "output");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(MatMul, MatMulFixture)
+{
+    RunTest<1>({ 2 }, { 34 });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/FusedBatchNorm.cpp b/src/armnnTfParser/test/FusedBatchNorm.cpp
new file mode 100644
index 0000000..632d5f0
--- /dev/null
+++ b/src/armnnTfParser/test/FusedBatchNorm.cpp
@@ -0,0 +1,175 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct FusedBatchNormFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    FusedBatchNormFixture()
+    {
+        m_Prototext = "node { \n"
+            "  name: \"graphInput\" \n"
+            "  op: \"Placeholder\" \n"
+            "  attr { \n"
+            "    key: \"dtype\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"shape\" \n"
+            "    value { \n"
+            "      shape { \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"Const_1\" \n"
+            "  op: \"Const\" \n"
+            "  attr { \n"
+            "    key: \"dtype\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"value\" \n"
+            "    value { \n"
+            "      tensor { \n"
+            "        dtype: DT_FLOAT \n"
+            "        tensor_shape { \n"
+            "          dim { \n"
+            "            size: 1 \n"
+            "          } \n"
+            "        } \n"
+            "        float_val: 1.0 \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"Const_2\" \n"
+            "  op: \"Const\" \n"
+            "  attr { \n"
+            "    key: \"dtype\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"value\" \n"
+            "    value { \n"
+            "      tensor { \n"
+            "        dtype: DT_FLOAT \n"
+            "        tensor_shape { \n"
+            "          dim { \n"
+            "            size: 1 \n"
+            "          } \n"
+            "        } \n"
+            "        float_val: 0.0 \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"FusedBatchNormLayer/mean\" \n"
+            "  op: \"Const\" \n"
+            "  attr { \n"
+            "    key: \"dtype\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"value\" \n"
+            "    value { \n"
+            "      tensor { \n"
+            "        dtype: DT_FLOAT \n"
+            "        tensor_shape { \n"
+            "          dim { \n"
+            "            size: 1 \n"
+            "          } \n"
+            "        } \n"
+            "        float_val: 5.0 \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"FusedBatchNormLayer/variance\" \n"
+            "  op: \"Const\" \n"
+            "  attr { \n"
+            "    key: \"dtype\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"value\" \n"
+            "    value { \n"
+            "      tensor { \n"
+            "        dtype: DT_FLOAT \n"
+            "        tensor_shape { \n"
+            "          dim { \n"
+            "            size: 1 \n"
+            "          } \n"
+            "        } \n"
+            "        float_val: 2.0 \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"output\" \n"
+            "  op: \"FusedBatchNorm\" \n"
+            "  input: \"graphInput\" \n"
+            "  input: \"Const_1\" \n"
+            "  input: \"Const_2\" \n"
+            "  input: \"FusedBatchNormLayer/mean\" \n"
+            "  input: \"FusedBatchNormLayer/variance\" \n"
+            "  attr { \n"
+            "    key: \"T\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"data_format\" \n"
+            "    value { \n"
+            "      s: \"NHWC\" \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"epsilon\" \n"
+            "    value { \n"
+            "      f: 0.0010000000475 \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"is_training\" \n"
+            "    value { \n"
+            "      b: false \n"
+            "    } \n"
+            "  } \n"
+            "} \n";
+
+        SetupSingleInputSingleOutput({1, 3, 3, 1}, "graphInput", "output");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseFusedBatchNorm, FusedBatchNormFixture)
+{
+    RunTest<4>({1, 2, 3, 4, 5, 6, 7, 8, 9},             // input data
+               {-2.8277204f, -2.12079024f, -1.4138602f,
+                -0.7069301f, 0.0f, 0.7069301f,
+                1.4138602f, 2.12079024f, 2.8277204f});  // expected output data
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Identity.cpp b/src/armnnTfParser/test/Identity.cpp
new file mode 100644
index 0000000..ca20de5
--- /dev/null
+++ b/src/armnnTfParser/test/Identity.cpp
@@ -0,0 +1,161 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct IdentitySimpleFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    IdentitySimpleFixture()
+    {
+        m_Prototext = "node{ "
+            "  name: \"Placeholder\""
+            "  op: \"Placeholder\""
+            "  attr {"
+            "    key: \"dtype\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "  attr {"
+            "    key: \"shape\""
+            "    value {"
+            "      shape {"
+            "        unknown_rank: true"
+            "      }"
+            "    }"
+            "  }"
+            "}"
+            "node {"
+            "  name: \"Identity\""
+            "  op: \"Identity\""
+            "  input: \"Placeholder\""
+            "  attr {"
+            "    key: \"T\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "}";
+        SetupSingleInputSingleOutput({ 4 }, "Placeholder", "Identity");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(IdentitySimple, IdentitySimpleFixture)
+{
+    RunTest<1>({ 1.0f, 2.0f, 3.0f, 4.0f }, { 1.0f, 2.0f, 3.0f, 4.0f });
+}
+
+struct IdentityFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    IdentityFixture()
+    {
+        m_Prototext = "node{ "
+            "  name: \"Placeholder\""
+            "  op: \"Placeholder\""
+            "  attr {"
+            "    key: \"dtype\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "  attr {"
+            "    key: \"shape\""
+            "    value {"
+            "      shape {"
+            "        unknown_rank: true"
+            "      }"
+            "    }"
+            "  }"
+            "}"
+            "node {"
+            "  name: \"Identity\""
+            "  op: \"Identity\""
+            "  input: \"Placeholder\""
+            "  attr {"
+            "    key: \"T\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "}"
+            "node {"
+            "  name: \"Add\""
+            "  op: \"Add\""
+            "  input: \"Identity\""
+            "  input: \"Identity\""
+            "  attr {"
+            "    key: \"T\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "}";
+        SetupSingleInputSingleOutput({ 4 }, "Placeholder", "Add");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseIdentity, IdentityFixture)
+{
+    RunTest<1>({ 1.0f, 2.0f, 3.0f, 4.0f }, { 2.0f, 4.0f, 6.0f, 8.0f });
+}
+
+struct IdentityChainFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    IdentityChainFixture()
+    {
+        m_Prototext = "node{ "
+            "  name: \"Placeholder\""
+            "  op: \"Placeholder\""
+            "  attr {"
+            "    key: \"dtype\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "  attr {"
+            "    key: \"shape\""
+            "    value {"
+            "      shape {"
+            "        unknown_rank: true"
+            "      }"
+            "    }"
+            "  }"
+            "}"
+            "node {"
+            "  name: \"Identity\""
+            "  op: \"Identity\""
+            "  input: \"Placeholder\""
+            "  attr {"
+            "    key: \"T\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "}"
+            "node {"
+            "  name: \"Identity2\""
+            "  op: \"Identity\""
+            "  input: \"Identity\""
+            "  attr {"
+            "    key: \"T\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "}";
+        SetupSingleInputSingleOutput({ 4 }, "Placeholder", "Identity2");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(IdentityChain, IdentityChainFixture)
+{
+    RunTest<1>({ 1.0f, 2.0f, 3.0f, 4.0f }, { 1.0f, 2.0f, 3.0f, 4.0f });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/LocalResponseNormalization.cpp b/src/armnnTfParser/test/LocalResponseNormalization.cpp
new file mode 100644
index 0000000..a7c2bfe
--- /dev/null
+++ b/src/armnnTfParser/test/LocalResponseNormalization.cpp
@@ -0,0 +1,121 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+
+struct LocalResponseNormalizationBaseFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    explicit LocalResponseNormalizationBaseFixture(float alpha, float beta, float bias)
+    {
+        std::string alphaString = std::to_string(alpha);
+        std::string betaString = std::to_string(beta);
+        std::string biasString = std::to_string(bias);
+
+        m_Prototext = "node {"
+            "  name: \"Placeholder\""
+            "  op: \"Placeholder\""
+            "  attr {"
+            "    key: \"dtype\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "  attr {"
+            "    key: \"shape\""
+            "    value {"
+            "      shape {"
+            "        unknown_rank: true"
+            "      }"
+            "    }"
+            "  }"
+            "}"
+            "node {"
+            "  name: \"LRN\""
+            "  op: \"LRN\""
+            "  input: \"Placeholder\""
+            "  attr {"
+            "    key: \"T\""
+            "    value {"
+            "      type: DT_FLOAT"
+            "    }"
+            "  }"
+            "  attr {"
+            "    key: \"alpha\""
+            "    value {"
+            "      f: ";
+        m_Prototext.append(alphaString);
+        m_Prototext.append("\n"
+            "    }"
+            "  }"
+            "  attr {"
+            "    key: \"beta\""
+            "    value {"
+            "      f: ");
+        m_Prototext.append(betaString);
+        m_Prototext.append("\n"
+            "    }"
+            "  }"
+            "  attr {"
+            "    key: \"bias\""
+            "    value {"
+            "      f: ");
+        m_Prototext.append(biasString);
+        m_Prototext.append("\n"
+            "    }"
+            "  }"
+            "  attr {"
+            "    key: \"depth_radius\""
+            "    value {"
+            "      i: 1"
+            "    }"
+            "  }"
+            "}");
+    }
+};
+
+
+struct LocalResponseNormalizationFixtureSimple : public LocalResponseNormalizationBaseFixture
+{
+    explicit LocalResponseNormalizationFixtureSimple()
+        : LocalResponseNormalizationBaseFixture(1.0f, 1.0f, 1.0f)
+    {
+        SetupSingleInputSingleOutput({ 2, 2, 2, 1 }, "Placeholder", "LRN");
+    }
+};
+BOOST_FIXTURE_TEST_CASE(ParseSimpleLocalResponseNormalization, LocalResponseNormalizationFixtureSimple)
+{
+    RunTest<4>({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f },
+               { 0.5f, 0.4f, 0.3f, 0.23529412f, 0.1923077f, 0.16216217f, 0.14f, 0.12307692f });
+}
+
+
+struct LocalResponseNormalizationFixture : public LocalResponseNormalizationBaseFixture
+{
+    explicit LocalResponseNormalizationFixture()
+        : LocalResponseNormalizationBaseFixture(0.5f, 1.0f, 0.5f)
+    {
+        SetupSingleInputSingleOutput({1, 3, 3, 2}, "Placeholder", "LRN");
+    }
+};
+BOOST_FIXTURE_TEST_CASE(ParseLocalResponseNormalization, LocalResponseNormalizationFixture)
+{
+    RunTest<4>({ 1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,
+                 7.0f,  8.0f,  9.0f, 10.0f, 11.0f, 12.0f,
+                13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f},
+
+               {0.333333340f, 0.66666670f, 0.230769250f, 0.307692320f, 0.161290320f, 0.19354838f,
+                0.122807020f, 0.14035088f, 0.098901100f, 0.109890110f, 0.082706770f, 0.09022556f,
+                0.071038246f, 0.07650273f, 0.062240668f, 0.066390045f, 0.055374593f, 0.05863192f});
+}
+
+
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/MultiOutput.cpp b/src/armnnTfParser/test/MultiOutput.cpp
new file mode 100644
index 0000000..56be33d
--- /dev/null
+++ b/src/armnnTfParser/test/MultiOutput.cpp
@@ -0,0 +1,144 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct MultiOutMatchFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MultiOutMatchFixture()
+    {
+        m_Prototext = R"(
+node {
+    name: "input"
+    op: "Placeholder"
+    attr {
+        key: "dtype"
+        value {
+            type: DT_FLOAT
+        }
+    }
+    attr {
+        key: "shape"
+        value {
+            shape {
+            }
+        }
+    }
+}
+node {
+    name: "softmax1"
+    op: "Softmax"
+    input: "input:0"
+    attr {
+        key: "T"
+        value {
+            type: DT_FLOAT
+        }
+    }
+}
+        )";
+        SetupSingleInputSingleOutput({ 1, 7 }, "input", "softmax1");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(MultiOutMatch, MultiOutMatchFixture)
+{
+    // Note that the point of this test is to verify the parsing went well.
+    // Here we make sure the softmax has really connected to the input layer.
+    RunTest<2>({ 0, 0, 10000, 0, 0, 0, 0 }, { 0, 0, 1, 0, 0, 0, 0 });
+}
+
+struct MultiOutFailFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MultiOutFailFixture()
+    {
+        m_Prototext = R"(
+node {
+    name: "input"
+    op: "Placeholder"
+    attr {
+        key: "dtype"
+        value {
+            type: DT_FLOAT
+        }
+    }
+    attr {
+        key: "shape"
+        value {
+            shape {
+            }
+        }
+    }
+}
+node {
+    name: "softmax1"
+    op: "Softmax"
+    input: "input:1"
+    attr {
+        key: "T"
+        value {
+            type: DT_FLOAT
+        }
+    }
+}
+        )";
+        BOOST_CHECK_THROW(SetupSingleInputSingleOutput({ 1, 7 }, "input", "softmax1"), armnn::ParseException);
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(MultiOutFail, MultiOutFailFixture)
+{
+    // Not running the graph because this is expected to throw an exception during parsing.
+}
+
+struct MultiOutInvalidFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MultiOutInvalidFixture()
+    {
+        m_Prototext = R"(
+node {
+    name: "input"
+    op: "Placeholder"
+    attr {
+        key: "dtype"
+        value {
+            type: DT_FLOAT
+        }
+    }
+    attr {
+        key: "shape"
+        value {
+            shape {
+            }
+        }
+    }
+}
+node {
+    name: "softmax1"
+    op: "Softmax"
+    input: "input:-1"
+    attr {
+        key: "T"
+        value {
+            type: DT_FLOAT
+        }
+    }
+}
+        )";
+        BOOST_CHECK_THROW(SetupSingleInputSingleOutput({ 1, 7 }, "input", "softmax1"), armnn::ParseException);
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(MultiOutInvalid, MultiOutInvalidFixture)
+{
+    // Not running the graph because this is expected to throw an exception during parsing.
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/src/armnnTfParser/test/Multiplication.cpp b/src/armnnTfParser/test/Multiplication.cpp
new file mode 100644
index 0000000..3a20fd1
--- /dev/null
+++ b/src/armnnTfParser/test/Multiplication.cpp
@@ -0,0 +1,172 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct MultiplicationFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MultiplicationFixture()
+    {
+        m_Prototext = "node { \n"
+            "    name: \"graphInput\" \n"
+            "    op: \"Placeholder\" \n"
+            "    attr { \n"
+            "      key: \"dtype\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "    attr { \n"
+            "      key: \"shape\" \n"
+            "      value { \n"
+            "        shape { \n"
+            "        } \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "  node { \n"
+            "    name: \"softmax1\" \n"
+            "    op: \"Softmax\" \n"
+            "    input: \"graphInput\" \n"
+            "    attr { \n"
+            "      key: \"T\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "  }\n"
+            "  node {\n"
+            "    name: \"softmax2\"\n"
+            "    op : \"Softmax\"\n"
+            "    input: \"graphInput\"\n"
+            "    attr { \n"
+            "      key: \"T\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "  }\n"
+            "  node {\n"
+            "    name: \"multiplication\"\n"
+            "    op : \"Mul\"\n"
+            "    input: \"softmax1\"\n"
+            "    input: \"softmax2\"\n"
+            "    attr { \n"
+            "      key: \"T\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "  }\n";
+
+        SetupSingleInputSingleOutput({ 1, 7 }, "graphInput", "multiplication");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseMultiplication, MultiplicationFixture)
+{
+    RunTest<2>({ 0, 0, 10000, 0, 0, 0, 0 }, { 0, 0, 1, 0, 0, 0, 0 });
+}
+
+struct MultiplicationBroadcastFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MultiplicationBroadcastFixture(const armnn::TensorShape& inputShape0, const armnn::TensorShape& inputShape1)
+    {
+        m_Prototext = R"(
+node {
+  name: "input0"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "input1"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "output"
+  op: "Mul"
+  input: "input0"
+  input: "input1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+        )";
+
+        Setup({ { "input0", inputShape0 },
+                { "input1", inputShape1 } },
+              { "output" });
+    }
+};
+
+struct MultiplicationBroadcastFixture4D1D : public MultiplicationBroadcastFixture
+{
+    MultiplicationBroadcastFixture4D1D() : MultiplicationBroadcastFixture({ 1, 2, 2, 3 }, { 1 }) {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseMultiplicationBroadcast4D1D, MultiplicationBroadcastFixture4D1D)
+{
+    RunTest<4>({ { "input0", { 0.0f,  1.0f,  2.0f,
+                               3.0f,  4.0f,  5.0f,
+                               6.0f,  7.0f,  8.0f,
+                               9.0f, 10.0f, 11.0f } },
+                 { "input1", { 5.0f } } },
+               { { "output", { 0.0f,  5.0f, 10.0f,
+                              15.0f, 20.0f, 25.0f,
+                              30.0f, 35.0f, 40.0f,
+                              45.0f, 50.0f, 55.0f } } });
+}
+
+struct MultiplicationBroadcastFixture1D4D : public MultiplicationBroadcastFixture
+{
+    MultiplicationBroadcastFixture1D4D() : MultiplicationBroadcastFixture({ 1 }, { 1, 2, 2, 3 }) {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseMultiplicationBroadcast1D4D, MultiplicationBroadcastFixture1D4D)
+{
+    RunTest<4>({ { "input0", { 3.0f } },
+                 { "input1", { 0.0f,  1.0f,  2.0f,
+                               3.0f,  4.0f,  5.0f,
+                               6.0f,  7.0f,  8.0f,
+                               9.0f, 10.0f, 11.0f } } },
+               { { "output", { 0.0f,  3.0f,  6.0f,
+                               9.0f, 12.0f, 15.0f,
+                              18.0f, 21.0f, 24.0f,
+                              27.0f, 30.0f, 33.0f } } });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/PassThru.cpp b/src/armnnTfParser/test/PassThru.cpp
new file mode 100644
index 0000000..8462ec2
--- /dev/null
+++ b/src/armnnTfParser/test/PassThru.cpp
@@ -0,0 +1,52 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct PassThruFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    PassThruFixture()
+    {
+        m_Prototext = "node {\n"
+            "  name: \"Placeholder\"\n"
+            "  op: \"Placeholder\"\n"
+            "  attr {\n"
+            "    key: \"dtype\"\n"
+            "    value {\n"
+            "      type: DT_FLOAT\n"
+            "    }\n"
+            "  }\n"
+            "  attr {\n"
+            "    key: \"shape\"\n"
+            "    value {\n"
+            "      shape {\n"
+            "      }\n"
+            "    }\n"
+            "  }\n"
+            "}\n";
+        SetupSingleInputSingleOutput({ 1, 7 }, "Placeholder", "Placeholder");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ValidateOutput, PassThruFixture)
+{
+    BOOST_TEST(m_Parser->GetNetworkOutputBindingInfo("Placeholder").second.GetNumDimensions() == 2);
+    BOOST_TEST(m_Parser->GetNetworkOutputBindingInfo("Placeholder").second.GetShape()[0] == 1);
+    BOOST_TEST(m_Parser->GetNetworkOutputBindingInfo("Placeholder").second.GetShape()[1] == 7);
+}
+
+BOOST_FIXTURE_TEST_CASE(RunGraph, PassThruFixture)
+{
+    armnn::TensorInfo inputTensorInfo = m_Parser->GetNetworkInputBindingInfo("Placeholder").second;
+    auto input = MakeRandomTensor<float, 2>(inputTensorInfo, 378346);
+    std::vector<float> inputVec;
+    inputVec.assign(input.data(), input.data() + input.num_elements());
+    RunTest<2>(inputVec, inputVec); // The passthru network should output the same as the input
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Pooling.cpp b/src/armnnTfParser/test/Pooling.cpp
new file mode 100644
index 0000000..36ffa47
--- /dev/null
+++ b/src/armnnTfParser/test/Pooling.cpp
@@ -0,0 +1,112 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+
+struct Pooling2dFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    explicit Pooling2dFixture(const char* poolingtype)
+    {
+        m_Prototext =  "node {\n"
+            "  name: \"Placeholder\"\n"
+            "  op: \"Placeholder\"\n"
+            "  attr {\n"
+            "    key: \"dtype\"\n"
+            "    value {\n"
+            "      type: DT_FLOAT\n"
+            "    }\n"
+            "  }\n"
+            "  attr {\n"
+            "    key: \"value\"\n"
+            "    value {\n"
+            "      tensor {\n"
+            "        dtype: DT_FLOAT\n"
+            "        tensor_shape {\n"
+            "        }\n"
+            "      }\n"
+            "    }\n"
+            "   }\n"
+            "  }\n"
+            "node {\n"
+            "  name: \"";
+        m_Prototext.append(poolingtype);
+        m_Prototext.append("\"\n"
+                               "  op: \"");
+        m_Prototext.append(poolingtype);
+        m_Prototext.append("\"\n"
+                               "  input: \"Placeholder\"\n"
+                               "  attr {\n"
+                               "    key: \"T\"\n"
+                               "    value {\n"
+                               "      type: DT_FLOAT\n"
+                               "    }\n"
+                               "  }\n"
+                               "  attr {\n"
+                               "    key: \"data_format\"\n"
+                               "    value {\n"
+                               "      s: \"NHWC\"\n"
+                               "    }\n"
+                               "  }\n"
+                               "  attr {\n"
+                               "    key: \"ksize\"\n"
+                               "    value {\n"
+                               "      list {\n"
+                               "        i: 1\n"
+                               "        i: 2\n"
+                               "        i: 2\n"
+                               "        i: 1\n"
+                               "      }\n"
+                               "    }\n"
+                               "  }\n"
+                               "  attr {\n"
+                               "    key: \"padding\"\n"
+                               "    value {\n"
+                               "      s: \"VALID\"\n"
+                               "    }\n"
+                               "  }\n"
+                               "  attr {\n"
+                               "    key: \"strides\"\n"
+                               "    value {\n"
+                               "      list {\n"
+                               "        i: 1\n"
+                               "        i: 1\n"
+                               "        i: 1\n"
+                               "        i: 1\n"
+                               "      }\n"
+                               "    }\n"
+                               "  }\n"
+                               "}\n");
+
+        SetupSingleInputSingleOutput({ 1, 2, 2, 1 }, "Placeholder", poolingtype);
+    }
+};
+
+
+struct MaxPoolFixture : Pooling2dFixture
+{
+    MaxPoolFixture() : Pooling2dFixture("MaxPool") {}
+};
+BOOST_FIXTURE_TEST_CASE(ParseMaxPool, MaxPoolFixture)
+{
+    RunTest<4>({1.0f, 2.0f, 3.0f, -4.0f}, {3.0f});
+}
+
+
+struct AvgPoolFixture : Pooling2dFixture
+{
+    AvgPoolFixture() : Pooling2dFixture("AvgPool") {}
+};
+BOOST_FIXTURE_TEST_CASE(ParseAvgPool, AvgPoolFixture)
+{
+    RunTest<4>({1.0f, 2.0f, 3.0f, 4.0f}, {2.5f});
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Reshape.cpp b/src/armnnTfParser/test/Reshape.cpp
new file mode 100644
index 0000000..4eb6b12
--- /dev/null
+++ b/src/armnnTfParser/test/Reshape.cpp
@@ -0,0 +1,86 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+
+struct ReshapeFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    ReshapeFixture()
+    {
+        m_Prototext = "node { \n"
+            "    name: \"graphInput\" \n"
+            "    op: \"Placeholder\" \n"
+            "    attr { \n"
+            "      key: \"dtype\" \n"
+            "      value { \n"
+            "        type: DT_FLOAT \n"
+            "      } \n"
+            "    } \n"
+            "    attr { \n"
+            "      key: \"shape\" \n"
+            "      value { \n"
+            "        shape { \n"
+            "        } \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "node { \n"
+            "  name: \"Reshape/shape\" \n"
+            "  op: \"Const\" \n"
+            "  attr { \n"
+            "    key: \"dtype\" \n"
+            "    value { \n"
+            "      type: DT_INT32 \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"value\" \n"
+            "    value { \n"
+            "      tensor { \n"
+            "        dtype: DT_INT32 \n"
+            "        tensor_shape { \n"
+            "          dim { \n"
+            "            size: 2 \n"
+            "          } \n"
+            "        } \n"
+            "        tensor_content: \"\\002\\000\\000\\000\\002\\000\\000\\000\" \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"Reshape\" \n"
+            "  op: \"Reshape\" \n"
+            "  input: \"graphInput\" \n"
+            "  input: \"Reshape/shape\" \n"
+            "  attr { \n"
+            "    key: \"T\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"Tshape\" \n"
+            "    value { \n"
+            "      type: DT_INT32 \n"
+            "    } \n"
+            "  } \n"
+            "} \n";
+
+        SetupSingleInputSingleOutput({1, 4}, "graphInput", "Reshape");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseReshape, ReshapeFixture)
+{
+    RunTest<2>({ 0.0f, 1.0f, 2.0f, 3.0f }, { 0.0f, 1.0f, 2.0f, 3.0f });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/ResizeBilinear.cpp b/src/armnnTfParser/test/ResizeBilinear.cpp
new file mode 100644
index 0000000..30d898f
--- /dev/null
+++ b/src/armnnTfParser/test/ResizeBilinear.cpp
@@ -0,0 +1,114 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct ResizeBilinearFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    ResizeBilinearFixture()
+    {
+        m_Prototext = R"(
+node {
+  name: "graphInput"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_FLOAT
+        tensor_shape {
+          dim {
+            size: 1
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 3
+          }
+          dim {
+            size: 1
+          }
+        }
+        tensor_content:
+"\000\000\000\000\000\000\200?\000\000\000@\000\000@@\000\000\200@\000\000\240@\000\000\300@\000\000\340@\000\000\000A"
+      }
+    }
+  }
+}
+node {
+  name: "resizeBilinearLayer/size"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+          dim {
+            size: 2
+          }
+        }
+        tensor_content: "\005\000\000\000\005\000\000\000"
+      }
+    }
+  }
+}
+node {
+  name: "resizeBilinearLayer"
+  op: "ResizeBilinear"
+  input: "graphInput"
+  input: "resizeBilinearLayer/size"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "align_corners"
+    value {
+      b: false
+    }
+  }
+}
+        )";
+
+        SetupSingleInputSingleOutput({ 1, 3, 3, 1 }, "graphInput", "resizeBilinearLayer");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseResizeBilinear, ResizeBilinearFixture)
+{
+    RunTest<4>(// input data
+               { 0.0f, 1.0f, 2.0f,
+                 3.0f, 4.0f, 5.0f,
+                 6.0f, 7.0f, 8.0f },
+               // expected output data
+               { 0.0f, 0.6f, 1.2f, 1.8f, 2.0f,
+                 1.8f, 2.4f, 3.0f, 3.6f, 3.8f,
+                 3.6f, 4.2f, 4.8f, 5.4f, 5.6f,
+                 5.4f, 6.0f, 6.6f, 7.2f, 7.4f,
+                 6.0f, 6.6f, 7.2f, 7.8f, 8.0f });
+
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Shape.cpp b/src/armnnTfParser/test/Shape.cpp
new file mode 100644
index 0000000..7b414ec
--- /dev/null
+++ b/src/armnnTfParser/test/Shape.cpp
@@ -0,0 +1,94 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct ShapeFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    ShapeFixture()
+    {
+        m_Prototext =
+            "node { \n"
+            "  name: \"Placeholder\" \n"
+            "  op: \"Placeholder\" \n"
+            "  attr { \n"
+            "    key: \"dtype\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"shape\" \n"
+            "    value { \n"
+            "      shape { \n"
+            "        dim { \n"
+            "          size: 1 \n"
+            "        } \n"
+            "        dim { \n"
+            "          size: 1 \n"
+            "        } \n"
+            "        dim { \n"
+            "          size: 1 \n"
+            "        } \n"
+            "        dim { \n"
+            "          size: 4 \n"
+            "        } \n"
+            "      } \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"shapeTest\" \n"
+            "  op: \"Shape\" \n"
+            "  input: \"Placeholder\" \n"
+            "  attr { \n"
+            "    key: \"T\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"out_type\" \n"
+            "    value { \n"
+            "      type: DT_INT32 \n"
+            "    } \n"
+            "  } \n"
+            "} \n"
+            "node { \n"
+            "  name: \"Reshape\" \n"
+            "  op: \"Reshape\" \n"
+            "  input: \"Placeholder\" \n"
+            "  input: \"shapeTest\" \n"
+            "  attr { \n"
+            "    key: \"T\" \n"
+            "    value { \n"
+            "      type: DT_FLOAT \n"
+            "    } \n"
+            "  } \n"
+            "  attr { \n"
+            "    key: \"Tshape\" \n"
+            "    value { \n"
+            "      type: DT_INT32 \n"
+            "    } \n"
+            "  } \n"
+            "} \n";
+
+        SetupSingleInputSingleOutput({1, 4}, "Placeholder", "Reshape");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseShape, ShapeFixture)
+{
+    // Note: the test's output cannot be an int32 const layer, because that cannot exist in the
+    //       graph, as ARMNN only supports u8 and float layers. For that reason a reshape layer
+    //       is added which reshapes the input to its original dimensions, leaving it unchanged.
+    RunTest<2>({ 0.0f, 1.0f, 2.0f, 3.0f }, { 0.0f, 1.0f, 2.0f, 3.0f });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Softmax.cpp b/src/armnnTfParser/test/Softmax.cpp
new file mode 100644
index 0000000..1ab28ea
--- /dev/null
+++ b/src/armnnTfParser/test/Softmax.cpp
@@ -0,0 +1,55 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct SoftmaxFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    SoftmaxFixture()
+    {
+        m_Prototext = "node {\n"
+            "  name: \"blah\"\n"
+            "  op: \"Placeholder\"\n"
+            "  attr {\n"
+            "    key: \"dtype\"\n"
+            "    value {\n"
+            "      type: DT_FLOAT\n"
+            "    }\n"
+            "  }\n"
+            "  attr {\n"
+            "    key: \"shape\"\n"
+            "    value {\n"
+            "      shape {\n"
+            "      }\n"
+            "    }\n"
+            "  }\n"
+            "}\n"
+            "node {\n"
+            "  name: \"blah2\"\n"
+            "  op: \"Softmax\"\n"
+            "  input: \"blah\"\n"
+            "  attr {\n"
+            "    key: \"T\"\n"
+            "    value {\n"
+            "      type: DT_FLOAT\n"
+            "    }\n"
+            "  }\n"
+            "}\n";
+
+        SetupSingleInputSingleOutput({ 1, 7 }, "blah", "blah2");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseSoftmax, SoftmaxFixture)
+{
+    RunTest<2>({ 0, 0, 10000, 0, 0, 0, 0 }, { 0, 0, 1, 0, 0, 0, 0 });
+}
+
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/Squeeze.cpp b/src/armnnTfParser/test/Squeeze.cpp
new file mode 100644
index 0000000..d2d7d49
--- /dev/null
+++ b/src/armnnTfParser/test/Squeeze.cpp
@@ -0,0 +1,108 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+
+template <bool withDimZero, bool withDimOne>
+struct SqueezeFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    SqueezeFixture()
+    {
+        m_Prototext =
+                "node { \n"
+                "    name: \"graphInput\" \n"
+                "    op: \"Placeholder\" \n"
+                "    attr { \n"
+                "      key: \"dtype\" \n"
+                "      value { \n"
+                "        type: DT_FLOAT \n"
+                "      } \n"
+                "    } \n"
+                "    attr { \n"
+                "      key: \"shape\" \n"
+                "      value { \n"
+                "        shape { \n"
+                "        } \n"
+                "      } \n"
+                "    } \n"
+                "  } \n"
+                "node { \n"
+                "  name: \"Squeeze\" \n"
+                "  op: \"Squeeze\" \n"
+                "  input: \"graphInput\" \n"
+                "  attr { \n"
+                "    key: \"T\" \n"
+                "    value { \n"
+                "      type: DT_FLOAT \n"
+                "    } \n"
+                "  } \n"
+                "  attr { \n"
+                "    key: \"squeeze_dims\" \n"
+                "    value { \n"
+                "      list {\n";
+
+        if (withDimZero)
+        {
+            m_Prototext += "i:0\n";
+        }
+
+        if (withDimOne)
+        {
+            m_Prototext += "i:1\n";
+        }
+
+        m_Prototext +=
+                "      } \n"
+                "    } \n"
+                "  } \n"
+                "} \n";
+
+        SetupSingleInputSingleOutput({ 1, 1, 2, 2 }, "graphInput", "Squeeze");
+    }
+};
+
+typedef SqueezeFixture<false, false> ImpliedDimensionsSqueezeFixture;
+typedef SqueezeFixture<true, false>  ExplicitDimensionZeroSqueezeFixture;
+typedef SqueezeFixture<false, true>  ExplicitDimensionOneSqueezeFixture;
+typedef SqueezeFixture<true, true>   ExplicitDimensionsSqueezeFixture;
+
+BOOST_FIXTURE_TEST_CASE(ParseImplicitSqueeze, ImpliedDimensionsSqueezeFixture)
+{
+    BOOST_TEST((m_Parser->GetNetworkOutputBindingInfo("Squeeze").second.GetShape() ==
+               armnn::TensorShape({2,2})));
+    RunTest<2>({ 1.0f, 2.0f, 3.0f, 4.0f },
+               { 1.0f, 2.0f, 3.0f, 4.0f });
+}
+
+BOOST_FIXTURE_TEST_CASE(ParseDimensionZeroSqueeze, ExplicitDimensionZeroSqueezeFixture)
+{
+    BOOST_TEST((m_Parser->GetNetworkOutputBindingInfo("Squeeze").second.GetShape() ==
+               armnn::TensorShape({1,2,2})));
+    RunTest<3>({ 1.0f, 2.0f, 3.0f, 4.0f },
+               { 1.0f, 2.0f, 3.0f, 4.0f });
+}
+
+BOOST_FIXTURE_TEST_CASE(ParseDimensionOneSqueeze, ExplicitDimensionOneSqueezeFixture)
+{
+    BOOST_TEST((m_Parser->GetNetworkOutputBindingInfo("Squeeze").second.GetShape() ==
+               armnn::TensorShape({1,2,2})));
+    RunTest<3>({ 1.0f, 2.0f, 3.0f, 4.0f },
+               { 1.0f, 2.0f, 3.0f, 4.0f });
+}
+
+BOOST_FIXTURE_TEST_CASE(ParseExplicitDimensionsSqueeze, ExplicitDimensionsSqueezeFixture)
+{
+    BOOST_TEST((m_Parser->GetNetworkOutputBindingInfo("Squeeze").second.GetShape() ==
+               armnn::TensorShape({2,2})));
+    RunTest<2>({ 1.0f, 2.0f, 3.0f, 4.0f },
+               { 1.0f, 2.0f, 3.0f, 4.0f });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/TestDependencies.cpp b/src/armnnTfParser/test/TestDependencies.cpp
new file mode 100644
index 0000000..13ab17c
--- /dev/null
+++ b/src/armnnTfParser/test/TestDependencies.cpp
@@ -0,0 +1,296 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+// Graph which tests that nodes are re-ordered in the queue when they are encountered a second time.
+// In this case R0 will be encountered first via R1 and then via R2. At that time
+// we need to make sure that R0 (and the I on which it is dependent) is moved to the front again
+// so that it is before both R1 and R2.
+//    I
+//    |
+//    R0
+//   / \'
+//  R1  R2
+//   \  |
+//    \ R3
+//     \|
+//      O
+struct RediscoveredDependenciesFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    RediscoveredDependenciesFixture()
+    {
+        // input = tf.placeholder(tf.float32, 1, "input")
+        // relu0 = tf.nn.relu(input, "relu0")
+        // relu1 = tf.nn.relu(relu0, "relu1")
+        // relu2 = tf.nn.relu(relu0, "relu2")
+        // relu3 = tf.nn.relu(relu2, "relu3")
+        // output = tf.add(relu1, relu3, "output")
+        m_Prototext = R"(
+            node {
+              name: "input"
+              op: "Placeholder"
+              attr {
+                key: "dtype"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+              attr {
+                key: "shape"
+                value {
+                  shape {
+                    dim {
+                      size: 1
+                    }
+                  }
+                }
+              }
+            }
+            node {
+              name: "relu0"
+              op: "Relu"
+              input: "input"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+            node {
+              name: "relu1"
+              op: "Relu"
+              input: "relu0"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+            node {
+              name: "relu2"
+              op: "Relu"
+              input: "relu0"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+            node {
+              name: "relu3"
+              op: "Relu"
+              input: "relu2"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+            node {
+              name: "output"
+              op: "Add"
+              input: "relu1"
+              input: "relu3"
+              attr {
+                key: "T"
+                value {
+                  type: DT_FLOAT
+                }
+              }
+            }
+        )";
+        SetupSingleInputSingleOutput({ 1 }, "input", "output");
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(RediscoveredDependencies, RediscoveredDependenciesFixture)
+{
+    RunTest<1>({1}, {2});
+}
+
+// Tests that a simple cycle in the tensorflow graph will be detected and an exception thrown, rather than the TfParser
+// getting stuck in an infinite loop.
+BOOST_AUTO_TEST_CASE(SimpleCycle)
+{
+    const char* prototext = R"(
+node {
+  name: "r1"
+  op: "Relu"
+  input: "r2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "r2"
+  op: "Relu"
+  input: "r1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+    )";
+    armnnTfParser::ITfParserPtr parser = armnnTfParser::ITfParser::Create();
+    BOOST_CHECK_THROW(parser->CreateNetworkFromString(prototext, {}, { "r2" }), armnn::ParseException);
+}
+
+// Similar to the above SimpleCycle test, but has a single node which connects to itself.
+BOOST_AUTO_TEST_CASE(SingleNodeCycle)
+{
+    const char* prototext = R"(
+node {
+  name: "r1"
+  op: "Relu"
+  input: "r1"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+    )";
+    armnnTfParser::ITfParserPtr parser = armnnTfParser::ITfParser::Create();
+    BOOST_CHECK_THROW(parser->CreateNetworkFromString(prototext, {}, { "r1" }), armnn::ParseException);
+}
+
+// Similar to the above SimpleCycle test, but with a more complicated graph.
+//    I
+//    |
+//    A2---<---<-
+//   / \'        |
+//  R1  R2       |
+//   \  |        |
+//    \ R3       |
+//     \|        |
+//      A1-->--->|
+//
+BOOST_AUTO_TEST_CASE(ComplexCycle)
+{
+    // input = tf.placeholder(tf.float32, 1, "input")
+    // add2 = tf.add(input, add1, "add2") // This line won't actually run in TF, because add1 is not yet defined
+    // relu1 = tf.nn.relu(add2, "relu1")
+    // relu2 = tf.nn.relu(add2, "relu2")
+    // relu3 = tf.nn.relu(relu2, "relu3")
+    // add1 = tf.add(relu1, relu3, "add1")
+    const char* prototext = R"(
+        node {
+            name: "input"
+            op: "Placeholder"
+            attr {
+            key: "dtype"
+            value {
+                type: DT_FLOAT
+            }
+            }
+            attr {
+            key: "shape"
+            value {
+                shape {
+                dim {
+                    size: 1
+                }
+                }
+            }
+            }
+        }
+        node {
+            name: "add2"
+            op: "Add"
+            input: "input"
+            input: "add1"
+            attr {
+            key: "T"
+            value {
+                type: DT_FLOAT
+            }
+            }
+        }
+        node {
+            name: "relu1"
+            op: "Relu"
+            input: "add2"
+            attr {
+            key: "T"
+            value {
+                type: DT_FLOAT
+            }
+            }
+        }
+        node {
+            name: "relu2"
+            op: "Relu"
+            input: "add2"
+            attr {
+            key: "T"
+            value {
+                type: DT_FLOAT
+            }
+            }
+        }
+        node {
+            name: "relu3"
+            op: "Relu"
+            input: "relu2"
+            attr {
+            key: "T"
+            value {
+                type: DT_FLOAT
+            }
+            }
+        }
+        node {
+            name: "add1"
+            op: "Add"
+            input: "relu1"
+            input: "relu3"
+            attr {
+            key: "T"
+            value {
+                type: DT_FLOAT
+            }
+            }
+        }
+    )";
+    armnnTfParser::ITfParserPtr parser = armnnTfParser::ITfParser::Create();
+    BOOST_CHECK_THROW(parser->CreateNetworkFromString(prototext, {}, { "add1" }), armnn::ParseException);
+}
+
+// Tests that a graph with an input that is not present throws a ParseException.
+BOOST_AUTO_TEST_CASE(InvalidInput)
+{
+    const char* prototext = R"(
+node {
+  name: "r1"
+  op: "Relu"
+  input: "a-node-that-does-not-exist"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+    )";
+    armnnTfParser::ITfParserPtr parser = armnnTfParser::ITfParser::Create();
+    BOOST_CHECK_THROW(parser->CreateNetworkFromString(prototext, {}, { "r1" }), armnn::ParseException);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnTfParser/test/TestMultiInputsOutputs.cpp b/src/armnnTfParser/test/TestMultiInputsOutputs.cpp
new file mode 100644
index 0000000..5eea616
--- /dev/null
+++ b/src/armnnTfParser/test/TestMultiInputsOutputs.cpp
@@ -0,0 +1,92 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include "armnnTfParser/ITfParser.hpp"
+#include "ParserPrototxtFixture.hpp"
+
+BOOST_AUTO_TEST_SUITE(TensorflowParser)
+
+struct MultiInputsOutputsFixture : public ParserPrototxtFixture<armnnTfParser::ITfParser>
+{
+    MultiInputsOutputsFixture()
+    {
+        // input1 = tf.placeholder(tf.float32, shape=[], name = "input1")
+        // input2 = tf.placeholder(tf.float32, shape = [], name = "input2")
+        // add1 = tf.add(input1, input2, name = "add1")
+        // add2 = tf.add(input1, input2, name = "add2")
+        m_Prototext = R"(
+node {
+  name: "input1"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "input2"
+  op: "Placeholder"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_FLOAT
+    }
+  }
+  attr {
+    key: "shape"
+    value {
+      shape {
+      }
+    }
+  }
+}
+node {
+  name: "add1"
+  op: "Add"
+  input: "input1"
+  input: "input2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+node {
+  name: "add2"
+  op: "Add"
+  input: "input1"
+  input: "input2"
+  attr {
+    key: "T"
+    value {
+      type: DT_FLOAT
+    }
+  }
+}
+        )";
+        Setup({ { "input1", { 1 } },
+                { "input2", { 1 } } },
+              { "add1", "add2" });
+    }
+};
+
+BOOST_FIXTURE_TEST_CASE(MultiInputsOutputs, MultiInputsOutputsFixture)
+{
+    RunTest<1>({ { "input1", {12.0f} }, { "input2", { 13.0f } } },
+               { { "add1", { 25.0f } }, { "add2", { 25.0f } } });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnnUtils/DotSerializer.cpp b/src/armnnUtils/DotSerializer.cpp
new file mode 100644
index 0000000..1feea54
--- /dev/null
+++ b/src/armnnUtils/DotSerializer.cpp
@@ -0,0 +1,219 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "DotSerializer.hpp"
+
+#include <boost/assert.hpp>
+#include <sstream>
+#include <cstring>
+
+namespace armnn
+{
+
+namespace
+{
+std::string Indent(int numSpaces)
+{
+    std::stringstream ss;
+    for (int i = 0; i < numSpaces; i++)
+    {
+        ss << " ";
+    }
+    return ss.str();
+}
+} //namespace
+
+
+HtmlFont::HtmlFont(std::ostream& stream, int fontSize, const char *color, const char *face)
+    : DotBase(stream)
+{
+    GetStream() << "<FONT";
+
+    if (fontSize > -1)
+    {
+        GetStream() << " POINT-SIZE=" << "\"" << fontSize << "\"";
+    }
+
+    if (color && std::strlen(color) != 0)
+    {
+        GetStream() << " COLOR=\"" << color << "\" ";
+    }
+
+    if (face && std::strlen(face) != 0)
+    {
+        GetStream() << " FACE=\"" << face << "\" ";
+    }
+
+    GetStream() << ">";
+}
+
+
+HtmlFont::HtmlFont(std::ostream& stream)
+    : HtmlFont(stream, -1, nullptr, nullptr)
+{}
+
+HtmlFont::~HtmlFont()
+{
+    GetStream() << "</FONT>";
+}
+
+
+DotAttributeSet::DotAttributeSet(std::ostream& stream)
+    : DotBase(stream)
+{
+    GetStream() << "[";
+}
+
+DotAttributeSet::~DotAttributeSet()
+{
+    bool doSpace=false;
+    for (auto attrib : m_Attributes)
+    {
+        if (doSpace)
+        {
+            GetStream() << " ";
+        }
+
+        GetStream() << attrib;
+        doSpace=true;
+    }
+
+    GetStream() << "]";
+}
+
+DotAttributeSet & DotAttributeSet::AddAttribute(const std::string& name, const std::stringstream& value)
+{
+    std::stringstream ss;
+    ss << name <<"=" << value.str();
+    m_Attributes.push_back(ss.str());
+    return *this;
+}
+
+DotAttributeSet & DotAttributeSet::AddAttribute(const std::string& name, int value)
+{
+    std::stringstream ss;
+    ss << name <<"=" << value;
+    m_Attributes.push_back(ss.str());
+    return *this;
+}
+
+DotAttributeSet & DotAttributeSet::AddAttribute(const std::string& name, const std::string& value)
+{
+    std::stringstream ss;
+    ss << name <<"=\"" << value << "\"";
+    m_Attributes.push_back(ss.str());
+    return *this;
+}
+
+DotEdge::DotEdge(std::ostream& stream, unsigned int fromNodeId, unsigned int toNodeId)
+    : DotBase(stream)
+{
+    std::stringstream ss;
+    ss << Indent(4) << fromNodeId << " -> " << toNodeId << " ";
+    GetStream() << ss.str();
+
+    m_Attributes = std::make_unique<DotAttributeSet>(stream);
+}
+
+DotEdge::~DotEdge()
+{
+    m_Attributes.reset(nullptr);
+    GetStream() << ";" << std::endl;
+}
+
+
+NodeContent::NodeContent(std::ostream& stream)
+    : DotBase(stream)
+{
+}
+
+NodeContent & NodeContent::SetName(const std::string & name)
+{
+    m_Name = name;
+    return *this;
+}
+
+NodeContent & NodeContent::AddContent(const std::string & content)
+{
+    m_Contents.push_back(content);
+    return *this;
+}
+
+NodeContent::~NodeContent()
+{
+    std::stringstream ss;
+    ss << "label=\"{" << m_Name;
+    if (!m_Contents.empty())
+    {
+        ss << "|";
+    }
+    for (auto & content : m_Contents)
+    {
+        ss << content;
+        ss << "\\l";
+    }
+    ss << "}\"";
+    GetStream() << ss.str();
+}
+
+DotNode::DotNode(std::ostream& stream, unsigned int nodeId, const char* label)
+    : DotBase(stream)
+{
+    std::stringstream ss;
+    ss << Indent(4) << nodeId;
+
+    GetStream() << ss.str() << " ";
+
+    m_Contents = std::make_unique<NodeContent>(stream);
+    m_Attributes = std::make_unique<DotAttributeSet>(stream);
+
+    if (std::strlen(label) != 0)
+    {
+        m_Contents->SetName(label);
+    }
+    else
+    {
+        m_Contents->SetName("<noname>");
+    }
+}
+
+DotNode::~DotNode()
+{
+    m_Contents.reset(nullptr);
+    m_Attributes.reset(nullptr);
+    GetStream() << ";" << std::endl;
+}
+
+
+DotDefaults::DotDefaults(std::ostream& stream, const char* type)
+    : DotBase(stream)
+{
+    std::stringstream ss;
+    ss << Indent(4) << type;
+
+    GetStream() << ss.str() << " ";
+    m_Attributes = std::make_unique<DotAttributeSet>(stream);
+}
+
+DotDefaults::~DotDefaults()
+{
+    m_Attributes.reset(nullptr);
+    GetStream() << ";" << std::endl;
+}
+
+DotGraph::DotGraph(std::ostream& stream, const char* name)
+    : DotBase(stream)
+{
+    GetStream() << "digraph " << name << " {" << std::endl;
+}
+
+DotGraph::~DotGraph()
+{
+    GetStream() << "}" << std::endl;
+}
+
+} //namespace armnn
+
+
diff --git a/src/armnnUtils/DotSerializer.hpp b/src/armnnUtils/DotSerializer.hpp
new file mode 100644
index 0000000..3cb591c
--- /dev/null
+++ b/src/armnnUtils/DotSerializer.hpp
@@ -0,0 +1,131 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <ostream>
+#include <vector>
+#include <memory>
+
+namespace armnn
+{
+
+class DotBase
+{
+public:
+    explicit DotBase(std::ostream& stream)
+        : m_Stream(stream) {}
+
+    std::ostream& GetStream() { return m_Stream; }
+
+private:
+    std::ostream& m_Stream;
+};
+
+class HtmlSection : public DotBase
+{
+public:
+    explicit HtmlSection(std::ostream& stream)
+        : DotBase(stream) { GetStream() << "<";}
+    ~HtmlSection() { GetStream() << ">"; }
+};
+
+class HtmlSimpleTag : public DotBase
+{
+public:
+    explicit HtmlSimpleTag(std::ostream& stream, const char* name)
+        : DotBase(stream)
+        , m_Name(name){ GetStream() << "<" << m_Name << ">"; }
+    ~HtmlSimpleTag() { GetStream() << "</" << m_Name << ">"; }
+
+private:
+    const char* m_Name;
+};
+
+class HtmlBold : public HtmlSimpleTag
+{
+public:
+    explicit HtmlBold(std::ostream &stream)
+        : HtmlSimpleTag(stream, "B") {}
+};
+
+class HtmlFont : public DotBase
+{
+public:
+    explicit HtmlFont(std::ostream& stream, int fontSize, const char* color, const char* face);
+    explicit HtmlFont(std::ostream& stream);
+    ~HtmlFont();
+};
+
+class DotAttributeSet : public DotBase
+{
+public:
+    explicit DotAttributeSet(std::ostream& stream);
+    ~DotAttributeSet();
+
+    DotAttributeSet & AddAttribute(const std::string& name, const std::stringstream& value);
+    DotAttributeSet & AddAttribute(const std::string& name, int value);
+    DotAttributeSet & AddAttribute(const std::string& name, const std::string& value);
+private:
+    std::vector<std::string> m_Attributes;
+};
+
+class DotEdge : public DotBase
+{
+public:
+    explicit DotEdge(std::ostream& stream, unsigned int fromNodeId, unsigned int toNodeId);
+    ~DotEdge();
+
+    DotAttributeSet& GetAttributeSet() { return *m_Attributes.get(); }
+private:
+    std::unique_ptr<DotAttributeSet> m_Attributes;
+};
+
+class NodeContent : public DotBase
+{
+public:
+    explicit NodeContent(std::ostream& stream);
+    NodeContent & SetName(const std::string & name);
+    NodeContent & AddContent(const std::string & content);
+
+    ~NodeContent();
+private:
+    std::string m_Name;
+    std::vector<std::string> m_Contents;
+};
+
+class DotNode : public DotBase
+{
+public:
+    explicit DotNode(std::ostream& stream, unsigned int nodeId, const char* label);
+    ~DotNode();
+
+    NodeContent& GetContents()         { return *m_Contents.get(); }
+    DotAttributeSet& GetAttributeSet() { return *m_Attributes.get(); }
+private:
+    std::unique_ptr<NodeContent>     m_Contents;
+    std::unique_ptr<DotAttributeSet> m_Attributes;
+};
+
+class DotDefaults : public DotBase
+{
+public:
+    explicit DotDefaults(std::ostream& stream, const char* type);
+    ~DotDefaults();
+
+    DotAttributeSet& GetAttributeSet() { return *m_Attributes.get(); }
+private:
+    std::unique_ptr<DotAttributeSet> m_Attributes;
+};
+
+class DotGraph : public DotBase
+{
+public:
+    explicit DotGraph(std::ostream& stream, const char* name);
+    ~DotGraph();
+private:
+};
+
+} //namespace armnn