IVGCVSW-6706 Create the libpipeClient library

Change-Id: I2368aade38ad3808fab55d8a86cd659d4e95d91e
Signed-off-by: Jim Flynn <jim.flynn@arm.com>
diff --git a/profiling/client/include/CounterStatus.hpp b/profiling/client/include/CounterStatus.hpp
new file mode 100644
index 0000000..d497226
--- /dev/null
+++ b/profiling/client/include/CounterStatus.hpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2022 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <cstdint>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+/// Status record for one counter: its backend and global ids, enabled flag and sampling rate.
+struct CounterStatus
+{
+    CounterStatus(uint16_t backendCounterId, uint16_t globalCounterId, bool enabled,
+                  uint32_t samplingRateInMicroseconds)
+        : m_BackendCounterId(backendCounterId)
+        , m_GlobalCounterId(globalCounterId)
+        , m_Enabled(enabled)
+        , m_SamplingRateInMicroseconds(samplingRateInMicroseconds)
+    {}
+    uint16_t m_BackendCounterId;
+    uint16_t m_GlobalCounterId;
+    bool     m_Enabled;
+    uint32_t m_SamplingRateInMicroseconds;
+};
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/include/CounterValue.hpp b/profiling/client/include/CounterValue.hpp
new file mode 100644
index 0000000..79de33b
--- /dev/null
+++ b/profiling/client/include/CounterValue.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2022 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <cstdint>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+/// A single counter reading: the id of the counter paired with its value.
+struct CounterValue
+{
+    CounterValue(uint16_t id, uint32_t value) :
+        counterId(id), counterValue(value) {}
+    uint16_t counterId;
+    uint32_t counterValue;
+};
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/include/IProfilingService.hpp b/profiling/client/include/IProfilingService.hpp
index 21d5c9c..778bdb7 100644
--- a/profiling/client/include/IProfilingService.hpp
+++ b/profiling/client/include/IProfilingService.hpp
@@ -11,12 +11,11 @@
 #include "IInitialiseProfilingService.hpp"
 #include "IProfilingServiceStatus.hpp"
 #include "ISendCounterPacket.hpp"
+#include "ISendTimelinePacket.hpp"
 #include "IReportStructure.hpp"
 #include "ProfilingOptions.hpp"
 #include "ProfilingState.hpp"
 
-#include <client/include/backends/IBackendProfilingContext.hpp>
-
 #include <common/include/ICounterRegistry.hpp>
 #include <common/include/Optional.hpp>
 #include <common/include/ProfilingGuidGenerator.hpp>
@@ -28,6 +27,9 @@
 namespace pipe
 {
 
+// forward declaration
+class IBackendProfilingContext;
+
 class IProfilingService : public IProfilingGuidGenerator,
                           public IProfilingServiceStatus,
                           public IReadWriteCounterValues
diff --git a/profiling/client/include/ISendCounterPacket.hpp b/profiling/client/include/ISendCounterPacket.hpp
index c76150f..8095128 100644
--- a/profiling/client/include/ISendCounterPacket.hpp
+++ b/profiling/client/include/ISendCounterPacket.hpp
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include <client/include/backends/IBackendProfiling.hpp>
+#include "CounterValue.hpp"
 
 #include <common/include/ICounterDirectory.hpp>
 
diff --git a/profiling/client/include/Timestamp.hpp b/profiling/client/include/Timestamp.hpp
new file mode 100644
index 0000000..4961551
--- /dev/null
+++ b/profiling/client/include/Timestamp.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2022 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "CounterValue.hpp"
+
+#include <cstdint>
+#include <vector>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+/// A timestamp together with the counter values associated with it.
+struct Timestamp
+{
+    uint64_t timestamp;
+    std::vector<CounterValue> counterValues;
+};
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/include/backends/IBackendProfiling.hpp b/profiling/client/include/backends/IBackendProfiling.hpp
index 347d2dd..a9d29c5 100644
--- a/profiling/client/include/backends/IBackendProfiling.hpp
+++ b/profiling/client/include/backends/IBackendProfiling.hpp
@@ -2,9 +2,16 @@
 // Copyright © 2020 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
+
 #pragma once
 
+#include <client/include/CounterStatus.hpp>
+#include <client/include/CounterValue.hpp>
+#include <client/include/IProfilingService.hpp>
+#include <client/include/ISendCounterPacket.hpp>
 #include <client/include/ISendTimelinePacket.hpp>
+#include <client/include/ProfilingOptions.hpp>
+#include <client/include/Timestamp.hpp>
 
 #include <common/include/IProfilingGuidGenerator.hpp>
 #include <common/include/Optional.hpp>
@@ -18,36 +25,6 @@
 namespace pipe
 {
 
-struct CounterValue
-{
-    CounterValue(uint16_t id, uint32_t value) :
-        counterId(id), counterValue(value) {}
-    uint16_t counterId;
-    uint32_t counterValue;
-};
-
-struct Timestamp
-{
-    uint64_t timestamp;
-    std::vector<CounterValue> counterValues;
-};
-
-struct CounterStatus
-{
-    CounterStatus(uint16_t backendCounterId,
-                  uint16_t globalCounterId,
-                  bool enabled,
-                  uint32_t samplingRateInMicroseconds)
-                  : m_BackendCounterId(backendCounterId),
-                    m_GlobalCounterId(globalCounterId),
-                    m_Enabled(enabled),
-                    m_SamplingRateInMicroseconds(samplingRateInMicroseconds) {}
-    uint16_t m_BackendCounterId;
-    uint16_t m_GlobalCounterId;
-    bool     m_Enabled;
-    uint32_t m_SamplingRateInMicroseconds;
-};
-
 class IRegisterBackendCounters
 {
 public:
@@ -81,6 +58,9 @@
 class IBackendProfiling
 {
 public:
+    static std::unique_ptr<IBackendProfiling> CreateBackendProfiling(const ProfilingOptions& options,
+                                                                     IProfilingService& profilingService,
+                                                                     const std::string& backendId);
     virtual ~IBackendProfiling()
     {}
 
diff --git a/profiling/client/include/backends/IBackendProfilingContext.hpp b/profiling/client/include/backends/IBackendProfilingContext.hpp
index a1ed05e..a2e0a3b 100644
--- a/profiling/client/include/backends/IBackendProfilingContext.hpp
+++ b/profiling/client/include/backends/IBackendProfilingContext.hpp
@@ -5,6 +5,9 @@
 #pragma once
 
 #include "IBackendProfiling.hpp"
+
+#include <common/include/Optional.hpp>
+
 #include <vector>
 
 namespace arm
@@ -20,7 +23,7 @@
     virtual uint16_t RegisterCounters(uint16_t currentMaxGlobalCounterID) = 0;
     virtual arm::pipe::Optional<std::string> ActivateCounters(
         uint32_t capturePeriod, const std::vector<uint16_t>& counterIds) = 0;
-    virtual std::vector<Timestamp> ReportCounterValues() = 0;
+    virtual std::vector<arm::pipe::Timestamp> ReportCounterValues() = 0;
     virtual bool EnableProfiling(bool flag) = 0;
     virtual bool EnableTimelineReporting(bool flag) = 0;
 };
diff --git a/profiling/client/src/ActivateTimelineReportingCommandHandler.cpp b/profiling/client/src/ActivateTimelineReportingCommandHandler.cpp
new file mode 100644
index 0000000..9589dd8
--- /dev/null
+++ b/profiling/client/src/ActivateTimelineReportingCommandHandler.cpp
@@ -0,0 +1,73 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ActivateTimelineReportingCommandHandler.hpp"
+
+#include <client/include/TimelineUtilityMethods.hpp>
+
+#include <common/include/ProfilingException.hpp>
+
+#include <fmt/format.h>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+/// Handles the activate-timeline-reporting command (packet family 0, id 6); a no-op if already reporting.
+/// Throws ProfilingException if no IReportStructure was supplied, the state is not Active, or the packet mismatches.
+void ActivateTimelineReportingCommandHandler::operator()(const arm::pipe::Packet& packet)
+{
+    ProfilingState currentState = m_StateMachine.GetCurrentState();
+
+    if (!m_ReportStructure.has_value())
+    {
+        throw arm::pipe::ProfilingException(std::string(
+                                                "Profiling Service constructor must be initialised with an "
+                                                "IReportStructure argument in order to run timeline reporting"));
+    }
+
+    switch ( currentState )
+    {
+        case ProfilingState::Uninitialised:
+        case ProfilingState::NotConnected:
+        case ProfilingState::WaitingForAck:
+            throw arm::pipe::ProfilingException(fmt::format(
+                    "Activate Timeline Reporting Command Handler invoked while in a wrong state: {}",
+                    GetProfilingStateName(currentState)));
+        case ProfilingState::Active:
+            if ( !( packet.GetPacketFamily() == 0u && packet.GetPacketId() == 6u ))
+            {
+                throw arm::pipe::ProfilingException(std::string(
+                                           "Expected Packet family = 0, id = 6 but received family = ")
+                                           + std::to_string(packet.GetPacketFamily())
+                                           + ", id = " + std::to_string(packet.GetPacketId()));
+            }
+
+            if (!m_TimelineReporting)
+            {
+                m_SendTimelinePacket.SendTimelineMessageDirectoryPackage();
+
+                TimelineUtilityMethods::SendWellKnownLabelsAndEventClasses(m_SendTimelinePacket);
+
+                m_TimelineReporting = true;
+
+                // has_value() was verified at the top of this function, so value() is safe here.
+                m_ReportStructure.value().ReportStructure(m_ProfilingService);
+
+                m_BackendNotifier.NotifyBackendsForTimelineReporting();
+            }
+
+            break;
+        default:
+            throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}",
+                                                static_cast<int>(currentState)));
+    }
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ActivateTimelineReportingCommandHandler.hpp b/profiling/client/src/ActivateTimelineReportingCommandHandler.hpp
new file mode 100644
index 0000000..c9fed73
--- /dev/null
+++ b/profiling/client/src/ActivateTimelineReportingCommandHandler.hpp
@@ -0,0 +1,59 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ProfilingStateMachine.hpp"
+#include "SendTimelinePacket.hpp"
+#include "INotifyBackends.hpp"
+
+#include <client/include/IReportStructure.hpp>
+
+#include <common/include/CommandHandlerFunctor.hpp>
+#include <common/include/Optional.hpp>
+#include <common/include/Packet.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class ActivateTimelineReportingCommandHandler : public arm::pipe::CommandHandlerFunctor
+{
+public:
+    ActivateTimelineReportingCommandHandler(uint32_t familyId,
+                                            uint32_t packetId,
+                                            uint32_t version,
+                                            SendTimelinePacket& sendTimelinePacket,
+                                            ProfilingStateMachine& profilingStateMachine,
+                                            arm::pipe::Optional<IReportStructure&> reportStructure,
+                                            std::atomic<bool>& timelineReporting,
+                                            INotifyBackends& notifyBackends,
+                                            IProfilingService& profilingService)
+        : CommandHandlerFunctor(familyId, packetId, version),
+          m_SendTimelinePacket(sendTimelinePacket),
+          m_StateMachine(profilingStateMachine),
+          m_TimelineReporting(timelineReporting),
+          m_BackendNotifier(notifyBackends),
+          m_ProfilingService(profilingService),
+          m_ReportStructure(reportStructure)
+    {}
+    // Command entry point; defined in ActivateTimelineReportingCommandHandler.cpp.
+    void operator()(const arm::pipe::Packet& packet) override;
+
+private:    // the reference members below are not owned; their referents must outlive this handler
+    SendTimelinePacket&    m_SendTimelinePacket;
+    ProfilingStateMachine& m_StateMachine;
+    std::atomic<bool>&     m_TimelineReporting;    // operator() sets this to true once reporting starts
+    INotifyBackends&       m_BackendNotifier;
+    IProfilingService&     m_ProfilingService;
+    // Optional report structure; operator() throws when it is empty.
+    arm::pipe::Optional<IReportStructure&> m_ReportStructure;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/BufferManager.cpp b/profiling/client/src/BufferManager.cpp
new file mode 100644
index 0000000..42e3200
--- /dev/null
+++ b/profiling/client/src/BufferManager.cpp
@@ -0,0 +1,166 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BufferManager.hpp"
+#include "PacketBuffer.hpp"
+
+namespace arm
+{
+
+namespace pipe
+{
+
+BufferManager::BufferManager(unsigned int numberOfBuffers, unsigned int maxPacketSize)
+    : m_MaxBufferSize(maxPacketSize),
+      m_NumberOfBuffers(numberOfBuffers),
+      m_MaxNumberOfBuffers(numberOfBuffers * 3),    // Reserve() stops creating buffers at this count
+      m_CurrentNumberOfBuffers(numberOfBuffers)
+{
+    Initialize();    // fills m_AvailableList with m_NumberOfBuffers freshly allocated buffers
+}
+
+IPacketBufferPtr BufferManager::Reserve(unsigned int requestedSize, unsigned int& reservedSize)
+{
+    reservedSize = 0;    // stays 0 on every path that returns nullptr
+    if (requestedSize > m_MaxBufferSize)
+    {
+        return nullptr;
+    }
+    IPacketBufferPtr reserved;
+    bool needsSurgeBuffer = false;
+    {
+        std::lock_guard<std::mutex> guard(m_AvailableMutex);
+        if (!m_AvailableList.empty())
+        {
+            reserved = std::move(m_AvailableList.back());
+            m_AvailableList.pop_back();
+        }
+        else if (m_CurrentNumberOfBuffers >= m_MaxNumberOfBuffers)
+        {
+            return nullptr;    // the limit has been reached: no new memory allocations
+        }
+        else
+        {
+            ++m_CurrentNumberOfBuffers;    // account for a temporary overflow/surge buffer
+            needsSurgeBuffer = true;
+        }
+    }
+    if (needsSurgeBuffer)
+    {
+        reserved = std::make_unique<PacketBuffer>(m_MaxBufferSize);    // allocated after the lock is released
+    }
+    reservedSize = requestedSize;
+    return reserved;
+}
+
+void BufferManager::Commit(IPacketBufferPtr& packetBuffer, unsigned int size, bool notifyConsumer)
+{
+    packetBuffer->Commit(size);    // done before the readable-list lock is taken
+    {
+        std::lock_guard<std::mutex> guard(m_ReadableMutex);
+        m_ReadableList.push(std::move(packetBuffer));
+    }
+    // Tell the consumer (if requested) that a buffer is now readable.
+    if (notifyConsumer)
+    {
+        FlushReadList();
+    }
+}
+
+void BufferManager::Initialize()
+{
+    // Build the initial pool: m_NumberOfBuffers buffers, each created with m_MaxBufferSize.
+    m_AvailableList.reserve(m_NumberOfBuffers);
+    m_CurrentNumberOfBuffers = m_NumberOfBuffers;
+    for (unsigned int created = 0; created < m_NumberOfBuffers; ++created)
+    {
+        m_AvailableList.emplace_back(std::make_unique<PacketBuffer>(m_MaxBufferSize));
+    }
+}
+
+void BufferManager::Release(IPacketBufferPtr& packetBuffer)
+{
+    std::unique_lock<std::mutex> availableListLock(m_AvailableMutex, std::defer_lock);
+    packetBuffer->Release();    // called before the available-list lock is taken
+    availableListLock.lock();
+    if (m_AvailableList.size() <= m_NumberOfBuffers)    // NOTE(review): '<=' admits m_NumberOfBuffers + 1 entries
+    {
+        m_AvailableList.push_back(std::move(packetBuffer));
+    }
+    else
+    {
+        // The available list is already full: treat this as a temporary overflow/surge buffer and destroy it.
+        packetBuffer->Destroy();
+        if (m_CurrentNumberOfBuffers > m_NumberOfBuffers)
+        {
+            --m_CurrentNumberOfBuffers;    // never drops below the initial pool size
+        }
+    }
+    availableListLock.unlock();
+}
+
+void BufferManager::Reset()
+{
+    // Only call this method after every thread has been joined.
+    std::lock_guard<std::mutex> readableGuard(m_ReadableMutex);
+    std::lock_guard<std::mutex> availableGuard(m_AvailableMutex);
+    // Drop all pooled and all pending buffers ...
+    m_AvailableList.clear();
+    std::queue<IPacketBufferPtr>().swap(m_ReadableList);
+    // ... then rebuild the initial pool.
+    Initialize();
+}
+
+IPacketBufferPtr BufferManager::GetReadableBuffer()
+{
+    // Pops the buffer at the front of the readable queue, or yields nullptr when the queue is empty.
+    std::lock_guard<std::mutex> guard(m_ReadableMutex);
+    if (m_ReadableList.empty())
+    {
+        return nullptr;
+    }
+    IPacketBufferPtr oldest = std::move(m_ReadableList.front());
+    m_ReadableList.pop();
+    return oldest;
+}
+
+void BufferManager::MarkRead(IPacketBufferPtr& packetBuffer)
+{
+    std::unique_lock<std::mutex> availableListLock(m_AvailableMutex, std::defer_lock);
+    packetBuffer->MarkRead();    // called before the available-list lock is taken
+    availableListLock.lock();
+    if (m_AvailableList.size() <= m_NumberOfBuffers)    // NOTE(review): '<=' admits m_NumberOfBuffers + 1 entries
+    {
+        m_AvailableList.push_back(std::move(packetBuffer));
+    }
+    else
+    {
+        // The available list is already full: treat this as a temporary overflow/surge buffer and destroy it.
+        packetBuffer->Destroy();
+        if (m_CurrentNumberOfBuffers > m_NumberOfBuffers)
+        {
+            --m_CurrentNumberOfBuffers;    // never drops below the initial pool size
+        }
+    }
+    availableListLock.unlock();
+}
+
+void BufferManager::SetConsumer(IConsumer* consumer)
+{
+    m_Consumer = consumer;    // stored as a raw pointer; replaces any previous consumer; no lock is taken
+}
+
+void BufferManager::FlushReadList()
+{
+    if (m_Consumer == nullptr)    // no consumer registered, nothing to notify
+    {
+        return;
+    }
+    m_Consumer->SetReadyToRead();
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/BufferManager.hpp b/profiling/client/src/BufferManager.hpp
new file mode 100644
index 0000000..0ab3e0e
--- /dev/null
+++ b/profiling/client/src/BufferManager.hpp
@@ -0,0 +1,76 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IBufferManager.hpp"
+#include "IConsumer.hpp"
+
+#include <condition_variable>
+#include <mutex>
+#include <vector>
+#include <queue>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class BufferManager : public IBufferManager
+{
+public:
+    BufferManager(unsigned int numberOfBuffers = 5, unsigned int maxPacketSize = 4096);
+
+    ~BufferManager() {}
+    /// Hands out a buffer (reservedSize = requestedSize), or nullptr (reservedSize = 0) when none can be provided
+    IPacketBufferPtr Reserve(unsigned int requestedSize, unsigned int& reservedSize) override;
+    /// Discards all buffers and rebuilds the initial pool; only call once all threads have been joined
+    void Reset();
+    /// Queues packetBuffer as readable and, if notifyConsumer is set, notifies the consumer
+    void Commit(IPacketBufferPtr& packetBuffer, unsigned int size, bool notifyConsumer = true) override;
+    /// Returns an unused buffer to the available list, or destroys it if that list is already full
+    void Release(IPacketBufferPtr& packetBuffer) override;
+    /// Pops the buffer at the front of the readable queue, or returns nullptr when the queue is empty
+    IPacketBufferPtr GetReadableBuffer() override;
+    /// Marks a buffer as read and returns it to the available list, or destroys it if that list is already full
+    void MarkRead(IPacketBufferPtr& packetBuffer) override;
+
+    /// Set Consumer on the buffer manager to be notified when there is a Commit
+    /// Can only be one consumer
+    void SetConsumer(IConsumer* consumer) override;
+
+    /// Notify the Consumer buffer can be read
+    void FlushReadList() override;
+
+private:
+    void Initialize();
+
+    // Size passed to every PacketBuffer; Reserve() rejects requests larger than this
+    unsigned int m_MaxBufferSize;
+    // Initial pool size, limit on total buffers (3x the initial size), and buffers currently accounted for
+    const unsigned int m_NumberOfBuffers;
+    const unsigned int m_MaxNumberOfBuffers;
+    unsigned int m_CurrentNumberOfBuffers;
+
+    // List of available packet buffers
+    std::vector<IPacketBufferPtr> m_AvailableList;
+
+    // Queue of committed packet buffers waiting to be read (first in, first out)
+    std::queue<IPacketBufferPtr> m_ReadableList;
+
+    // Mutex for available packet buffer list
+    std::mutex m_AvailableMutex;
+
+    // Mutex for readable packet buffer list
+    std::mutex m_ReadableMutex;
+
+    // Consumer to notify when a packet is ready to read; raw pointer, nullptr until SetConsumer() is called
+    IConsumer* m_Consumer = nullptr;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/CMakeLists.txt b/profiling/client/src/CMakeLists.txt
new file mode 100644
index 0000000..eb6c67a
--- /dev/null
+++ b/profiling/client/src/CMakeLists.txt
@@ -0,0 +1,88 @@
+#
+# Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+    set(pipeClient_sources)    # start from an empty list, then append every client source and header
+    list(APPEND pipeClient_sources
+        ActivateTimelineReportingCommandHandler.cpp
+        ActivateTimelineReportingCommandHandler.hpp
+        backends/BackendProfiling.cpp
+        backends/BackendProfiling.hpp
+        backends/IBackendProfiling.cpp
+        BufferManager.cpp
+        BufferManager.hpp
+        CommandHandler.cpp
+        CommandHandler.hpp
+        ConnectionAcknowledgedCommandHandler.cpp
+        ConnectionAcknowledgedCommandHandler.hpp
+        CounterIdMap.cpp
+        DeactivateTimelineReportingCommandHandler.cpp
+        DeactivateTimelineReportingCommandHandler.hpp
+        FileOnlyProfilingConnection.cpp
+        FileOnlyProfilingConnection.hpp
+        Holder.cpp
+        IBufferManager.hpp
+        IConsumer.hpp
+        INotifyBackends.hpp
+        IPacketBuffer.hpp
+        IPeriodicCounterCapture.hpp
+        IProfilingConnectionFactory.hpp
+        IProfilingConnection.hpp
+        IProfilingService.cpp
+        ISendThread.hpp
+        NullProfilingConnection.hpp
+        PacketBuffer.cpp
+        PacketBuffer.hpp
+        PeriodicCounterCapture.cpp
+        PeriodicCounterCapture.hpp
+        PeriodicCounterSelectionCommandHandler.cpp
+        PeriodicCounterSelectionCommandHandler.hpp
+        PerJobCounterSelectionCommandHandler.cpp
+        PerJobCounterSelectionCommandHandler.hpp
+        ProfilingConnectionDumpToFileDecorator.cpp
+        ProfilingConnectionDumpToFileDecorator.hpp
+        ProfilingConnectionFactory.cpp
+        ProfilingConnectionFactory.hpp
+        ProfilingService.cpp
+        ProfilingService.hpp
+        ProfilingStateMachine.cpp
+        ProfilingStateMachine.hpp
+        ProfilingUtils.cpp
+        ProfilingUtils.hpp
+        RegisterBackendCounters.cpp
+        RegisterBackendCounters.hpp
+        RequestCounterDirectoryCommandHandler.cpp
+        RequestCounterDirectoryCommandHandler.hpp
+        SendCounterPacket.cpp
+        SendCounterPacket.hpp
+        SendThread.cpp
+        SendThread.hpp
+        SendTimelinePacket.cpp
+        SendTimelinePacket.hpp
+        SocketProfilingConnection.cpp
+        SocketProfilingConnection.hpp
+        TimelinePacketWriterFactory.cpp
+        TimelinePacketWriterFactory.hpp
+        TimelineUtilityMethods.cpp)
+
+    include_directories(${PROJECT_SOURCE_DIR}/profiling/common/include)
+    include_directories(${PROJECT_SOURCE_DIR}/common/include)
+    include_directories(${PROJECT_SOURCE_DIR}/profiling/client/include)
+    include_directories(${PROJECT_SOURCE_DIR}/client/include)
+
+    # For the moment only a static version of the pipeClient library is built,
+    # to simplify the build. No extra .so file to deploy to boards etc.
+    add_library_ex(pipeClient STATIC ${pipeClient_sources})
+
+    target_link_libraries(pipeClient pipeCommon)
+
+    target_compile_definitions(pipeClient PRIVATE "ARMNN_COMPILING_DLL")
+
+    # Install the static library and add it to the armnn-targets export set
+    install(
+        TARGETS pipeClient
+        EXPORT  armnn-targets
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    )
diff --git a/profiling/client/src/CommandHandler.cpp b/profiling/client/src/CommandHandler.cpp
new file mode 100644
index 0000000..6ba49c2
--- /dev/null
+++ b/profiling/client/src/CommandHandler.cpp
@@ -0,0 +1,109 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CommandHandler.hpp"
+#include "ProfilingService.hpp"
+
+#include <common/include/Logging.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void CommandHandler::Start(IProfilingConnection& profilingConnection)
+{
+    if (IsRunning())    // a worker thread is already handling commands
+    {
+        return;
+    }
+    // A previous worker may have returned without being joined; join it before the thread object is reused.
+    if (m_CommandThread.joinable())
+    {
+        m_CommandThread.join();
+    }
+    // Both flags are raised before the worker thread is launched.
+    m_IsRunning.store(true);
+    m_KeepRunning.store(true);
+    m_CommandThread = std::thread(&CommandHandler::HandleCommands, this, std::ref(profilingConnection));
+}
+
+void CommandHandler::Stop()
+{
+    m_KeepRunning.store(false);
+    // HandleCommands() re-checks the flag after each iteration; wait here until it has returned.
+    if (m_CommandThread.joinable())
+    {
+        m_CommandThread.join();
+    }
+}
+
+void CommandHandler::HandleCommands(IProfilingConnection& profilingConnection)
+{
+    do
+    {
+        try
+        {
+            arm::pipe::Packet packet = profilingConnection.ReadPacket(m_Timeout.load());
+            // An empty packet carries no command to dispatch.
+            if (packet.IsEmpty())
+            {
+                // Nothing to do, continue
+                continue;
+            }
+            // Resolve the packet version, then look up the functor registered for family/id/version.
+            arm::pipe::Version version = m_PacketVersionResolver.ResolvePacketVersion(packet.GetPacketFamily(),
+                                                                                      packet.GetPacketId());
+
+            arm::pipe::CommandHandlerFunctor* commandHandlerFunctor =
+                m_CommandHandlerRegistry.GetFunctor(packet.GetPacketFamily(),
+                                                    packet.GetPacketId(),
+                                                    version.GetEncodedValue());
+            ARM_PIPE_ASSERT(commandHandlerFunctor);
+            commandHandlerFunctor->operator()(packet);
+        }
+        catch (const arm::pipe::TimeoutException&)    // a timeout only ends the loop when m_StopAfterTimeout is set
+        {
+            if (m_StopAfterTimeout.load())
+            {
+                m_KeepRunning.store(false);
+            }
+        }
+        catch (const arm::pipe::ProfilingException& e)
+        {
+            // Log the error and continue
+            ARM_PIPE_LOG(warning) << "An error has occurred when handling a command: " << e.what();
+            // Did we get here because the socket failed?
+            if ( !profilingConnection.IsOpen() )
+            {
+                // We're going to stop processing commands.
+                // This will leave the thread idle. There is no mechanism to restart the profiling service when the
+                // connection is lost.
+                m_KeepRunning.store(false);
+            }
+        }
+        catch (...)
+        {
+            // Log the error and continue
+            ARM_PIPE_LOG(warning) << "An unknown error has occurred when handling a command";
+            // Did we get here because the socket failed?
+            if ( !profilingConnection.IsOpen() )
+            {
+                // We're going to stop processing commands.
+                // This will leave the thread idle. There is no mechanism to restart the profiling service when the
+                // connection is lost.
+                m_KeepRunning.store(false);
+            }
+        }
+    }
+    while (m_KeepRunning.load());
+    // Publish that the loop has ended; Start() checks this flag through IsRunning().
+    m_IsRunning.store(false);
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/CommandHandler.hpp b/profiling/client/src/CommandHandler.hpp
new file mode 100644
index 0000000..b097f9e
--- /dev/null
+++ b/profiling/client/src/CommandHandler.hpp
@@ -0,0 +1,61 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IProfilingConnection.hpp"
+#include <common/include/PacketVersionResolver.hpp>
+
+#include <common/include/CommandHandlerRegistry.hpp>
+
+#include <atomic>
+#include <thread>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class CommandHandler
+{
+public:
+    CommandHandler(uint32_t timeout,
+                   bool stopAfterTimeout,
+                   arm::pipe::CommandHandlerRegistry& commandHandlerRegistry,
+                   arm::pipe::PacketVersionResolver& packetVersionResolver)
+        : m_Timeout(timeout),
+          m_StopAfterTimeout(stopAfterTimeout),
+          m_IsRunning(false),
+          m_KeepRunning(false),
+          m_CommandThread(),
+          m_CommandHandlerRegistry(commandHandlerRegistry),
+          m_PacketVersionResolver(packetVersionResolver)
+    {}
+    ~CommandHandler() { Stop(); }    // Stop() joins the worker thread if it is joinable
+    // Both settings are stored in atomics (see the members below).
+    void SetTimeout(uint32_t timeout) { m_Timeout.store(timeout); }
+    void SetStopAfterTimeout(bool stopAfterTimeout) { m_StopAfterTimeout.store(stopAfterTimeout); }
+    // Start() launches the worker thread unless one is running; Stop() clears the keep-running flag and joins it.
+    void Start(IProfilingConnection& profilingConnection);
+    void Stop();
+    bool IsRunning() const { return m_IsRunning.load(); }
+
+private:
+    void HandleCommands(IProfilingConnection& profilingConnection);    // body of the worker thread
+
+    std::atomic<uint32_t> m_Timeout;             // passed to ReadPacket() on every iteration
+    std::atomic<bool>     m_StopAfterTimeout;    // when set, a TimeoutException ends the command loop
+    std::atomic<bool>     m_IsRunning;           // true from Start() until HandleCommands() returns
+    std::atomic<bool>     m_KeepRunning;         // loop condition of HandleCommands()
+    std::thread           m_CommandThread;
+    // Held by reference; not owned by this class.
+    arm::pipe::CommandHandlerRegistry& m_CommandHandlerRegistry;
+    arm::pipe::PacketVersionResolver&  m_PacketVersionResolver;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ConnectionAcknowledgedCommandHandler.cpp b/profiling/client/src/ConnectionAcknowledgedCommandHandler.cpp
new file mode 100644
index 0000000..9f20b9c
--- /dev/null
+++ b/profiling/client/src/ConnectionAcknowledgedCommandHandler.cpp
@@ -0,0 +1,78 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ConnectionAcknowledgedCommandHandler.hpp"
+
+#include <client/include/TimelineUtilityMethods.hpp>
+
+#include <common/include/ProfilingException.hpp>
+
+#include <fmt/format.h>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+/// Handles the connection-acknowledged command (packet family 0, id 1). From WaitingForAck it moves to Active,
+/// sends the counter directory, enables backend profiling and signals activation; throws on a wrong state or packet.
+void ConnectionAcknowledgedCommandHandler::operator()(const arm::pipe::Packet& packet)
+{
+    ProfilingState currentState = m_StateMachine.GetCurrentState();
+    switch (currentState)
+    {
+    case ProfilingState::Uninitialised:
+    case ProfilingState::NotConnected:
+        throw arm::pipe::ProfilingException(fmt::format("Connection Acknowledged Command Handler invoked while in a "
+                                            "wrong state: {}",
+                                            GetProfilingStateName(currentState)));
+    case ProfilingState::WaitingForAck:
+        // Process the packet
+        if (!(packet.GetPacketFamily() == 0u && packet.GetPacketId() == 1u))
+        {
+            throw arm::pipe::InvalidArgumentException(fmt::format(
+                "Expected Packet family = 0, id = 1 but received family = {}, id = {}",
+                packet.GetPacketFamily(), packet.GetPacketId()));
+        }
+        // Once a Connection Acknowledged packet has been received, move to the Active state immediately
+        m_StateMachine.TransitionToState(ProfilingState::Active);
+        // Send the counter directory packet.
+        m_SendCounterPacket.SendCounterDirectoryPacket(m_CounterDirectory);
+
+        if (m_TimelineEnabled)
+        {
+            m_SendTimelinePacket.SendTimelineMessageDirectoryPackage();
+            TimelineUtilityMethods::SendWellKnownLabelsAndEventClasses(m_SendTimelinePacket);
+        }
+
+        if (m_BackendProfilingContext.has_value())
+        {
+            for (const auto& backendContext : m_BackendProfilingContext.value())
+            {
+                // Enable profiling on the backend and assert that it returns true
+                if(!backendContext.second->EnableProfiling(true))
+                {
+                    throw arm::pipe::BackendProfilingException(
+                            "Unable to enable profiling on Backend Id: " + backendContext.first);
+                }
+            }
+        }
+
+        // At this point signal any external processes waiting on the profiling system
+        // to come up that profiling is fully active
+        m_ProfilingServiceStatus.NotifyProfilingServiceActive();
+        break;
+    case ProfilingState::Active:
+        return; // NOP
+    default:
+        throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}",
+                                            static_cast<int>(currentState)));
+    }
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ConnectionAcknowledgedCommandHandler.hpp b/profiling/client/src/ConnectionAcknowledgedCommandHandler.hpp
new file mode 100644
index 0000000..e409ee6
--- /dev/null
+++ b/profiling/client/src/ConnectionAcknowledgedCommandHandler.hpp
@@ -0,0 +1,71 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ProfilingStateMachine.hpp"
+
+#include <client/include/IProfilingServiceStatus.hpp>
+#include <client/include/ISendCounterPacket.hpp>
+#include <client/include/ISendTimelinePacket.hpp>
+
+#include <client/include/backends/IBackendProfilingContext.hpp>
+
+#include <common/include/CommandHandlerFunctor.hpp>
+#include <common/include/Packet.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class ConnectionAcknowledgedCommandHandler final : public arm::pipe::CommandHandlerFunctor
+{
+    /// Read-only reference to a map from backend id to that backend's profiling context.
+    using BackendProfilingContexts =
+        const std::unordered_map<std::string, std::shared_ptr<IBackendProfilingContext>>&;
+
+public:
+    ConnectionAcknowledgedCommandHandler(uint32_t familyId,
+                                         uint32_t packetId,
+                                         uint32_t version,
+                                         ICounterDirectory& counterDirectory,
+                                         ISendCounterPacket& sendCounterPacket,
+                                         ISendTimelinePacket& sendTimelinePacket,
+                                         ProfilingStateMachine& profilingStateMachine,
+                                         IProfilingServiceStatus& profilingServiceStatus,
+                                         arm::pipe::Optional<BackendProfilingContexts> backendProfilingContexts =
+                                         arm::pipe::EmptyOptional())
+        : CommandHandlerFunctor(familyId, packetId, version)
+        , m_CounterDirectory(counterDirectory)
+        , m_SendCounterPacket(sendCounterPacket)
+        , m_SendTimelinePacket(sendTimelinePacket)
+        , m_StateMachine(profilingStateMachine)
+        , m_ProfilingServiceStatus(profilingServiceStatus)
+        , m_BackendProfilingContext(backendProfilingContexts)
+        , m_TimelineEnabled(false)
+    {}
+
+    void operator()(const arm::pipe::Packet& packet) override;
+    /// Sets the flag operator() checks before sending the timeline message directory and labels.
+    void setTimelineEnabled(bool timelineEnabled)
+    {
+        m_TimelineEnabled.store(timelineEnabled);
+    }
+
+private:
+    const ICounterDirectory& m_CounterDirectory;       // sent as the counter directory packet
+    ISendCounterPacket&      m_SendCounterPacket;
+    ISendTimelinePacket&     m_SendTimelinePacket;
+    ProfilingStateMachine&   m_StateMachine;
+    IProfilingServiceStatus& m_ProfilingServiceStatus;
+    arm::pipe::Optional<BackendProfilingContexts> m_BackendProfilingContext; // empty unless supplied
+    std::atomic<bool> m_TimelineEnabled;               // false until setTimelineEnabled(true)
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/CounterIdMap.cpp b/profiling/client/src/CounterIdMap.cpp
new file mode 100644
index 0000000..cb637c3
--- /dev/null
+++ b/profiling/client/src/CounterIdMap.cpp
@@ -0,0 +1,58 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <client/include/CounterIdMap.hpp>
+
+#include <common/include/ProfilingException.hpp>
+
+#include <map>
+
+namespace arm
+{
+namespace pipe
+{
+
+void CounterIdMap::RegisterMapping(uint16_t globalCounterId,
+                                   uint16_t backendCounterId,
+                                   const std::string& backendId)
+{
+    // Keep both lookup directions in step; registering an id again overwrites the old entry.
+    m_GlobalCounterIdMap[globalCounterId] = std::make_pair(backendCounterId, backendId);
+    m_BackendCounterIdMap[std::make_pair(backendCounterId, backendId)] = globalCounterId;
+}
+
+void CounterIdMap::Reset()
+{
+    m_GlobalCounterIdMap.clear();  // forget every global id -> (backend counter id, backend id) entry
+    m_BackendCounterIdMap.clear(); // and every (backend counter id, backend id) -> global id entry
+}
+
+uint16_t CounterIdMap::GetGlobalId(uint16_t backendCounterId, const std::string& backendId) const
+{
+    // Look up the global id registered for this (backend counter id, backend id) pair.
+    const auto found = m_BackendCounterIdMap.find(std::make_pair(backendCounterId, backendId));
+    if (found != m_BackendCounterIdMap.end())
+    {
+        return found->second;
+    }
+    // Nothing registered for the pair: report which backend and counter were asked for.
+    throw arm::pipe::ProfilingException("No Backend Counter [" + backendId + ":" +
+                                        std::to_string(backendCounterId) + "] registered");
+}
+
+const std::pair<uint16_t, std::string>& CounterIdMap::GetBackendId(uint16_t globalCounterId) const
+{
+    // Resolve a global counter id to its (backend counter id, backend id) pair, or throw.
+    const auto found = m_GlobalCounterIdMap.find(globalCounterId);
+    if (found != m_GlobalCounterIdMap.end())
+    {
+        return found->second;
+    }
+    throw arm::pipe::ProfilingException(
+        "No Global Counter ID [" + std::to_string(globalCounterId) + "] registered");
+}
+
+}    // namespace pipe
+}    // namespace arm
diff --git a/profiling/client/src/DeactivateTimelineReportingCommandHandler.cpp b/profiling/client/src/DeactivateTimelineReportingCommandHandler.cpp
new file mode 100644
index 0000000..6c6a045
--- /dev/null
+++ b/profiling/client/src/DeactivateTimelineReportingCommandHandler.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DeactivateTimelineReportingCommandHandler.hpp"
+
+#include <common/include/ProfilingException.hpp>
+
+#include <fmt/format.h>
+
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void DeactivateTimelineReportingCommandHandler::operator()(const arm::pipe::Packet& packet)
+{
+    // Only acts in the Active state, and only on the packet with family 0 and id 7.
+    ProfilingState currentState = m_StateMachine.GetCurrentState();
+    switch ( currentState )
+    {
+        case ProfilingState::Uninitialised:
+        case ProfilingState::NotConnected:
+        case ProfilingState::WaitingForAck:
+            throw arm::pipe::ProfilingException(fmt::format(
+                "Deactivate Timeline Reporting Command Handler invoked while in a wrong state: {}",
+                GetProfilingStateName(currentState)));
+        case ProfilingState::Active:
+            if (!(packet.GetPacketFamily() == 0u && packet.GetPacketId() == 7u))
+            {
+                // Formatted like the other errors here, with uniform "name = value" spacing.
+                throw arm::pipe::ProfilingException(fmt::format(
+                    "Expected Packet family = 0, id = 7 but received family = {} id = {}",
+                    packet.GetPacketFamily(), packet.GetPacketId()));
+            }
+
+            m_TimelineReporting.store(false);
+
+            // Notify Backends
+            m_BackendNotifier.NotifyBackendsForTimelineReporting();
+
+            break;
+        default:
+            throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}",
+                                                static_cast<int>(currentState)));
+    }
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/DeactivateTimelineReportingCommandHandler.hpp b/profiling/client/src/DeactivateTimelineReportingCommandHandler.hpp
new file mode 100644
index 0000000..4d0473f
--- /dev/null
+++ b/profiling/client/src/DeactivateTimelineReportingCommandHandler.hpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ProfilingStateMachine.hpp"
+#include "INotifyBackends.hpp"
+#include "ProfilingStateMachine.hpp"
+
+#include <common/include/CommandHandlerFunctor.hpp>
+#include <common/include/Packet.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class DeactivateTimelineReportingCommandHandler : public arm::pipe::CommandHandlerFunctor
+{
+// Command handler that clears a shared timeline-reporting flag and then notifies the backends.
+public:
+    DeactivateTimelineReportingCommandHandler(uint32_t familyId,
+                                              uint32_t packetId,
+                                              uint32_t version,
+                                              std::atomic<bool>& timelineReporting,
+                                              ProfilingStateMachine& profilingStateMachine,
+                                              INotifyBackends& notifyBackends)
+        : CommandHandlerFunctor(familyId, packetId, version)
+        , m_TimelineReporting(timelineReporting)
+        , m_StateMachine(profilingStateMachine)
+        , m_BackendNotifier(notifyBackends)
+    {}
+    // Throws arm::pipe::ProfilingException when invoked in a wrong state or for an unexpected packet.
+    void operator()(const arm::pipe::Packet& packet) override;
+
+private:
+    std::atomic<bool>&     m_TimelineReporting; // referenced, not owned; set to false by operator()
+    ProfilingStateMachine& m_StateMachine;      // queried for the current profiling state
+    INotifyBackends&       m_BackendNotifier;   // notified after the flag has been cleared
+};
+
+} // namespace pipe
+
+} // namespace arm
\ No newline at end of file
diff --git a/profiling/client/src/FileOnlyProfilingConnection.cpp b/profiling/client/src/FileOnlyProfilingConnection.cpp
new file mode 100644
index 0000000..bee2c62
--- /dev/null
+++ b/profiling/client/src/FileOnlyProfilingConnection.cpp
@@ -0,0 +1,307 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "FileOnlyProfilingConnection.hpp"
+
+#include <common/include/Constants.hpp>
+#include <common/include/ProfilingException.hpp>
+#include <common/include/PacketVersionResolver.hpp>
+
+#include <algorithm>
+#include <iostream>
+#include <thread>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+std::vector<uint32_t> StreamMetaDataProcessor::GetHeadersAccepted()
+{
+    // This handler accepts exactly one header: the metadata packet header
+    // computed when the processor was constructed.
+    return std::vector<uint32_t>(1, m_MetaDataPacketHeader);
+}
+
+void StreamMetaDataProcessor::HandlePacket(const arm::pipe::Packet& packet)
+{
+    // Reject anything other than the stream metadata packet this handler accepts.
+    if (packet.GetHeader() != m_MetaDataPacketHeader)
+    {
+        throw arm::pipe::ProfilingException("StreamMetaDataProcessor can only handle Stream Meta Data Packets");
+    }
+    // The magic number is read from the first four payload bytes, so make sure they exist.
+    const unsigned char* payload = packet.GetData();
+    if (payload == nullptr || packet.GetLength() < sizeof(uint32_t))
+    {
+        throw arm::pipe::ProfilingException("Protocol read error. Unable to read the PIPE_MAGIC value.");
+    }
+    // determine the endianness of the protocol: whichever byte order reproduces PIPE_MAGIC
+    const bool isBigEndian = ToUint32(payload, TargetEndianness::BeWire) == arm::pipe::PIPE_MAGIC;
+    if (!isBigEndian && ToUint32(payload, TargetEndianness::LeWire) != arm::pipe::PIPE_MAGIC)
+    {
+        throw arm::pipe::ProfilingException("Protocol read error. Unable to read the PIPE_MAGIC value.");
+    }
+    m_FileOnlyProfilingConnection->SetEndianess(isBigEndian ? TargetEndianness::BeWire
+                                                            : TargetEndianness::LeWire);
+    // send back the acknowledgement (a packet that carries no payload)
+    std::unique_ptr<unsigned char[]> uniqueNullPtr = nullptr;
+    arm::pipe::Packet returnPacket(0x10000, 0, uniqueNullPtr);
+    m_FileOnlyProfilingConnection->ReturnPacket(returnPacket);
+}
+
+uint32_t StreamMetaDataProcessor::ToUint32(const unsigned char* data, TargetEndianness endianness)
+{
+    // Combine the four bytes data[0..3] into one 32-bit value.
+    // For BeWire data[0] is the most significant byte; for any other endianness
+    // data[0] is the least significant byte.
+    const bool bigEndian = (endianness == TargetEndianness::BeWire);
+    uint32_t value = 0;
+    for (unsigned int i = 0; i < 4u; ++i)
+    {
+        // Consume the bytes from most significant to least significant.
+        const unsigned int index = bigEndian ? i : 3u - i;
+        value = (value << 8) | static_cast<uint32_t>(data[index]);
+    }
+    return value;
+}
+
+FileOnlyProfilingConnection::~FileOnlyProfilingConnection()
+{
+    try
+    {
+        Close(); // discards unread packets and stops/joins the processing thread
+    }
+    catch (...)
+    {
+        // deliberately ignored so that no exception can leave the destructor
+    }
+}
+
+bool FileOnlyProfilingConnection::IsOpen() const
+{
+    // This type of connection is always open, so the answer is unconditionally true.
+    return true;
+}
+
+void FileOnlyProfilingConnection::Close()
+{
+    {   // Dump any unread packets, under the mutex ReadPacket/ReturnPacket use for this queue.
+        std::lock_guard<std::mutex> lck(m_PacketAvailableMutex);
+        std::queue<arm::pipe::Packet>().swap(m_PacketQueue);
+    }
+    // dispose of the processing thread
+    m_KeepRunning.store(false);
+    // Fail() can call Close() from the processing thread itself; a thread joining itself is
+    // an error (std::system_error), so the join is left to callers running on other threads.
+    if (m_LocalHandlersThread.joinable() && m_LocalHandlersThread.get_id() != std::this_thread::get_id())
+    {
+        // make sure the thread wakes up and sees it has to stop
+        m_ConditionPacketReadable.notify_one();
+        m_LocalHandlersThread.join();
+    }
+}
+
+bool FileOnlyProfilingConnection::WritePacket(const unsigned char* buffer, uint32_t length)
+{
+    ARM_PIPE_ASSERT(buffer);                                   // a null buffer is a caller error
+    arm::pipe::Packet packet = ReceivePacket(buffer, length);  // build a Packet from the raw bytes
+    ForwardPacketToHandlers(packet);                           // queue it for the local handlers, if any
+    return true;                                               // this implementation always reports success
+}
+
+void FileOnlyProfilingConnection::ReturnPacket(arm::pipe::Packet& packet)
+{
+    {   // hold the mutex only while the queue is modified
+        std::lock_guard<std::mutex> lck(m_PacketAvailableMutex);
+        m_PacketQueue.push(std::move(packet)); // the caller's packet is left moved-from
+    }
+    m_ConditionPacketAvailable.notify_one();   // wake one thread waiting in ReadPacket
+}
+
+arm::pipe::Packet FileOnlyProfilingConnection::ReadPacket(uint32_t timeout)
+{
+    std::unique_lock<std::mutex> lck(m_PacketAvailableMutex);
+
+    // Block until a packet has been queued or `timeout` milliseconds have elapsed; the
+    // predicate (queue not empty) is what decides whether the wait succeeded.
+    const bool packetAvailable = m_ConditionPacketAvailable.wait_for(
+        lck, std::chrono::milliseconds(timeout), [this] { return !m_PacketQueue.empty(); });
+    if (!packetAvailable)
+    {
+        // Timed out with nothing queued: hand back a default-constructed packet.
+        return arm::pipe::Packet();
+    }
+
+    arm::pipe::Packet nextPacket = std::move(m_PacketQueue.front());
+    m_PacketQueue.pop();
+    return nextPacket;
+}
+
+void FileOnlyProfilingConnection::Fail(const std::string& errorMessage)
+{
+    Close();                                           // shut the connection down first
+    throw arm::pipe::ProfilingException(errorMessage); // always throws; never returns normally
+}
+
+/// Adds a local packet handler to the FileOnlyProfilingConnection.
+/// The handler is handed a pointer to this connection and is then indexed under every header
+/// it reports through GetHeadersAccepted(); a handler that reports no headers is treated as
+/// universal. This function only registers the handler: the processing thread that
+/// dispatches packets to the handlers is started separately by StartProcessingThread(), so
+/// that packet handling does not slow down the thread that writes packets.
+/// @param localPacketHandler shared pointer to the handler to register; it is dereferenced
+///                           here, so it must not be null.
+void FileOnlyProfilingConnection::AddLocalPacketHandler(ILocalPacketHandlerSharedPtr localPacketHandler)
+{
+    m_PacketHandlers.push_back(std::move(localPacketHandler));
+    ILocalPacketHandlerSharedPtr localCopy = m_PacketHandlers.back();
+    localCopy->SetConnection(this);
+
+    // Query the handler once; GetHeadersAccepted() returns its list by value.
+    const std::vector<uint32_t> headers = localCopy->GetHeadersAccepted();
+    if (headers.empty())
+    {
+        //this is a universal handler
+        m_UniversalHandlers.push_back(localCopy);
+    }
+    else
+    {
+        for (uint32_t header : headers)
+        {
+            // operator[] creates the (empty) handler list for a header on first use, so no
+            // separate find/emplace step is needed before appending.
+            m_IndexedHandlers[header].push_back(localCopy);
+        }
+    }
+}
+
+void FileOnlyProfilingConnection::StartProcessingThread()
+{
+    // check if the thread has already started; if so there is nothing to do
+    if (m_IsRunning.load())
+    {
+        return;
+    }
+    // a previous thread may have finished (m_IsRunning false) without being joined yet
+    if (m_LocalHandlersThread.joinable())
+    {
+        m_LocalHandlersThread.join();
+    }
+    m_IsRunning.store(true);   // cleared again by ServiceLocalHandlers when it returns
+    m_KeepRunning.store(true); // set before the thread starts so its loop does not exit at once
+    m_LocalHandlersThread = std::thread(&FileOnlyProfilingConnection::ServiceLocalHandlers, this);
+}
+
+void FileOnlyProfilingConnection::ForwardPacketToHandlers(arm::pipe::Packet& packet)
+{
+    // No handler registered, or the processing thread is stopping: leave the packet untouched.
+    if (m_PacketHandlers.empty() || !m_KeepRunning.load())
+    {
+        return;
+    }
+    {
+        std::lock_guard<std::mutex> readableListLock(m_ReadableMutex);
+        // Check again under the lock: a stop may have been requested while we waited for it.
+        if (!m_KeepRunning.load())
+        {
+            return;
+        }
+        // The packet is moved into the queue, so the caller's object is left moved-from.
+        m_ReadableList.push(std::move(packet));
+    }
+    // Notify only after the mutex has been released.
+    m_ConditionPacketReadable.notify_one();
+}
+
+void FileOnlyProfilingConnection::ServiceLocalHandlers()
+{
+    // Wake-up condition for both waits below. The stop request is part of it so that the
+    // notify issued by Close() ends the wait immediately; with a predicate that only looked at
+    // the list, the thread would go straight back to sleep until a packet or the timeout.
+    auto wakeUp = [this] { return !m_ReadableList.empty() || !m_KeepRunning.load(); };
+    do
+    {
+        arm::pipe::Packet returnedPacket;
+        bool readPacket = false;
+        {   // only lock while we are taking the packet off the incoming list
+            std::unique_lock<std::mutex> lck(m_ReadableMutex);
+            if (m_Timeout < 0)
+            {
+                m_ConditionPacketReadable.wait(lck, wakeUp);
+            }
+            else
+            {
+                m_ConditionPacketReadable.wait_for(
+                    lck, std::chrono::milliseconds(std::max(m_Timeout, 1000)), wakeUp);
+            }
+            if (!m_KeepRunning.load())
+            {
+                // asked to stop: drop whatever is still queued
+                ClearReadableList();
+            }
+            else if (!m_ReadableList.empty())
+            {
+                returnedPacket = std::move(m_ReadableList.front());
+                m_ReadableList.pop();
+                readPacket = true;
+            }
+        }
+        if (m_KeepRunning.load() && readPacket)
+        {
+            DispatchPacketToHandlers(returnedPacket);
+        }
+    } while (m_KeepRunning.load());
+    // make sure the readable list is cleared
+    ClearReadableList();
+    m_IsRunning.store(false);
+}
+
+void FileOnlyProfilingConnection::ClearReadableList()
+{
+    // Discard every packet still waiting for the local handlers.
+    // No lock is taken here.
+    while (!m_ReadableList.empty())
+    {
+        m_ReadableList.pop();
+    }
+}
+
+void FileOnlyProfilingConnection::DispatchPacketToHandlers(const arm::pipe::Packet& packet)
+{
+    for (auto& delegate : m_UniversalHandlers) // universal handlers see every packet
+    {
+        delegate->HandlePacket(packet);        // NOTE(review): not wrapped in try/catch, unlike below
+    }
+    auto iter = m_IndexedHandlers.find(packet.GetHeader()); // handlers registered for this header
+    if (iter != m_IndexedHandlers.end())
+    {
+        for (auto& delegate : iter->second)
+        {
+            try
+            {
+                delegate->HandlePacket(packet);
+            }
+            catch (const arm::pipe::ProfilingException& ex)
+            {
+                Fail(ex.what()); // Fail() closes the connection and throws a ProfilingException
+            }
+            catch (const std::exception& ex)
+            {
+                Fail(ex.what()); // same treatment for any other standard exception
+            }
+            catch (...)
+            {
+                Fail("handler failed"); // unknown exception type: no message to pass on
+            }
+        }
+    }
+}
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/src/FileOnlyProfilingConnection.hpp b/profiling/client/src/FileOnlyProfilingConnection.hpp
new file mode 100644
index 0000000..c7e60f5
--- /dev/null
+++ b/profiling/client/src/FileOnlyProfilingConnection.hpp
@@ -0,0 +1,137 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IProfilingConnection.hpp"
+#include "ProfilingUtils.hpp"
+
+#include <client/include/ILocalPacketHandler.hpp>
+#include <client/include/ProfilingOptions.hpp>
+
+#include <common/include/Assert.hpp>
+#include <common/include/Packet.hpp>
+
+#include <server/include/timelineDecoder/DirectoryCaptureCommandHandler.hpp>
+
+#include <atomic>
+#include <condition_variable>
+#include <fstream>
+#include <map>
+#include <mutex>
+#include <queue>
+#include <thread>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+// forward declaration
+class FileOnlyProfilingConnection;
+
+class StreamMetaDataProcessor : public ILocalPacketHandler
+{
+public:
+    explicit StreamMetaDataProcessor(FileOnlyProfilingConnection* fileOnlyProfilingConnection) :
+            m_FileOnlyProfilingConnection(fileOnlyProfilingConnection),
+            m_MetaDataPacketHeader(ConstructHeader(0, 0)) {};
+    // Returns the single header this handler accepts (the metadata packet header).
+    std::vector<uint32_t> GetHeadersAccepted() override;
+    // Detects the stream endianness from the packet and returns an acknowledgement packet.
+    void HandlePacket(const arm::pipe::Packet& packet) override;
+
+private:
+    FileOnlyProfilingConnection* m_FileOnlyProfilingConnection; // not owned
+    uint32_t m_MetaDataPacketHeader;                            // ConstructHeader(0, 0)
+    // Builds a 32-bit value from the first four bytes of data in the given byte order.
+    static uint32_t ToUint32(const unsigned char* data, TargetEndianness endianness);
+};
+
+class FileOnlyProfilingConnection : public IProfilingConnection, public IInternalProfilingConnection
+{
+public:
+    explicit FileOnlyProfilingConnection(const ProfilingOptions& options)
+        : m_Options(options)
+        , m_Endianness(TargetEndianness::LeWire)    // Set a sensible default.
+                                                    // StreamMetaDataProcessor will set a real value.
+        , m_IsRunning(false)
+        , m_KeepRunning(false)
+        , m_Timeout(1000)
+    {
+        // add the StreamMetaDataProcessor
+        auto streamMetaDataProcessor = std::make_shared<StreamMetaDataProcessor>(this);
+        AddLocalPacketHandler(streamMetaDataProcessor);
+        // and any additional ones added by the users
+        for (const ILocalPacketHandlerSharedPtr& localPacketHandler : options.m_LocalPacketHandlers)
+        {
+            AddLocalPacketHandler(localPacketHandler);
+        }
+        if (!m_PacketHandlers.empty())
+        {
+            StartProcessingThread();
+        }
+        // NOTE: could add timeout to the external profiling options
+    };
+    // Calls Close(); any exception raised while closing is swallowed.
+    ~FileOnlyProfilingConnection() override;
+
+    bool IsOpen() const override;
+    // Discards unread packets, then stops and joins the processing thread.
+    void Close() override;
+
+    // This is effectively receiving a data packet from ArmNN.
+    bool WritePacket(const unsigned char* buffer, uint32_t length) override;
+
+    // Sending a packet back to ArmNN.
+    arm::pipe::Packet ReadPacket(uint32_t timeout) override;
+
+    void SetEndianess(const TargetEndianness& endianness) override //IInternalProfilingConnection
+    {
+        m_Endianness = endianness;
+    }
+
+    void ReturnPacket(arm::pipe::Packet& packet) override; //IInternalProfilingConnection
+
+private:
+    void AddLocalPacketHandler(ILocalPacketHandlerSharedPtr localPacketHandler);
+    void StartProcessingThread();
+    void ClearReadableList();
+    void DispatchPacketToHandlers(const arm::pipe::Packet& packet);
+    // Closes the connection and then throws a ProfilingException carrying errorMessage.
+    void Fail(const std::string& errorMessage);
+
+    void ForwardPacketToHandlers(arm::pipe::Packet& packet);
+    void ServiceLocalHandlers(); // body of m_LocalHandlersThread
+
+    ProfilingOptions m_Options;
+    std::queue<arm::pipe::Packet> m_PacketQueue; // packets handed out by ReadPacket
+    TargetEndianness m_Endianness;
+    // Mutex and condition variable for m_PacketQueue
+    std::mutex m_PacketAvailableMutex;
+    std::condition_variable m_ConditionPacketAvailable;
+
+    std::vector<ILocalPacketHandlerSharedPtr> m_PacketHandlers;                      // every handler
+    std::map<uint32_t, std::vector<ILocalPacketHandlerSharedPtr>> m_IndexedHandlers; // by accepted header
+    std::vector<ILocalPacketHandlerSharedPtr> m_UniversalHandlers;                   // accept no specific header
+
+    // List of readable packets for the local packet handlers
+    std::queue<arm::pipe::Packet> m_ReadableList;
+    // Mutex and condition variable for the readable packet list
+    std::mutex m_ReadableMutex;
+    std::condition_variable m_ConditionPacketReadable;
+    // thread that takes items from the readable list and dispatches them
+    // to the handlers.
+    std::thread m_LocalHandlersThread;
+    // atomic booleans that control the operation of the local handlers thread
+    std::atomic<bool> m_IsRunning;
+    std::atomic<bool> m_KeepRunning;
+    int m_Timeout; // milliseconds; negative waits without a timeout, otherwise at least 1000 is used
+};
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/src/Holder.cpp b/profiling/client/src/Holder.cpp
new file mode 100644
index 0000000..d144e24
--- /dev/null
+++ b/profiling/client/src/Holder.cpp
@@ -0,0 +1,87 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <client/include/Holder.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+CaptureData& CaptureData::operator=(const CaptureData& other)
+{
+    m_CapturePeriod  = other.m_CapturePeriod;
+    m_CounterIds     = other.m_CounterIds;
+    m_ActiveBackends = other.m_ActiveBackends;
+    // The three fields above have been copied; return *this so assignments can be chained.
+    return *this;
+}
+
+void CaptureData::SetActiveBackends(const std::set<std::string>& activeBackends)
+{
+    m_ActiveBackends = activeBackends; // replaces the stored set with a copy of the argument
+}
+
+void CaptureData::SetCapturePeriod(uint32_t capturePeriod)
+{
+    m_CapturePeriod = capturePeriod; // stored as given; no validation is done here
+}
+
+void CaptureData::SetCounterIds(const std::vector<uint16_t>& counterIds)
+{
+    m_CounterIds = counterIds; // replaces the stored list with a copy of the argument
+}
+
+const std::set<std::string>& CaptureData::GetActiveBackends() const
+{
+    return m_ActiveBackends; // returned by reference; valid only while this object is alive
+}
+
+uint32_t CaptureData::GetCapturePeriod() const
+{
+    return m_CapturePeriod; // the value last stored by SetCapturePeriod or operator=
+}
+
+const std::vector<uint16_t>& CaptureData::GetCounterIds() const
+{
+    return m_CounterIds; // returned by reference; valid only while this object is alive
+}
+
+CaptureData Holder::GetCaptureData() const
+{
+    std::lock_guard<std::mutex> lockGuard(m_CaptureThreadMutex);
+    // Returned by value: the copy is made while the mutex is still held.
+    return m_CaptureData;
+}
+
+bool CaptureData::IsCounterIdInCaptureData(uint16_t counterId)
+{
+    // Linear scan of the stored counter ids for an exact match.
+    for (const uint16_t storedId : m_CounterIds)
+    {
+        if (storedId == counterId)
+        {
+            return true;
+        }
+    }
+    return false; // no stored id matched
+}
+
+void Holder::SetCaptureData(uint32_t capturePeriod,
+                            const std::vector<uint16_t>& counterIds,
+                            const std::set<std::string>& activeBackends)
+{
+    std::lock_guard<std::mutex> lockGuard(m_CaptureThreadMutex);
+    // All three fields are updated under the same lock that GetCaptureData takes.
+    m_CaptureData.SetCapturePeriod(capturePeriod);
+    m_CaptureData.SetCounterIds(counterIds);
+    m_CaptureData.SetActiveBackends(activeBackends);
+
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/IBufferManager.hpp b/profiling/client/src/IBufferManager.hpp
new file mode 100644
index 0000000..6aec43f
--- /dev/null
+++ b/profiling/client/src/IBufferManager.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IConsumer.hpp"
+#include "IPacketBuffer.hpp"
+
+#include <memory>
+
+#define MAX_METADATA_PACKET_LENGTH 4096
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class IBufferManager // pure-virtual interface: reserve/commit/release and read packet buffers
+{
+public:
+    virtual ~IBufferManager() {}
+    // reservedSize is an output parameter (passed by non-const reference).
+    virtual IPacketBufferPtr Reserve(unsigned int requestedSize, unsigned int& reservedSize) = 0;
+    // notifyConsumer defaults to true. NOTE(review): presumably notifies the IConsumer - confirm.
+    virtual void Commit(IPacketBufferPtr& packetBuffer, unsigned int size, bool notifyConsumer = true) = 0;
+
+    virtual void Release(IPacketBufferPtr& packetBuffer) = 0;
+
+    virtual IPacketBufferPtr GetReadableBuffer() = 0;
+
+    virtual void MarkRead(IPacketBufferPtr& packetBuffer) = 0;
+    // The consumer is passed as a raw pointer; no ownership transfer is expressed by the type.
+    virtual void SetConsumer(IConsumer* consumer) = 0;
+
+    virtual void FlushReadList() = 0;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/IConsumer.hpp b/profiling/client/src/IConsumer.hpp
new file mode 100644
index 0000000..7a7e5ef
--- /dev/null
+++ b/profiling/client/src/IConsumer.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class IConsumer // pure-virtual interface exposing a single "ready to read" notification
+{
+public:
+    virtual ~IConsumer() {}
+
+    /// Set a "ready to read" flag in the buffer to notify the reading thread to start reading it.
+    virtual void SetReadyToRead() = 0;
+};
+
+} // namespace pipe
+
+} // namespace arm
+
diff --git a/profiling/client/src/INotifyBackends.hpp b/profiling/client/src/INotifyBackends.hpp
new file mode 100644
index 0000000..4fa25b7
--- /dev/null
+++ b/profiling/client/src/INotifyBackends.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class INotifyBackends // pure-virtual notification interface held by reference by command handlers
+{
+public:
+    virtual ~INotifyBackends() {}
+    virtual void NotifyBackendsForTimelineReporting() = 0; // implemented by the notifying component
+};
+
+} // namespace pipe
+
+} // namespace arm
+
diff --git a/profiling/client/src/IPacketBuffer.hpp b/profiling/client/src/IPacketBuffer.hpp
new file mode 100644
index 0000000..02c40e4
--- /dev/null
+++ b/profiling/client/src/IPacketBuffer.hpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <memory>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class IReadOnlyPacketBuffer // interface used by the read thread
+{
+public:
+    virtual ~IReadOnlyPacketBuffer() {}
+
+    virtual const unsigned char* GetReadableData() const = 0;
+
+    virtual unsigned int GetSize() const = 0;
+    // NOTE(review): presumably flags the buffer as consumed - confirm against implementations.
+    virtual void MarkRead() = 0;
+};
+
+class IPacketBuffer : public IReadOnlyPacketBuffer // interface used by code that writes binary packets
+{
+public:
+    virtual ~IPacketBuffer() {}
+
+    virtual void Commit(unsigned int size) = 0;
+
+    virtual void Release() = 0;
+
+    virtual unsigned char* GetWritableData() = 0;
+
+    /// Release the memory held and reset the internal pointer to null.
+    /// After this function is invoked the PacketBuffer is unusable.
+    virtual void Destroy() = 0;
+};
+
+using IPacketBufferPtr = std::unique_ptr<IPacketBuffer>;
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/IPeriodicCounterCapture.hpp b/profiling/client/src/IPeriodicCounterCapture.hpp
new file mode 100644
index 0000000..f96d014
--- /dev/null
+++ b/profiling/client/src/IPeriodicCounterCapture.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+namespace arm
+{
+namespace pipe
+{
+
+class IPeriodicCounterCapture // pure-virtual start/stop control interface
+{
+public:
+    virtual ~IPeriodicCounterCapture() {}
+
+    virtual void Start() = 0;
+    virtual void Stop() = 0;
+};
+
+} // namespace pipe
+} // namespace arm
diff --git a/profiling/client/src/IProfilingConnection.hpp b/profiling/client/src/IProfilingConnection.hpp
new file mode 100644
index 0000000..9a25854
--- /dev/null
+++ b/profiling/client/src/IProfilingConnection.hpp
@@ -0,0 +1,36 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <client/include/ILocalPacketHandler.hpp>
+
+#include <common/include/Packet.hpp>
+
+#include <cstdint>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class IProfilingConnection // pure-virtual interface implemented by FileOnlyProfilingConnection
+{
+public:
+    virtual ~IProfilingConnection() {}
+
+    virtual bool IsOpen() const = 0;
+
+    virtual void Close() =  0;
+
+    virtual bool WritePacket(const unsigned char* buffer, uint32_t length) = 0;
+    // FileOnlyProfilingConnection interprets timeout as milliseconds.
+    virtual arm::pipe::Packet ReadPacket(uint32_t timeout) = 0;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/IProfilingConnectionFactory.hpp b/profiling/client/src/IProfilingConnectionFactory.hpp
new file mode 100644
index 0000000..8676077
--- /dev/null
+++ b/profiling/client/src/IProfilingConnectionFactory.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IProfilingConnection.hpp"
+
+#include <client/include/ProfilingOptions.hpp>
+
+#include <memory>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class IProfilingConnectionFactory // pure-virtual factory for IProfilingConnection objects
+{
+public:
+    using ExternalProfilingOptions = ProfilingOptions;
+    using IProfilingConnectionPtr = std::unique_ptr<IProfilingConnection>;
+
+    virtual ~IProfilingConnectionFactory() {}
+    // The connection is returned in a std::unique_ptr, so the caller owns it.
+    virtual IProfilingConnectionPtr GetProfilingConnection(const ProfilingOptions& options) const = 0;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/IProfilingService.cpp b/profiling/client/src/IProfilingService.cpp
new file mode 100644
index 0000000..d147871
--- /dev/null
+++ b/profiling/client/src/IProfilingService.cpp
@@ -0,0 +1,60 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ProfilingService.hpp"
+
+#include <client/include/IProfilingService.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+std::unique_ptr<IProfilingService> IProfilingService::CreateProfilingService(
+    uint16_t maxGlobalCounterId,
+    IInitialiseProfilingService& initialiser,
+    const std::string& softwareInfo,
+    const std::string& softwareVersion,
+    const std::string& hardwareVersion,
+    arm::pipe::Optional<IReportStructure&> reportStructure)
+{
+    // Factory: callers only ever see the interface, the concrete ProfilingService stays internal.
+    std::unique_ptr<IProfilingService> service =
+        std::make_unique<ProfilingService>(maxGlobalCounterId, initialiser,
+                                           softwareInfo, softwareVersion, hardwareVersion,
+                                           reportStructure);
+    return service;
+}
+
+ProfilingGuidGenerator IProfilingService::m_GuidGenerator;  // class-wide generator used by the GUID helpers below
+
+ProfilingDynamicGuid IProfilingService::GetNextGuid()  // next dynamic GUID from the class-wide m_GuidGenerator
+{
+    return m_GuidGenerator.NextGuid();
+}
+
+ProfilingStaticGuid IProfilingService::GetStaticId(const std::string& str)  // static GUID generated from str
+{
+    return m_GuidGenerator.GenerateStaticId(str);
+}
+
+void IProfilingService::ResetGuidGenerator()  // calls Reset() on the class-wide m_GuidGenerator
+{
+    m_GuidGenerator.Reset();
+}
+
+ProfilingDynamicGuid IProfilingService::NextGuid()  // forwards to GetNextGuid(), so both draw from one generator
+{
+    return IProfilingService::GetNextGuid();
+}
+
+ProfilingStaticGuid IProfilingService::GenerateStaticId(const std::string& str)  // forwards to GetStaticId(str)
+{
+    return IProfilingService::GetStaticId(str);
+}
+
+} // namespace pipe
+} // namespace arm
diff --git a/profiling/client/src/ISendThread.hpp b/profiling/client/src/ISendThread.hpp
new file mode 100644
index 0000000..af76a25
--- /dev/null
+++ b/profiling/client/src/ISendThread.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IProfilingConnection.hpp"
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class ISendThread  // Interface for a startable/stoppable thread that is handed a profiling connection.
+{
+public:
+    virtual ~ISendThread() = default;
+
+    /// Start the thread, giving it @p profilingConnection to work with
+    virtual void Start(IProfilingConnection& profilingConnection) = 0;
+
+    /// Stop the thread; the flag presumably decides whether errors raised on it are rethrown to the caller
+    virtual void Stop(bool rethrowSendThreadExceptions = true) = 0;
+};
+
+} // namespace pipe
+
+} // namespace arm
+
diff --git a/profiling/client/src/NullProfilingConnection.hpp b/profiling/client/src/NullProfilingConnection.hpp
new file mode 100644
index 0000000..a5f8dae
--- /dev/null
+++ b/profiling/client/src/NullProfilingConnection.hpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IProfilingConnection.hpp"
+
+#include <common/include/IgnoreUnused.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class NullProfilingConnection : public IProfilingConnection  // No-op connection: swallows writes, reads Packet(0).
+{
+public:  // same access as the IProfilingConnection members being overridden
+    bool IsOpen() const override { return true; }  // always reports an open connection
+    void Close() override {}                        // nothing to close
+
+    /// Ignores the data entirely and reports success.
+    bool WritePacket(const unsigned char* buffer, uint32_t length) override
+    {
+        arm::pipe::IgnoreUnused(buffer);
+        arm::pipe::IgnoreUnused(length);
+        return true;
+    }
+    /// Ignores the timeout and immediately returns Packet(0).
+    Packet ReadPacket(uint32_t timeout) override
+    {
+        arm::pipe::IgnoreUnused(timeout);
+        return Packet(0);
+    }
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/PacketBuffer.cpp b/profiling/client/src/PacketBuffer.cpp
new file mode 100644
index 0000000..3b5fd35
--- /dev/null
+++ b/profiling/client/src/PacketBuffer.cpp
@@ -0,0 +1,67 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PacketBuffer.hpp"
+
+#include <common/include/ProfilingException.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+PacketBuffer::PacketBuffer(unsigned int maxSize)
+    : m_MaxSize(maxSize)
+    , m_Size(0)                                           // nothing committed yet
+    , m_Data(std::make_unique<unsigned char[]>(maxSize))  // value-initialised storage of maxSize bytes
+{
+}
+
+const unsigned char* PacketBuffer::GetReadableData() const  // read-only pointer to the start of the owned storage
+{
+    return m_Data.get();
+}
+
+unsigned int PacketBuffer::GetSize() const  // size recorded by the last Commit(); 0 after MarkRead/Release/Destroy
+{
+    return m_Size;
+}
+
+void PacketBuffer::MarkRead()  // resets the committed size to 0; the storage itself is not cleared
+{
+    m_Size = 0;
+}
+
+void PacketBuffer::Commit(unsigned int size)  // records size as the committed byte count; throws if over capacity
+{
+    if (size > m_MaxSize)  // checked before the assignment, so a rejected commit leaves m_Size unchanged
+    {
+        throw arm::pipe::ProfilingException("Cannot commit [" + std::to_string(size) +
+            "] bytes which is more than the maximum size of the buffer [" + std::to_string(m_MaxSize) + "]");
+    }
+    m_Size = size;  // size == m_MaxSize is accepted: the buffer may be filled completely
+}
+
+void PacketBuffer::Release()  // resets the committed size to 0 (same effect as MarkRead in this class)
+{
+    m_Size = 0;
+}
+
+unsigned char* PacketBuffer::GetWritableData()  // mutable pointer to the owned storage; nullptr after Destroy()
+{
+    return m_Data.get();
+}
+
+void PacketBuffer::Destroy()
+{
+    m_Data.reset();  // frees the storage; the data accessors return nullptr from now on
+    m_MaxSize = 0;   // with zero capacity only Commit(0) can still succeed
+    m_Size = 0;
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/PacketBuffer.hpp b/profiling/client/src/PacketBuffer.hpp
new file mode 100644
index 0000000..3fa5c60
--- /dev/null
+++ b/profiling/client/src/PacketBuffer.hpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IPacketBuffer.hpp"
+
+#include <memory>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class PacketBuffer : public IPacketBuffer  // Fixed-capacity byte buffer that owns its storage.
+{
+public:
+    PacketBuffer(unsigned int maxSize);
+    ~PacketBuffer() = default;
+
+    /// Read-only pointer to the start of the storage.
+    const unsigned char* GetReadableData() const override;
+    /// Byte count recorded by the last Commit (0 after MarkRead/Release/Destroy).
+    unsigned int GetSize() const override;
+    /// Resets the committed byte count to zero.
+    void MarkRead() override;
+    /// Records @p size as the committed byte count; throws when it exceeds the capacity.
+    void Commit(unsigned int size) override;
+    /// Resets the committed byte count to zero.
+    void Release() override;
+    /// Mutable pointer to the start of the storage.
+    unsigned char* GetWritableData() override;
+    /// Frees the storage and zeroes both size and capacity.
+    void Destroy() override;
+
+private:
+    unsigned int m_MaxSize;                   ///< capacity in bytes
+    unsigned int m_Size;                      ///< bytes currently committed
+    std::unique_ptr<unsigned char[]> m_Data;  ///< owned storage
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/PerJobCounterSelectionCommandHandler.cpp b/profiling/client/src/PerJobCounterSelectionCommandHandler.cpp
new file mode 100644
index 0000000..2d2d2bd
--- /dev/null
+++ b/profiling/client/src/PerJobCounterSelectionCommandHandler.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PerJobCounterSelectionCommandHandler.hpp"
+
+#include <common/include/CommonProfilingUtils.hpp>
+
+#include <fmt/format.h>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void PerJobCounterSelectionCommandHandler::operator()(const arm::pipe::Packet& packet)
+{
+    const ProfilingState state = m_StateMachine.GetCurrentState();
+    switch (state)  // the command is only accepted in the Active state; every other state throws
+    {
+    case ProfilingState::Active:
+    {
+        // Only the packet with family 0 and id 5 is accepted here
+        const bool isExpectedPacket = packet.GetPacketFamily() == 0u && packet.GetPacketId() == 5u;
+        if (!isExpectedPacket)
+        {
+            throw arm::pipe::InvalidArgumentException(fmt::format("Expected Packet family = 0, id = 5 but "
+                                                                  "received family = {}, id = {}",
+                                                                  packet.GetPacketFamily(),
+                                                                  packet.GetPacketId()));
+        }
+        break;  // a well-formed packet is silently dropped
+    }
+    case ProfilingState::Uninitialised:
+    case ProfilingState::NotConnected:
+    case ProfilingState::WaitingForAck:
+        throw arm::pipe::ProfilingException(fmt::format(
+            "Per-Job Counter Selection Command Handler invoked while in an incorrect state: {}",
+            GetProfilingStateName(state)));
+    default:
+        throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}",
+                                                        static_cast<int>(state)));
+    }
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/PerJobCounterSelectionCommandHandler.hpp b/profiling/client/src/PerJobCounterSelectionCommandHandler.hpp
new file mode 100644
index 0000000..1d088e4
--- /dev/null
+++ b/profiling/client/src/PerJobCounterSelectionCommandHandler.hpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ProfilingStateMachine.hpp"
+
+#include <common/include/CommandHandlerFunctor.hpp>
+#include <common/include/Packet.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class PerJobCounterSelectionCommandHandler : public arm::pipe::CommandHandlerFunctor
+{
+    // Handler for the Per-Job Counter Selection packet; it keeps only a reference to the state machine.
+public:
+    PerJobCounterSelectionCommandHandler(uint32_t familyId,
+                                         uint32_t packetId,
+                                         uint32_t version,
+                                         const ProfilingStateMachine& profilingStateMachine)
+        : CommandHandlerFunctor(familyId, packetId, version)
+        , m_StateMachine(profilingStateMachine)
+    {}
+
+    void operator()(const Packet& packet) override;  // invoked with the received packet; defined in the .cpp
+
+private:
+    const ProfilingStateMachine& m_StateMachine;  // not owned: the referenced object must outlive this handler
+};
+
+} // namespace pipe
+
+} // namespace arm
+
diff --git a/profiling/client/src/PeriodicCounterCapture.cpp b/profiling/client/src/PeriodicCounterCapture.cpp
new file mode 100644
index 0000000..490173c
--- /dev/null
+++ b/profiling/client/src/PeriodicCounterCapture.cpp
@@ -0,0 +1,139 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PeriodicCounterCapture.hpp"
+
+#include <common/include/Logging.hpp>
+
+#include <iostream>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void PeriodicCounterCapture::Start()
+{
+    // Starting is idempotent: a second call while the thread is running does nothing
+    if (m_IsRunning)
+    {
+        // The capture thread is already running
+        return;
+    }
+
+    // Mark the capture thread as running (plain bool, cleared again only by Stop())
+    m_IsRunning = true;
+
+    // Set the loop flag before the thread exists so Capture() keeps iterating until Stop() clears it
+    m_KeepRunning.store(true);
+
+    // Launch Capture() on a new thread, handing it the counter reader by reference
+    m_PeriodCaptureThread = std::thread(&PeriodicCounterCapture::Capture, this, std::ref(m_ReadCounterValues));
+}
+
+void PeriodicCounterCapture::Stop()
+{
+    // Signal the capture thread to stop; the Capture loop re-checks this flag after every iteration
+    m_KeepRunning.store(false);
+
+    // join() is only valid on a joinable thread, i.e. one that was started and not yet joined
+    if (m_PeriodCaptureThread.joinable())
+    {
+        // Block until the capture thread has finished its current iteration and exited
+        m_PeriodCaptureThread.join();
+    }
+
+    // Mark the capture thread as not running so that a later Start() launches a new thread
+    m_IsRunning = false;
+}
+
+CaptureData PeriodicCounterCapture::ReadCaptureData()  // returns by value whatever the shared Holder reports now
+{
+    return m_CaptureDataHolder.GetCaptureData();
+}
+
+void PeriodicCounterCapture::DispatchPeriodicCounterCapturePacket(
+    const std::string& backendId, const std::vector<Timestamp>& timestampValues)
+{
+    // One capture packet is sent per timestamp entry reported by the backend
+    for (const Timestamp& entry : timestampValues)
+    {
+        // Work on a copy: the caller's entries are const and keep their backend-local ids
+        std::vector<CounterValue> globalCounterValues = entry.counterValues;
+        for (CounterValue& value : globalCounterValues)
+        {
+            // translate the counterId to globalCounterId
+            value.counterId = m_CounterIdMap.GetGlobalId(value.counterId, backendId);
+        }
+
+        m_SendCounterPacket.SendPeriodicCounterCapturePacket(entry.timestamp, globalCounterValues);
+    }
+}
+
+void PeriodicCounterCapture::Capture(IReadCounterValues& readCounterValues)
+{
+    // Body of the capture thread: loops until Stop() clears m_KeepRunning. Being a do/while, it
+    // always performs at least one iteration.
+    do
+    {
+        // Take a snapshot of the current capture settings for this iteration
+        auto currentCaptureData = ReadCaptureData();
+        const std::vector<uint16_t>& counterIds = currentCaptureData.GetCounterIds();
+        const uint32_t capturePeriod = currentCaptureData.GetCapturePeriod();
+
+        if (capturePeriod == 0)
+        {
+            // No data capture requested: pause for a fixed 50 ms, then re-check the settings
+            std::this_thread::sleep_for(std::chrono::milliseconds(50u));
+            continue;
+        }
+
+        if (!counterIds.empty())
+        {
+            std::vector<CounterValue> counterValues;
+            counterValues.reserve(counterIds.size());
+
+            // Pair every requested counter id with its delta value. Iterating the ids directly
+            // (instead of through a uint16_t index) stays correct for lists longer than 65535 entries.
+            for (const uint16_t requestedId : counterIds)
+            {
+                uint32_t counterValue = 0;
+                try
+                {
+                    counterValue = readCounterValues.GetDeltaCounterValue(requestedId);
+                }
+                catch (const arm::pipe::ProfilingException& e)
+                {
+                    // Report the error and skip this counter
+                    ARM_PIPE_LOG(warning) << "An error has occurred when getting a counter value: "
+                                       << e.what();
+                    continue;
+                }
+
+                counterValues.emplace_back(CounterValue {requestedId, counterValue });
+            }
+
+            // Send Periodic Counter Capture Packet for the Timestamp
+            m_SendCounterPacket.SendPeriodicCounterCapturePacket(GetTimestamp(), counterValues);
+        }
+
+        // Report counter values for each active backend
+        auto activeBackends = currentCaptureData.GetActiveBackends();
+        for (const std::string& backendId : activeBackends)
+        {
+            DispatchPeriodicCounterCapturePacket(
+                backendId, m_BackendProfilingContexts.at(backendId)->ReportCounterValues());
+        }
+
+        // Wait the indicated capture period (microseconds)
+        std::this_thread::sleep_for(std::chrono::microseconds(capturePeriod));
+    }
+    while (m_KeepRunning.load());
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/PeriodicCounterCapture.hpp b/profiling/client/src/PeriodicCounterCapture.hpp
new file mode 100644
index 0000000..8808417
--- /dev/null
+++ b/profiling/client/src/PeriodicCounterCapture.hpp
@@ -0,0 +1,71 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IPeriodicCounterCapture.hpp"
+#include "SendCounterPacket.hpp"
+
+#include <client/include/CounterIdMap.hpp>
+#include <client/include/Holder.hpp>
+#include <client/include/ICounterValues.hpp>
+
+#include <client/include/backends/IBackendProfilingContext.hpp>
+
+#include <common/include/Packet.hpp>
+
+#include <atomic>
+#include <mutex>
+#include <thread>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class PeriodicCounterCapture final : public IPeriodicCounterCapture  // Runs counter capture on its own thread.
+{
+public:
+    PeriodicCounterCapture(const Holder& data,                    // every reference argument is stored, not copied
+                           ISendCounterPacket& packet,
+                           IReadCounterValues& readCounterValue,
+                           const ICounterMappings& counterIdMap,
+                           const std::unordered_map<std::string,
+                               std::shared_ptr<IBackendProfilingContext>>& backendProfilingContexts)
+            : m_CaptureDataHolder(data)
+            , m_IsRunning(false)
+            , m_KeepRunning(false)
+            , m_ReadCounterValues(readCounterValue)
+            , m_SendCounterPacket(packet)
+            , m_CounterIdMap(counterIdMap)
+            , m_BackendProfilingContexts(backendProfilingContexts)
+    {}
+    ~PeriodicCounterCapture() { Stop(); }  // Stop() joins the capture thread if it is joinable
+
+    void Start() override;
+    void Stop() override;
+    bool IsRunning() const { return m_IsRunning; }  // NOTE(review): plain bool; confirm callers share a thread
+
+private:
+    CaptureData ReadCaptureData();                        // snapshot of the holder's current capture data
+    void Capture(IReadCounterValues& readCounterValues);  // thread body launched by Start()
+    void DispatchPeriodicCounterCapturePacket(
+            const std::string& backendId, const std::vector<Timestamp>& timestampValues);
+
+    const Holder&             m_CaptureDataHolder;    // not owned
+    bool                      m_IsRunning;            // written by Start()/Stop() only
+    std::atomic<bool>         m_KeepRunning;          // loop condition read by the capture thread
+    std::thread               m_PeriodCaptureThread;  // the capture thread itself
+    IReadCounterValues&       m_ReadCounterValues;    // not owned
+    ISendCounterPacket&       m_SendCounterPacket;    // not owned
+    const ICounterMappings&   m_CounterIdMap;         // not owned
+    const std::unordered_map<std::string,
+            std::shared_ptr<IBackendProfilingContext>>& m_BackendProfilingContexts;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/PeriodicCounterSelectionCommandHandler.cpp b/profiling/client/src/PeriodicCounterSelectionCommandHandler.cpp
new file mode 100644
index 0000000..06f2c65
--- /dev/null
+++ b/profiling/client/src/PeriodicCounterSelectionCommandHandler.cpp
@@ -0,0 +1,234 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PeriodicCounterSelectionCommandHandler.hpp"
+#include "ProfilingUtils.hpp"
+
+#include <client/include/ProfilingOptions.hpp>
+
+#include <common/include/NumericCast.hpp>
+
+#include <fmt/format.h>
+
+#include <vector>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void PeriodicCounterSelectionCommandHandler::ParseData(const arm::pipe::Packet& packet, CaptureData& captureData)
+{
+    // Fills captureData from the packet payload.
+    // Payload layout handled here: one uint32 capture period followed by zero or more uint16
+    // counter ids.
+    const uint32_t periodFieldBytes = arm::pipe::numeric_cast<uint32_t>(sizeof(uint32_t));
+    const uint32_t counterIdBytes   = arm::pipe::numeric_cast<uint32_t>(sizeof(uint16_t));
+
+    if (packet.GetLength() < 4)
+    {
+        // Too short to even hold the capture period: captureData is left untouched
+        return;
+    }
+
+    // The capture period sits at the very start of the payload
+    uint32_t readOffset = 0;
+    const uint32_t period = ReadUint32(packet.GetData(), readOffset);
+    captureData.SetCapturePeriod(period);
+    readOffset += periodFieldBytes;
+
+    // Everything after the period is consumed two bytes at a time; because of the integer
+    // division an odd trailing byte is ignored
+    const unsigned int idCount = (packet.GetLength() - 4) / 2;
+
+    std::vector<uint16_t> parsedIds;
+    parsedIds.reserve(idCount);
+    for (unsigned int remaining = idCount; remaining > 0; --remaining)
+    {
+        // Read one id, then step past it
+        const uint16_t id = ReadUint16(packet.GetData(), readOffset);
+        parsedIds.emplace_back(id);
+        readOffset += counterIdBytes;
+    }
+
+    // The id list is handed over even when it turned out to be empty
+    captureData.SetCounterIds(parsedIds);
+}
+
+void PeriodicCounterSelectionCommandHandler::operator()(const arm::pipe::Packet& packet)
+{
+    ProfilingState currentState = m_StateMachine.GetCurrentState();
+    switch (currentState)  // the command is only processed in the Active state; every other state throws
+    {
+    case ProfilingState::Uninitialised:
+    case ProfilingState::NotConnected:
+    case ProfilingState::WaitingForAck:
+        throw arm::pipe::ProfilingException(fmt::format("Periodic Counter Selection Command Handler invoked while in "
+                                            "an wrong state: {}",
+                                            GetProfilingStateName(currentState)));
+    case ProfilingState::Active:
+    {
+        // Process the packet: only family 0 / id 4 is accepted by this handler
+        if (!(packet.GetPacketFamily() == 0u && packet.GetPacketId() == 4u))
+        {
+            throw arm::pipe::InvalidArgumentException(fmt::format("Expected Packet family = 0, id = 4 but "
+                                                                  "received family = {}, id = {}",
+                                                                  packet.GetPacketFamily(),
+                                                                  packet.GetPacketId()));
+        }
+
+        // Parse the packet to get the capture period and counter UIDs
+        CaptureData captureData;
+        ParseData(packet, captureData);
+
+        // Get the capture data
+        uint32_t capturePeriod = captureData.GetCapturePeriod();
+        // A non-zero period below LOWEST_CAPTURE_PERIOD is raised to that minimum; zero is left as is
+        if (capturePeriod > 0  && capturePeriod < arm::pipe::LOWEST_CAPTURE_PERIOD)
+        {
+            capturePeriod = arm::pipe::LOWEST_CAPTURE_PERIOD;
+        }
+        const std::vector<uint16_t>& counterIds = captureData.GetCounterIds();
+
+        // Check whether the selected counter UIDs are valid
+        std::vector<uint16_t> validCounterIds;
+        for (uint16_t counterId : counterIds)
+        {
+            // Check whether the counter is registered
+            if (!m_ReadCounterValues.IsCounterRegistered(counterId))
+            {
+                // Invalid counter UID, ignore it and continue
+                continue;
+            }
+            // The counter is valid
+            validCounterIds.emplace_back(counterId);
+        }
+
+        std::sort(validCounterIds.begin(), validCounterIds.end());  // ascending, so backend ids form the tail
+
+        auto backendIdStart = std::find_if(validCounterIds.begin(), validCounterIds.end(), [&](uint16_t& counterId)
+        {
+            return counterId > m_MaxArmCounterId;  // first id above the maximum is where backend ids start
+        });
+
+        std::set<std::string> activeBackends;
+        std::set<uint16_t> backendCounterIds = std::set<uint16_t>(backendIdStart, validCounterIds.end());
+
+        if (m_BackendCounterMap.size() != 0)
+        {
+            std::set<uint16_t> newCounterIds;
+            std::set<uint16_t> unusedCounterIds;
+
+            // Get any backend counter ids that is in backendCounterIds but not in m_PrevBackendCounterIds
+            std::set_difference(backendCounterIds.begin(), backendCounterIds.end(),
+                                m_PrevBackendCounterIds.begin(), m_PrevBackendCounterIds.end(),
+                                std::inserter(newCounterIds, newCounterIds.begin()));
+
+            // Get any backend counter ids that is in m_PrevBackendCounterIds but not in backendCounterIds
+            std::set_difference(m_PrevBackendCounterIds.begin(), m_PrevBackendCounterIds.end(),
+                                backendCounterIds.begin(), backendCounterIds.end(),
+                                std::inserter(unusedCounterIds, unusedCounterIds.begin()));
+
+            activeBackends = ProcessBackendCounterIds(capturePeriod, newCounterIds, unusedCounterIds);
+        }
+        else
+        {
+            // Nothing tracked yet: every backend id in this selection is new and none is unused
+            activeBackends = ProcessBackendCounterIds(capturePeriod, backendCounterIds, {});
+        }
+
+        // save the new backend counter ids for next time
+        m_PrevBackendCounterIds = backendCounterIds;
+
+        // Store the period, the ids below backendIdStart (the non-backend ones) and the active backends
+        m_CaptureDataHolder.SetCaptureData(capturePeriod, {validCounterIds.begin(), backendIdStart}, activeBackends);
+
+        // Echo back the Periodic Counter Selection packet to the Counter Stream Buffer
+        m_SendCounterPacket.SendPeriodicCounterSelectionPacket(capturePeriod, validCounterIds);
+
+        if (capturePeriod == 0 || validCounterIds.empty())
+        {
+            // No data capture stop the thread
+            m_PeriodicCounterCapture.Stop();
+        }
+        else
+        {
+            // Start the Period Counter Capture thread (if not running already)
+            m_PeriodicCounterCapture.Start();
+        }
+
+        break;
+    }
+    default:
+        throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}",
+                                            static_cast<int>(currentState)));
+    }
+}
+
+std::set<std::string> PeriodicCounterSelectionCommandHandler::ProcessBackendCounterIds(
+                                                                      const uint32_t capturePeriod,
+                                                                      const std::set<uint16_t> newCounterIds,
+                                                                      const std::set<uint16_t> unusedCounterIds)
+{
+    std::set<std::string> changedBackends;
+    std::set<std::string> activeBackends = m_CaptureDataHolder.GetCaptureData().GetActiveBackends();
+    // Register every newly selected counter with its backend and remember which backends changed
+    for (uint16_t counterId : newCounterIds)
+    {
+        auto backendId = m_CounterIdMap.GetBackendId(counterId);
+        m_BackendCounterMap[backendId.second].emplace_back(backendId.first);
+        changedBackends.insert(backendId.second);
+    }
+    // Add any new backends to active backends
+    activeBackends.insert(changedBackends.begin(), changedBackends.end());
+
+    for (uint16_t counterId : unusedCounterIds)
+    {
+        auto backendId = m_CounterIdMap.GetBackendId(counterId);
+        std::vector<uint16_t>& backendCounters = m_BackendCounterMap[backendId.second];
+        // Erase-remove with an explicit end: safe when the id is absent, and drops every occurrence
+        backendCounters.erase(std::remove(backendCounters.begin(), backendCounters.end(), backendId.first),
+                              backendCounters.end());
+        if(backendCounters.size() == 0)
+        {
+            // If a backend has no counters associated with it we remove it from active backends and
+            // send a capture period of zero with an empty vector, this will deactivate all the backends counters
+            activeBackends.erase(backendId.second);
+            ActivateBackendCounters(backendId.second, 0, {});
+        }
+        else
+        {
+            changedBackends.insert(backendId.second);
+        }
+    }
+
+    // If the capture period remains the same we only need to update the backends who's counters have changed
+    if(capturePeriod == m_PrevCapturePeriod)
+    {
+        for (const auto& backend : changedBackends)
+        {
+            ActivateBackendCounters(backend, capturePeriod, m_BackendCounterMap[backend]);
+        }
+    }
+    // Otherwise update all the backends with the new capture period and any new/unused counters
+    else
+    {
+        for (const auto& backend : m_BackendCounterMap)
+        {
+            ActivateBackendCounters(backend.first, capturePeriod, backend.second);
+        }
+        if(capturePeriod == 0)
+        {
+            activeBackends = {};
+        }
+        m_PrevCapturePeriod = capturePeriod;
+    }
+
+    return activeBackends;
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/PeriodicCounterSelectionCommandHandler.hpp b/profiling/client/src/PeriodicCounterSelectionCommandHandler.hpp
new file mode 100644
index 0000000..6e544c9
--- /dev/null
+++ b/profiling/client/src/PeriodicCounterSelectionCommandHandler.hpp
@@ -0,0 +1,102 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ProfilingStateMachine.hpp"
+#include "SendCounterPacket.hpp"
+#include "IPeriodicCounterCapture.hpp"
+
+#include <client/include/CounterIdMap.hpp>
+#include <client/include/Holder.hpp>
+#include <client/include/ICounterValues.hpp>
+
+#include <client/include/backends/IBackendProfilingContext.hpp>
+
+#include <common/include/CommandHandlerFunctor.hpp>
+#include <common/include/Logging.hpp>
+#include <common/include/Packet.hpp>
+
+#include <set>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+
+class PeriodicCounterSelectionCommandHandler : public arm::pipe::CommandHandlerFunctor
+{
+    // Handles the Periodic Counter Selection packet (see operator() in the .cpp file).
+public:
+    PeriodicCounterSelectionCommandHandler(uint32_t familyId,
+                                           uint32_t packetId,
+                                           uint32_t version,
+                                           const std::unordered_map<std::string,
+                                                   std::shared_ptr<IBackendProfilingContext>>&
+                                                   backendProfilingContexts,
+                                           const ICounterMappings& counterIdMap,
+                                           Holder& captureDataHolder,
+                                           const uint16_t maxArmnnCounterId,
+                                           IPeriodicCounterCapture& periodicCounterCapture,
+                                           const IReadCounterValues& readCounterValue,
+                                           ISendCounterPacket& sendCounterPacket,
+                                           const ProfilingStateMachine& profilingStateMachine)
+        : CommandHandlerFunctor(familyId, packetId, version)
+        , m_BackendProfilingContexts(backendProfilingContexts)
+        , m_CounterIdMap(counterIdMap)
+        , m_CaptureDataHolder(captureDataHolder)
+        , m_MaxArmCounterId(maxArmnnCounterId)
+        , m_PeriodicCounterCapture(periodicCounterCapture)
+        , m_PrevCapturePeriod(0)
+        , m_ReadCounterValues(readCounterValue)
+        , m_SendCounterPacket(sendCounterPacket)
+        , m_StateMachine(profilingStateMachine)
+
+    {
+        // All collaborators are held by reference; nothing else to set up.
+    }
+
+    void operator()(const arm::pipe::Packet& packet) override;
+
+private:
+    // Backend id -> backend-local counter ids currently selected for that backend
+    std::unordered_map<std::string, std::vector<uint16_t>> m_BackendCounterMap;
+    const std::unordered_map<std::string,
+          std::shared_ptr<IBackendProfilingContext>>& m_BackendProfilingContexts;
+    const ICounterMappings& m_CounterIdMap;
+    Holder& m_CaptureDataHolder;
+    const uint16_t m_MaxArmCounterId;             // ids above this value are treated as backend counter ids
+    IPeriodicCounterCapture& m_PeriodicCounterCapture;
+    uint32_t m_PrevCapturePeriod;                 // capture period applied by the previous selection
+    std::set<uint16_t> m_PrevBackendCounterIds;   // backend counter ids of the previous selection
+    const IReadCounterValues& m_ReadCounterValues;
+    ISendCounterPacket& m_SendCounterPacket;
+    const ProfilingStateMachine& m_StateMachine;
+    // Forwards the selection to one backend and logs (does not throw on) a reported error message.
+    void ActivateBackendCounters(const std::string& backendId,
+                                 const uint32_t capturePeriod,
+                                 const std::vector<uint16_t>& counterIds)
+    {
+        arm::pipe::Optional<std::string> errorMsg =
+                m_BackendProfilingContexts.at(backendId)->ActivateCounters(capturePeriod, counterIds);
+
+        if(errorMsg.has_value())
+        {
+            ARM_PIPE_LOG(warning) << "An error has occurred when activating counters of " << backendId << ": "
+                               << errorMsg.value();
+        }
+    }
+    void ParseData(const arm::pipe::Packet& packet, CaptureData& captureData);
+    std::set<std::string> ProcessBackendCounterIds(const uint32_t capturePeriod,
+                                                        const std::set<uint16_t> newCounterIds,
+                                                        const std::set<uint16_t> unusedCounterIds);
+
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ProfilingConnectionDumpToFileDecorator.cpp b/profiling/client/src/ProfilingConnectionDumpToFileDecorator.cpp
new file mode 100644
index 0000000..7f13b08
--- /dev/null
+++ b/profiling/client/src/ProfilingConnectionDumpToFileDecorator.cpp
@@ -0,0 +1,162 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ProfilingConnectionDumpToFileDecorator.hpp"
+
+#include <common/include/NumericCast.hpp>
+#include <common/include/ProfilingException.hpp>
+
+#include <fstream>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+ProfilingConnectionDumpToFileDecorator::ProfilingConnectionDumpToFileDecorator(
+    std::unique_ptr<IProfilingConnection> connection,
+    const ProfilingOptions& options,
+    bool ignoreFailures)
+      : m_Connection(std::move(connection))
+      , m_Options(options)
+      , m_IgnoreFileErrors(ignoreFailures)
+{   // Takes ownership of the wrapped connection and stores a copy of the options.
+    if (m_Connection == nullptr)
+    {   // There is nothing to decorate without an underlying connection.
+        throw arm::pipe::InvalidArgumentException("Connection cannot be nullptr");
+    }
+}
+
+ProfilingConnectionDumpToFileDecorator::~ProfilingConnectionDumpToFileDecorator()
+{   // Closes both dump file streams and the wrapped connection (see Close()).
+    Close();
+}
+
+bool ProfilingConnectionDumpToFileDecorator::IsOpen() const
+{   // The decorator keeps no open/closed state of its own: it reports the wrapped connection's.
+    return m_Connection->IsOpen();
+}
+
+void ProfilingConnectionDumpToFileDecorator::Close()
+{   // Flushes and closes the incoming and outgoing dump streams, then closes the wrapped connection.
+    m_IncomingDumpFileStream.flush();
+    m_IncomingDumpFileStream.close();
+    m_OutgoingDumpFileStream.flush();
+    m_OutgoingDumpFileStream.close();
+    m_Connection->Close();
+}
+
+bool ProfilingConnectionDumpToFileDecorator::WritePacket(const unsigned char* buffer, uint32_t length)
+{
+    // Mirror the packet into the outgoing capture file when one is configured. A failed dump
+    // does not prevent the send below (unless DumpOutgoingToFile throws).
+    const bool dumpRequested = !m_Options.m_OutgoingCaptureFile.empty();
+    const bool dumped        = dumpRequested ? DumpOutgoingToFile(buffer, length) : true;
+    // Always forward to the wrapped connection; report success only if both steps succeeded.
+    const bool sent = m_Connection->WritePacket(buffer, length);
+    return dumped && sent;
+}
+
+arm::pipe::Packet ProfilingConnectionDumpToFileDecorator::ReadPacket(uint32_t timeout)
+{   // Reads from the wrapped connection, then mirrors the packet to the incoming capture file.
+    arm::pipe::Packet received = m_Connection->ReadPacket(timeout);
+    if (!m_Options.m_IncomingCaptureFile.empty())
+    {   // Dumping is only attempted when an incoming capture file has been configured.
+        DumpIncomingToFile(received);
+    }
+    return received;
+}
+
+bool ProfilingConnectionDumpToFileDecorator::OpenIncomingDumpFile()
+{   // Opens m_Options.m_IncomingCaptureFile for binary output; returns true if the stream is open.
+    m_IncomingDumpFileStream.open(m_Options.m_IncomingCaptureFile, std::ios::out | std::ios::binary);
+    return m_IncomingDumpFileStream.is_open();
+}
+
+bool ProfilingConnectionDumpToFileDecorator::OpenOutgoingDumpFile()
+{   // Opens m_Options.m_OutgoingCaptureFile for binary output; returns true if the stream is open.
+    m_OutgoingDumpFileStream.open(m_Options.m_OutgoingCaptureFile, std::ios::out | std::ios::binary);
+    return m_OutgoingDumpFileStream.is_open();
+}
+
+
+/// Dumps incoming data into the file specified by m_Options.m_IncomingCaptureFile.
+/// If m_IgnoreFileErrors is true, open and write errors are ignored, i.e. the
+/// method will not throw an exception if it encounters an error while trying
+/// to write the data into the specified file.
+/// @param packet data packet to write
+/// @return nothing
+void ProfilingConnectionDumpToFileDecorator::DumpIncomingToFile(const arm::pipe::Packet& packet)
+{
+    bool ok = true;
+    if (!m_IncomingDumpFileStream.is_open())
+    {
+        // open the dump file on demand (retried on every call while it is not open)
+        ok = OpenIncomingDumpFile();
+        if (!ok && !m_IgnoreFileErrors)
+        {
+            Fail("Failed to open \"" + m_Options.m_IncomingCaptureFile + "\" for writing");
+        }
+    }
+
+    // each record is written as: header word, payload length, then the raw payload bytes
+    const unsigned int header       = packet.GetHeader();
+    const unsigned int packetLength = packet.GetLength();
+
+    m_IncomingDumpFileStream.write(reinterpret_cast<const char*>(&header), sizeof header);
+    m_IncomingDumpFileStream.write(reinterpret_cast<const char*>(&packetLength), sizeof packetLength);
+    m_IncomingDumpFileStream.write(reinterpret_cast<const char*>(packet.GetData()),
+                                   arm::pipe::numeric_cast<std::streamsize>(packetLength));
+
+    ok = ok && m_IncomingDumpFileStream.good();
+    if (!ok && !m_IgnoreFileErrors)
+    {
+        Fail("Error writing incoming packet of " + std::to_string(packetLength) + " bytes");
+    }
+}
+
+/// Dumps outgoing data into the file specified by m_Options.m_OutgoingCaptureFile.
+/// If m_IgnoreFileErrors is true, open and write errors are ignored, i.e. the
+/// method will not throw an exception if it encounters an error while trying
+/// to write the data into the specified file. However, the return value will still
+/// signal if the write has not been completed successfully.
+/// @param buffer pointer to data to write
+/// @param length number of bytes to write
+/// @return true if write successful, false otherwise
+bool ProfilingConnectionDumpToFileDecorator::DumpOutgoingToFile(const unsigned char* buffer, uint32_t length)
+{
+    bool ok = true;
+    if (!m_OutgoingDumpFileStream.is_open())
+    {
+        // open the dump file on demand (retried on every call while it is not open)
+        ok = OpenOutgoingDumpFile();
+        if (!ok && !m_IgnoreFileErrors)
+        {
+            Fail("Failed to open \"" + m_Options.m_OutgoingCaptureFile + "\" for writing");
+        }
+    }
+
+    // the buffer is written verbatim, without any additional framing
+    m_OutgoingDumpFileStream.write(reinterpret_cast<const char*>(buffer),
+                                   arm::pipe::numeric_cast<std::streamsize>(length));
+    ok = ok && m_OutgoingDumpFileStream.good();
+    if (!ok && !m_IgnoreFileErrors)
+    {
+        Fail("Error writing outgoing packet of " + std::to_string(length) + " bytes");
+    }
+
+    return ok;
+}
+
+void ProfilingConnectionDumpToFileDecorator::Fail(const std::string& errorMessage)
+{   // Closes the dump streams and the wrapped connection, then reports the error by throwing.
+    Close();
+    throw arm::pipe::ProfilingException(errorMessage);
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ProfilingConnectionDumpToFileDecorator.hpp b/profiling/client/src/ProfilingConnectionDumpToFileDecorator.hpp
new file mode 100644
index 0000000..8f4812e
--- /dev/null
+++ b/profiling/client/src/ProfilingConnectionDumpToFileDecorator.hpp
@@ -0,0 +1,62 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IProfilingConnection.hpp"
+#include "ProfilingUtils.hpp"
+
+#include <client/include/ProfilingOptions.hpp>
+
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class ProfilingConnectionDumpToFileDecorator : public IProfilingConnection
+{   // Wraps another IProfilingConnection and copies the packets passing through it into dump files.
+public:
+    // Takes ownership of connection; ignoreFailures suppresses the exceptions raised on dump file errors.
+    ProfilingConnectionDumpToFileDecorator(std::unique_ptr<IProfilingConnection> connection,
+                                           const ProfilingOptions& options,
+                                           bool ignoreFailures = false);
+    // Closes the dump files and the wrapped connection.
+    ~ProfilingConnectionDumpToFileDecorator();
+    // Returns whether the wrapped connection is open.
+    bool IsOpen() const override;
+    // Flushes and closes both dump files, then closes the wrapped connection.
+    void Close() override;
+    // Writes the buffer to the outgoing dump file (if configured) and to the wrapped connection.
+    bool WritePacket(const unsigned char* buffer, uint32_t length) override;
+    // Reads a packet from the wrapped connection and writes it to the incoming dump file (if configured).
+    arm::pipe::Packet ReadPacket(uint32_t timeout) override;
+
+private:
+    bool OpenIncomingDumpFile();
+    // Each Open*DumpFile() returns true if its stream is open afterwards.
+    bool OpenOutgoingDumpFile();
+    // Throws (via Fail) on open/write errors unless m_IgnoreFileErrors is set.
+    void DumpIncomingToFile(const arm::pipe::Packet& packet);
+    // As above, and additionally returns false when the dump file could not be opened or written.
+    bool DumpOutgoingToFile(const unsigned char* buffer, uint32_t length);
+    // Closes everything and throws a ProfilingException carrying errorMessage.
+    void Fail(const std::string& errorMessage);
+
+    std::unique_ptr<IProfilingConnection> m_Connection;
+    ProfilingOptions                      m_Options;
+    std::ofstream                         m_IncomingDumpFileStream;
+    std::ofstream                         m_OutgoingDumpFileStream;
+    bool                                  m_IgnoreFileErrors;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ProfilingConnectionFactory.cpp b/profiling/client/src/ProfilingConnectionFactory.cpp
new file mode 100644
index 0000000..0b34a02
--- /dev/null
+++ b/profiling/client/src/ProfilingConnectionFactory.cpp
@@ -0,0 +1,61 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ProfilingConnectionFactory.hpp"
+
+#include "FileOnlyProfilingConnection.hpp"
+#include "ProfilingConnectionDumpToFileDecorator.hpp"
+#include "SocketProfilingConnection.hpp"
+
+namespace arm
+{
+
+namespace pipe
+{
+
+std::unique_ptr<IProfilingConnection> ProfilingConnectionFactory::GetProfilingConnection(
+    const ProfilingOptions& options) const
+{
+    // Before creating the IProfilingConnection, check that the file format is supported
+    if (!(options.m_FileFormat == "binary"))
+    {
+        throw arm::pipe::UnimplementedException("Unsupported profiling file format, only binary is supported");
+    }
+
+    // We can create 4 different types of IProfilingConnection.
+    // 1: If no relevant options are specified then a SocketProfilingConnection is returned.
+    // 2: If an incoming or an outgoing capture file is specified (and not "file only") then a
+    //    SocketProfilingConnection decorated by a ProfilingConnectionDumpToFileDecorator is returned.
+    // 3: If an incoming or an outgoing capture file is specified and "file only" then a
+    //    FileOnlyProfilingConnection decorated by a ProfilingConnectionDumpToFileDecorator is returned.
+    // 4: If m_FileOnly == true, no capture file is specified and there are ILocalPacketHandlers
+    //    specified, a FileOnlyProfilingConnection without a file dump is returned.
+    const bool captureRequested = !options.m_IncomingCaptureFile.empty() || !options.m_OutgoingCaptureFile.empty();
+    if (captureRequested)
+    {
+        if (!options.m_FileOnly)
+        {
+            // This is type 2.
+            return std::make_unique<ProfilingConnectionDumpToFileDecorator>(
+                std::make_unique<SocketProfilingConnection>(), options);
+        }
+        // This is type 3.
+        return std::make_unique<ProfilingConnectionDumpToFileDecorator>(
+            std::make_unique<FileOnlyProfilingConnection>(options), options);
+    }
+
+    if (options.m_FileOnly && !options.m_LocalPacketHandlers.empty())
+    {
+        // This is type 4.
+        return std::make_unique<FileOnlyProfilingConnection>(options);
+    }
+
+    // This is type 1.
+    return std::make_unique<SocketProfilingConnection>();
+}
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/src/ProfilingConnectionFactory.hpp b/profiling/client/src/ProfilingConnectionFactory.hpp
new file mode 100644
index 0000000..441a31a
--- /dev/null
+++ b/profiling/client/src/ProfilingConnectionFactory.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IProfilingConnectionFactory.hpp"
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class ProfilingConnectionFactory final : public IProfilingConnectionFactory
+{   // Factory that selects the kind of profiling connection from the supplied ProfilingOptions.
+public:
+    ProfilingConnectionFactory()  = default;
+    ~ProfilingConnectionFactory() = default;
+    // Throws arm::pipe::UnimplementedException if options.m_FileFormat is not "binary".
+    IProfilingConnectionPtr GetProfilingConnection(const ProfilingOptions& options) const override;
+};
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/src/ProfilingService.cpp b/profiling/client/src/ProfilingService.cpp
new file mode 100644
index 0000000..7acddf1
--- /dev/null
+++ b/profiling/client/src/ProfilingService.cpp
@@ -0,0 +1,436 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ProfilingService.hpp"
+
+#include <common/include/Logging.hpp>
+#include <common/include/NumericCast.hpp>
+#include <common/include/ProfilingGuid.hpp>
+#include <common/include/SocketConnectionException.hpp>
+
+#include <fmt/format.h>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void ProfilingService::ResetExternalProfilingOptions(const arm::pipe::ProfilingOptions& options,
+                                                     bool resetProfilingService)
+{
+    // Adopt the new options and propagate the timeline flag to the members that cache it
+    m_Options = options;
+    m_TimelineReporting = options.m_TimelineEnabled;
+    m_ConnectionAcknowledgedCommandHandler.setTimelineEnabled(options.m_TimelineEnabled);
+
+    // Nothing more to do unless the caller asked for the profiling service to be reset
+    if (!resetProfilingService)
+    {
+        return;
+    }
+    Reset();
+}
+
+bool ProfilingService::IsProfilingEnabled() const
+{   // Reflects the m_EnableProfiling flag of the stored options, not the state machine's state.
+    return m_Options.m_EnableProfiling;
+}
+
+ProfilingState ProfilingService::ConfigureProfilingService(
+        const ProfilingOptions& options,
+        bool resetProfilingService)
+{
+    // Applies the given options (optionally resetting the service first) and then drives
+    // the state machine as far as it can go from within this call.
+    // @return the state of the profiling state machine once configuration has finished
+    ResetExternalProfilingOptions(options, resetProfilingService);
+
+    const ProfilingState initialState = m_StateMachine.GetCurrentState();
+    const bool notYetConnected = initialState == ProfilingState::Uninitialised ||
+                                 initialState == ProfilingState::NotConnected;
+
+    if (!options.m_EnableProfiling)
+    {
+        // Make sure profiling is shutdown
+        if (notYetConnected)
+        {
+            // No connection has been set up in these states, so Stop() is not called
+            return initialState;
+        }
+        Stop();
+        return m_StateMachine.GetCurrentState();
+    }
+
+    if (!notYetConnected)
+    {
+        // Any other state (e.g. WaitingForAck or Active) is reported back unchanged
+        return initialState;
+    }
+
+    if (initialState == ProfilingState::Uninitialised)
+    {
+        // First poke: should transition to NotConnected
+        Update();
+    }
+
+    // Will either stay in NotConnected because there is no server
+    // or will enter WaitingForAck.
+    Update();
+    if (m_StateMachine.GetCurrentState() == ProfilingState::WaitingForAck)
+    {
+        // Poke it again to send out the metadata packet
+        Update();
+    }
+
+    // Report whatever state the updates above left the machine in
+    return m_StateMachine.GetCurrentState();
+}
+
+void ProfilingService::Update()
+{   // Advances the profiling service according to the current state of m_StateMachine.
+    if (!m_Options.m_EnableProfiling)
+    {
+        // Don't run if profiling is disabled
+        return;
+    }
+    // Each call performs at most one state transition, so callers invoke Update() repeatedly
+    ProfilingState currentState = m_StateMachine.GetCurrentState();
+    switch (currentState)
+    {
+    case ProfilingState::Uninitialised:
+
+        // Initialize the profiling service
+        Initialize();
+
+        // Move to the next state
+        m_StateMachine.TransitionToState(ProfilingState::NotConnected);
+        break;
+    case ProfilingState::NotConnected:
+        // Stop the command thread (if running)
+        m_CommandHandler.Stop();
+
+        // Stop the send thread (if running)
+        m_SendThread.Stop(false);
+
+        // Stop the periodic counter capture thread (if running)
+        m_PeriodicCounterCapture.Stop();
+
+        // Reset any existing profiling connection
+        m_ProfilingConnection.reset();
+
+        try
+        {
+            // Setup the profiling connection
+            ARM_PIPE_ASSERT(m_ProfilingConnectionFactory);
+            m_ProfilingConnection = m_ProfilingConnectionFactory->GetProfilingConnection(m_Options);
+        }
+        catch (const arm::pipe::ProfilingException& e)
+        {
+            ARM_PIPE_LOG(warning) << "An error has occurred when creating the profiling connection: "
+                                       << e.what();
+        }
+        catch (const arm::pipe::SocketConnectionException& e)
+        {
+            ARM_PIPE_LOG(warning) << "An error has occurred when creating the profiling connection ["
+                                       << e.what() << "] on socket [" << e.GetSocketFd() << "].";
+        }
+
+        // Move to the next state
+        m_StateMachine.TransitionToState(m_ProfilingConnection
+                                         ? ProfilingState::WaitingForAck  // Profiling connection obtained, wait for ack
+                                         : ProfilingState::NotConnected); // Profiling connection failed, stay in the
+                                                                          // "NotConnected" state
+        break;
+    case ProfilingState::WaitingForAck:
+        ARM_PIPE_ASSERT(m_ProfilingConnection);
+
+        // Start the command thread
+        m_CommandHandler.Start(*m_ProfilingConnection);
+
+        // Start the send thread, while in "WaitingForAck" state it'll send out a "Stream MetaData" packet waiting for
+        // a valid "Connection Acknowledged" packet confirming the connection
+        m_SendThread.Start(*m_ProfilingConnection);
+
+        // The connection acknowledged command handler will automatically transition the state to "Active" once a
+        // valid "Connection Acknowledged" packet has been received
+
+        break;
+    case ProfilingState::Active:
+
+        // The period counter capture thread is started by the Periodic Counter Selection command handler upon
+        // request by an external profiling service
+
+        break;
+    default:
+        throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}",
+                                            static_cast<int>(currentState)));
+    }
+}
+
+void ProfilingService::Disconnect()
+{
+    // Only the Active state triggers a Stop(); the other known states are left untouched.
+    const ProfilingState state = m_StateMachine.GetCurrentState();
+    switch (state)
+    {
+    case ProfilingState::Active:
+        // Stop the profiling threads and drop the connection
+        Stop();
+        return;
+    case ProfilingState::Uninitialised:
+    case ProfilingState::NotConnected:
+    case ProfilingState::WaitingForAck:
+        return; // NOP
+    default:
+        throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}",
+                                                        static_cast<int>(state)));
+    }
+}
+
+// Store a profiling context returned from a backend that supports profiling, and register its counters
+void ProfilingService::AddBackendProfilingContext(const std::string& backendId,
+    std::shared_ptr<IBackendProfilingContext> profilingContext)
+{   // NOTE: emplace() leaves an already stored context for backendId in place.
+    ARM_PIPE_ASSERT(profilingContext != nullptr);
+    // Register the backend counters; the value returned becomes the new m_MaxGlobalCounterId
+    m_MaxGlobalCounterId = profilingContext->RegisterCounters(m_MaxGlobalCounterId);
+    m_BackendProfilingContexts.emplace(backendId, std::move(profilingContext));
+}
+const ICounterDirectory& ProfilingService::GetCounterDirectory() const
+{   // Read-only view of the m_CounterDirectory member.
+    return m_CounterDirectory;
+}
+
+ICounterRegistry& ProfilingService::GetCounterRegistry()
+{   // Mutable access to the same m_CounterDirectory object, through its ICounterRegistry interface.
+    return m_CounterDirectory;
+}
+
+ProfilingState ProfilingService::GetCurrentState() const
+{   // Current state as reported by the profiling state machine.
+    return m_StateMachine.GetCurrentState();
+}
+
+uint16_t ProfilingService::GetCounterCount() const
+{   // Delegates to the counter directory.
+    return m_CounterDirectory.GetCounterCount();
+}
+
+bool ProfilingService::IsCounterRegistered(uint16_t counterUid) const
+{   // Looks the UID up in the counter directory (not in the m_CounterIndex value index).
+    return m_CounterDirectory.IsCounterRegistered(counterUid);
+}
+
+uint32_t ProfilingService::GetAbsoluteCounterValue(uint16_t counterUid) const
+{   // Reads the counter without modifying it; CheckCounterUid throws for an unregistered UID.
+    CheckCounterUid(counterUid);
+    const std::atomic<uint32_t>* const counter = m_CounterIndex.at(counterUid);
+    ARM_PIPE_ASSERT(counter);
+    return counter->load(std::memory_order::memory_order_relaxed);
+}
+
+uint32_t ProfilingService::GetDeltaCounterValue(uint16_t counterUid)
+{   // Returns the value read from the counter and subtracts exactly that amount from it.
+    CheckCounterUid(counterUid);
+    std::atomic<uint32_t>* const counter = m_CounterIndex.at(counterUid);
+    ARM_PIPE_ASSERT(counter);
+    const uint32_t snapshot = counter->load(std::memory_order::memory_order_relaxed);
+    SubtractCounterValue(counterUid, snapshot);
+    return snapshot;
+}
+
+const ICounterMappings& ProfilingService::GetCounterMappings() const
+{   // Read-only view of the m_CounterIdMap member.
+    return m_CounterIdMap;
+}
+
+IRegisterCounterMapping& ProfilingService::GetCounterMappingRegistry()
+{   // Mutable access to the same m_CounterIdMap object, through its IRegisterCounterMapping interface.
+    return m_CounterIdMap;
+}
+
+bool ProfilingService::IsCategoryRegistered(const std::string& categoryName) const
+{   // Delegates the category name lookup to the counter directory.
+    return m_CounterDirectory.IsCategoryRegistered(categoryName);
+}
+
+bool ProfilingService::IsCounterRegistered(const std::string& counterName) const
+{   // Name-based overload: delegates the counter name lookup to the counter directory.
+    return m_CounterDirectory.IsCounterRegistered(counterName);
+}
+
+CaptureData ProfilingService::GetCaptureData()
+{   // Returns, by value, the capture data obtained from m_Holder.
+    return m_Holder.GetCaptureData();
+}
+
+void ProfilingService::SetCaptureData(uint32_t capturePeriod,
+                                      const std::vector<uint16_t>& counterIds,
+                                      const std::set<std::string>& activeBackends)
+{   // Passes the capture configuration on to m_Holder, the object GetCaptureData() reads from.
+    m_Holder.SetCaptureData(capturePeriod, counterIds, activeBackends);
+}
+
+void ProfilingService::SetCounterValue(uint16_t counterUid, uint32_t value)
+{   // Overwrites the counter; CheckCounterUid throws for an unregistered UID.
+    CheckCounterUid(counterUid);
+    std::atomic<uint32_t>* const counter = m_CounterIndex.at(counterUid);
+    ARM_PIPE_ASSERT(counter);
+    counter->store(value, std::memory_order::memory_order_relaxed);
+}
+
+uint32_t ProfilingService::AddCounterValue(uint16_t counterUid, uint32_t value)
+{   // Atomically adds value; the result is the counter's value immediately before the addition.
+    CheckCounterUid(counterUid);
+    std::atomic<uint32_t>* const counter = m_CounterIndex.at(counterUid);
+    ARM_PIPE_ASSERT(counter);
+    return counter->fetch_add(value, std::memory_order::memory_order_relaxed);
+}
+
+uint32_t ProfilingService::SubtractCounterValue(uint16_t counterUid, uint32_t value)
+{   // Atomically subtracts value; the result is the counter's value immediately before the subtraction.
+    CheckCounterUid(counterUid);
+    std::atomic<uint32_t>* const counter = m_CounterIndex.at(counterUid);
+    ARM_PIPE_ASSERT(counter);
+    return counter->fetch_sub(value, std::memory_order::memory_order_relaxed);
+}
+
+uint32_t ProfilingService::IncrementCounterValue(uint16_t counterUid)
+{   // Returns the pre-increment value. fetch_add is used because operator++ cannot take a memory order.
+    CheckCounterUid(counterUid);
+    std::atomic<uint32_t>* counterValuePtr = m_CounterIndex.at(counterUid);
+    ARM_PIPE_ASSERT(counterValuePtr);
+    return counterValuePtr->fetch_add(1, std::memory_order::memory_order_relaxed);
+}
+
+std::unique_ptr<ISendTimelinePacket> ProfilingService::GetSendTimelinePacket() const
+{   // Obtained from m_TimelinePacketWriterFactory; the caller owns the returned object.
+    return m_TimelinePacketWriterFactory.GetSendTimelinePacket();
+}
+
+void ProfilingService::Initialize()
+{   // Delegates the initialisation of this service to m_Initialiser.
+    m_Initialiser.InitialiseProfilingService(*this);
+}
+
+void ProfilingService::InitializeCounterValue(uint16_t counterUid)
+{
+    // Grow the counter index, if needed, so that counterUid is a valid position in it
+    const size_t requiredSize = arm::pipe::numeric_cast<size_t>(counterUid) + 1;
+    if (m_CounterIndex.size() < requiredSize)
+    {
+        m_CounterIndex.resize(requiredSize);
+    }
+
+    // Append a new atomic counter, starting at zero, to the list of counter values
+    m_CounterValues.emplace_back(0);
+
+    // Record the address of the new counter in the index for quick access by UID
+    m_CounterIndex.at(counterUid) = &(m_CounterValues.back());
+}
+
+void ProfilingService::Reset()
+{
+    // Stop the profiling service...
+    Stop();
+
+    // ...then delete all the counter data and configuration...
+    m_CounterIndex.clear();
+    m_CounterValues.clear();
+    m_CounterDirectory.Clear();
+    m_CounterIdMap.Reset();
+    m_BufferManager.Reset();
+
+    // ...finally reset the profiling state machine and forget the registered backend contexts
+    m_StateMachine.Reset();
+    m_BackendProfilingContexts.clear();
+}
+
+void ProfilingService::Stop()
+{
+    {   // only lock while the m_ServiceActive flag is being updated
+        std::unique_lock<std::mutex> lck(m_ServiceActiveMutex);
+        m_ServiceActive = false;
+    }
+    // The order in which we reset/stop the components is not trivial!
+    // First stop the producing threads
+    // Command Handler first as it is responsible for launching the Periodic Counter capture thread
+    m_CommandHandler.Stop();
+    m_PeriodicCounterCapture.Stop();
+    // Then the consuming thread
+    m_SendThread.Stop(false);
+
+    // ...then close and destroy the profiling connection...
+    if (m_ProfilingConnection != nullptr && m_ProfilingConnection->IsOpen())
+    {
+        m_ProfilingConnection->Close();
+    }
+    m_ProfilingConnection.reset();
+
+    // ...then move to the "NotConnected" state
+    m_StateMachine.TransitionToState(ProfilingState::NotConnected);
+}
+
+inline void ProfilingService::CheckCounterUid(uint16_t counterUid) const
+{   // Guard used by the counter value accessors: throws if the UID is unknown to the counter directory.
+    if (!IsCounterRegistered(counterUid))
+    {
+        throw arm::pipe::InvalidArgumentException(fmt::format("Counter UID {} is not registered", counterUid));
+    }
+}
+
+void ProfilingService::NotifyBackendsForTimelineReporting()
+{
+    // Push the current timeline-reporting flag to every stored backend profiling
+    // context. Only the mapped context is needed; the backend id key is not used.
+    // Contexts are visited in the iteration order of m_BackendProfilingContexts.
+    for (auto& backendEntry : m_BackendProfilingContexts)
+    {
+        auto& backendProfilingContext = backendEntry.second;
+        backendProfilingContext->EnableTimelineReporting(m_TimelineReporting);
+    }
+}
+
+void ProfilingService::NotifyProfilingServiceActive()
+{
+    {   // hold the mutex only while the m_ServiceActive flag is being updated
+        std::lock_guard<std::mutex> guard(m_ServiceActiveMutex);
+        m_ServiceActive = true;
+    }
+    m_ServiceActiveConditionVariable.notify_one();
+}
+
+void ProfilingService::WaitForProfilingServiceActivation(unsigned int timeout)
+{
+    // Blocks the caller until m_ServiceActive has been set (see NotifyProfilingServiceActive)
+    // or until the timeout expires, in which case a warning is logged.
+    // @param timeout maximum time to wait, in milliseconds
+    std::unique_lock<std::mutex> lck(m_ServiceActiveMutex);
+
+    // Use a monotonic clock so the reported elapsed time is immune to wall-clock adjustments
+    const auto start = std::chrono::steady_clock::now();
+    // The predicate makes wait_for go back to sleep after a spurious wake up if
+    // m_ServiceActive is not yet true. A false result means the timeout expired with the
+    // flag still unset; as the mutex is held at that point the flag needs no re-check.
+    if (!m_ServiceActiveConditionVariable.wait_for(lck,
+                                                   std::chrono::milliseconds(timeout),
+                                                   [&]{return m_ServiceActive == true;}))
+    {
+        const auto finish = std::chrono::steady_clock::now();
+        const std::chrono::duration<double, std::milli> elapsed = finish - start;
+        std::stringstream ss;
+        ss << "Timed out waiting on profiling service activation for " << elapsed.count() << " ms";
+        ARM_PIPE_LOG(warning) << ss.str();
+    }
+}
+
+ProfilingService::~ProfilingService()
+{   // Runs the full Stop() sequence (threads, connection, state) before the members are destroyed.
+    Stop();
+}
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ProfilingService.hpp b/profiling/client/src/ProfilingService.hpp
new file mode 100644
index 0000000..b84b39d
--- /dev/null
+++ b/profiling/client/src/ProfilingService.hpp
@@ -0,0 +1,304 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ActivateTimelineReportingCommandHandler.hpp"
+#include "BufferManager.hpp"
+#include "CommandHandler.hpp"
+#include "ConnectionAcknowledgedCommandHandler.hpp"
+#include "DeactivateTimelineReportingCommandHandler.hpp"
+#include "INotifyBackends.hpp"
+#include "PeriodicCounterCapture.hpp"
+#include "PeriodicCounterSelectionCommandHandler.hpp"
+#include "PerJobCounterSelectionCommandHandler.hpp"
+#include "ProfilingConnectionFactory.hpp"
+#include "ProfilingStateMachine.hpp"
+#include "RequestCounterDirectoryCommandHandler.hpp"
+#include "SendCounterPacket.hpp"
+#include "SendThread.hpp"
+#include "SendTimelinePacket.hpp"
+#include "TimelinePacketWriterFactory.hpp"
+
+#include <client/include/CounterIdMap.hpp>
+#include <client/include/ICounterValues.hpp>
+#include <client/include/ILocalPacketHandler.hpp>
+#include <client/include/IProfilingService.hpp>
+#include <client/include/IReportStructure.hpp>
+
+#include <client/include/backends/IBackendProfilingContext.hpp>
+
+#include <common/include/CounterDirectory.hpp>
+
+#include <list>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class ProfilingService : public IProfilingService, public INotifyBackends
+{   // Owns the profiling client components declared in the private section below
+public:
+    using IProfilingConnectionFactoryPtr = std::unique_ptr<IProfilingConnectionFactory>;
+    using IProfilingConnectionPtr = std::unique_ptr<IProfilingConnection>;
+    using CounterIndices = std::vector<std::atomic<uint32_t>*>;
+    using CounterValues = std::list<std::atomic<uint32_t>>;
+    using BackendProfilingContext = std::unordered_map<std::string,
+                                                       std::shared_ptr<IBackendProfilingContext>>;
+
+    ProfilingService(uint16_t maxGlobalCounterId,
+                     IInitialiseProfilingService& initialiser,
+                     const std::string& softwareInfo,
+                     const std::string& softwareVersion,
+                     const std::string& hardwareVersion,
+                     arm::pipe::Optional<IReportStructure&> reportStructure = arm::pipe::EmptyOptional())
+        : m_Options()
+        , m_TimelineReporting(false)
+        , m_ProfilingConnectionFactory(new ProfilingConnectionFactory())
+        , m_ProfilingConnection()
+        , m_StateMachine()
+        , m_CounterIndex()
+        , m_CounterValues()
+        , m_CommandHandlerRegistry()
+        , m_PacketVersionResolver()
+        , m_CommandHandler(1000, // presumably a timeout in ms - TODO confirm against CommandHandler
+                           false,
+                           m_CommandHandlerRegistry,
+                           m_PacketVersionResolver)
+        , m_BufferManager()
+        , m_SendCounterPacket(m_BufferManager, softwareInfo, softwareVersion, hardwareVersion)
+        , m_SendThread(m_StateMachine, m_BufferManager, m_SendCounterPacket)
+        , m_SendTimelinePacket(m_BufferManager)
+        , m_PeriodicCounterCapture(m_Holder, m_SendCounterPacket, *this, m_CounterIdMap, m_BackendProfilingContexts)
+        , m_ConnectionAcknowledgedCommandHandler(0,
+                                                 1,
+                                                 m_PacketVersionResolver.ResolvePacketVersion(0, 1).GetEncodedValue(),
+                                                 m_CounterDirectory,
+                                                 m_SendCounterPacket,
+                                                 m_SendTimelinePacket,
+                                                 m_StateMachine,
+                                                 *this,
+                                                 m_BackendProfilingContexts)
+        , m_RequestCounterDirectoryCommandHandler(0,
+                                                  3,
+                                                  m_PacketVersionResolver.ResolvePacketVersion(0, 3).GetEncodedValue(),
+                                                  m_CounterDirectory,
+                                                  m_SendCounterPacket,
+                                                  m_SendTimelinePacket,
+                                                  m_StateMachine)
+        , m_PeriodicCounterSelectionCommandHandler(0,
+                                                   4,
+                                                   m_PacketVersionResolver.ResolvePacketVersion(0, 4).GetEncodedValue(),
+                                                   m_BackendProfilingContexts,
+                                                   m_CounterIdMap,
+                                                   m_Holder,
+                                                   maxGlobalCounterId,
+                                                   m_PeriodicCounterCapture,
+                                                   *this,
+                                                   m_SendCounterPacket,
+                                                   m_StateMachine)
+        , m_PerJobCounterSelectionCommandHandler(0,
+                                                 5,
+                                                 m_PacketVersionResolver.ResolvePacketVersion(0, 5).GetEncodedValue(),
+                                                 m_StateMachine)
+        , m_ActivateTimelineReportingCommandHandler(0,
+                                                    6,
+                                                    m_PacketVersionResolver.ResolvePacketVersion(0, 6)
+                                                                           .GetEncodedValue(),
+                                                    m_SendTimelinePacket,
+                                                    m_StateMachine,
+                                                    reportStructure,
+                                                    m_TimelineReporting,
+                                                    *this,
+                                                    *this)
+        , m_DeactivateTimelineReportingCommandHandler(0,
+                                                      7,
+                                                      m_PacketVersionResolver.ResolvePacketVersion(0, 7)
+                                                                             .GetEncodedValue(),
+                                                      m_TimelineReporting,
+                                                      m_StateMachine,
+                                                      *this)
+        , m_TimelinePacketWriterFactory(m_BufferManager)
+        , m_MaxGlobalCounterId(maxGlobalCounterId)
+        , m_ServiceActive(false)
+        , m_Initialiser(initialiser)
+    {
+        // Register the "Connection Acknowledged" command handler
+        m_CommandHandlerRegistry.RegisterFunctor(&m_ConnectionAcknowledgedCommandHandler);
+
+        // Register the "Request Counter Directory" command handler
+        m_CommandHandlerRegistry.RegisterFunctor(&m_RequestCounterDirectoryCommandHandler);
+
+        // Register the "Periodic Counter Selection" command handler
+        m_CommandHandlerRegistry.RegisterFunctor(&m_PeriodicCounterSelectionCommandHandler);
+
+        // Register the "Per-Job Counter Selection" command handler
+        m_CommandHandlerRegistry.RegisterFunctor(&m_PerJobCounterSelectionCommandHandler);
+        // Register the "Activate Timeline Reporting" command handler
+        m_CommandHandlerRegistry.RegisterFunctor(&m_ActivateTimelineReportingCommandHandler);
+        // Register the "Deactivate Timeline Reporting" command handler
+        m_CommandHandlerRegistry.RegisterFunctor(&m_DeactivateTimelineReportingCommandHandler);
+    }
+
+    ~ProfilingService();
+
+    // Resets the profiling options, optionally clears the profiling service entirely
+    void ResetExternalProfilingOptions(const ProfilingOptions& options,
+                                       bool resetProfilingService = false) override;
+    ProfilingState ConfigureProfilingService(const ProfilingOptions& options,
+                                             bool resetProfilingService = false) override;
+
+
+    // Updates the profiling service, making it transition to a new state if necessary
+    void Update();
+
+    // Disconnects the profiling service from the external server
+    void Disconnect() override;
+
+    // Store a profiling context returned from a backend that supports profiling.
+    void AddBackendProfilingContext(const std::string& backendId,
+        std::shared_ptr<IBackendProfilingContext> profilingContext) override;
+
+    // Pass the current timeline reporting flag on to every stored backend profiling context
+    void NotifyBackendsForTimelineReporting() override;
+
+    const ICounterDirectory& GetCounterDirectory() const;
+    ICounterRegistry& GetCounterRegistry() override;
+    ProfilingState GetCurrentState() const override;
+    bool IsCounterRegistered(uint16_t counterUid) const override;
+    uint32_t GetAbsoluteCounterValue(uint16_t counterUid) const override;
+    uint32_t GetDeltaCounterValue(uint16_t counterUid) override;
+    uint16_t GetCounterCount() const override;
+    // counter global/backend mapping functions
+    const ICounterMappings& GetCounterMappings() const override;
+    IRegisterCounterMapping& GetCounterMappingRegistry() override;
+    bool IsCategoryRegistered(const std::string& categoryName) const override;
+    bool IsCounterRegistered(const std::string& counterName) const override;
+
+    // Getters for the profiling service state
+    bool IsProfilingEnabled() const override;
+
+    CaptureData GetCaptureData() override;
+    void SetCaptureData(uint32_t capturePeriod,
+                        const std::vector<uint16_t>& counterIds,
+                        const std::set<std::string>& activeBackends);
+
+    // Setters for the profiling service state
+    void SetCounterValue(uint16_t counterUid, uint32_t value) override;
+    uint32_t AddCounterValue(uint16_t counterUid, uint32_t value) override;
+    uint32_t SubtractCounterValue(uint16_t counterUid, uint32_t value) override;
+    uint32_t IncrementCounterValue(uint16_t counterUid) override;
+
+    void InitializeCounterValue(uint16_t counterUid) override;
+
+    std::unique_ptr<ISendTimelinePacket> GetSendTimelinePacket() const override;
+
+    ISendCounterPacket& GetSendCounterPacket() override
+    {
+        return m_SendCounterPacket;
+    }
+
+    bool IsTimelineReportingEnabled() const override
+    {
+        return m_TimelineReporting;
+    }
+
+    void AddLocalPacketHandler(ILocalPacketHandlerSharedPtr localPacketHandler);
+
+    void NotifyProfilingServiceActive() override; // IProfilingServiceStatus
+    void WaitForProfilingServiceActivation(unsigned int timeout) override; // IProfilingServiceStatus
+
+private:
+    // Copy/move constructors and copy/move assignment operators are deleted
+    ProfilingService(const ProfilingService&) = delete;
+    ProfilingService(ProfilingService&&) = delete;
+    ProfilingService& operator=(const ProfilingService&) = delete;
+    ProfilingService& operator=(ProfilingService&&) = delete;
+
+    // Initialization/reset functions
+    void Initialize();
+    void Reset();
+    void Stop();
+
+    // Helper function
+    void CheckCounterUid(uint16_t counterUid) const;
+
+    // Profiling service components
+    ProfilingOptions                   m_Options;
+    std::atomic<bool>                  m_TimelineReporting;
+    CounterDirectory                   m_CounterDirectory;
+    CounterIdMap                       m_CounterIdMap;
+    IProfilingConnectionFactoryPtr     m_ProfilingConnectionFactory;
+    IProfilingConnectionPtr            m_ProfilingConnection;
+    ProfilingStateMachine              m_StateMachine;
+    CounterIndices                     m_CounterIndex;
+    CounterValues                      m_CounterValues;
+    arm::pipe::CommandHandlerRegistry  m_CommandHandlerRegistry;
+    arm::pipe::PacketVersionResolver   m_PacketVersionResolver;
+    CommandHandler                     m_CommandHandler;
+    BufferManager                      m_BufferManager;
+    SendCounterPacket                  m_SendCounterPacket;
+    SendThread                         m_SendThread;
+    SendTimelinePacket                 m_SendTimelinePacket;
+
+    Holder m_Holder;
+
+    PeriodicCounterCapture m_PeriodicCounterCapture;
+
+    ConnectionAcknowledgedCommandHandler      m_ConnectionAcknowledgedCommandHandler;
+    RequestCounterDirectoryCommandHandler     m_RequestCounterDirectoryCommandHandler;
+    PeriodicCounterSelectionCommandHandler    m_PeriodicCounterSelectionCommandHandler;
+    PerJobCounterSelectionCommandHandler      m_PerJobCounterSelectionCommandHandler;
+    ActivateTimelineReportingCommandHandler   m_ActivateTimelineReportingCommandHandler;
+    DeactivateTimelineReportingCommandHandler m_DeactivateTimelineReportingCommandHandler;
+
+    TimelinePacketWriterFactory m_TimelinePacketWriterFactory;
+    BackendProfilingContext     m_BackendProfilingContexts;
+    uint16_t                    m_MaxGlobalCounterId;
+
+    // Signalling to let external actors know when service is active or not
+    std::mutex m_ServiceActiveMutex;
+    std::condition_variable m_ServiceActiveConditionVariable;
+    bool m_ServiceActive;
+
+    IInitialiseProfilingService& m_Initialiser;
+
+protected:
+
+    // Protected methods for testing
+    void SwapProfilingConnectionFactory(ProfilingService& instance,
+                                        IProfilingConnectionFactory* other,
+                                        IProfilingConnectionFactory*& backup)
+    {
+        ARM_PIPE_ASSERT(instance.m_ProfilingConnectionFactory);
+        ARM_PIPE_ASSERT(other);
+        // The previous factory is released into 'backup'; the instance then takes over 'other'
+        backup = instance.m_ProfilingConnectionFactory.release();
+        instance.m_ProfilingConnectionFactory.reset(other);
+    }
+    IProfilingConnection* GetProfilingConnection(ProfilingService& instance)
+    {
+        return instance.m_ProfilingConnection.get();
+    }
+    void TransitionToState(ProfilingService& instance, ProfilingState newState)
+    {
+        instance.m_StateMachine.TransitionToState(newState);
+    }
+    bool WaitForPacketSent(ProfilingService& instance, uint32_t timeout = 1000)
+    {
+        return instance.m_SendThread.WaitForPacketSent(timeout);
+    }
+
+    BufferManager& GetBufferManager(ProfilingService& instance)
+    {
+        return instance.m_BufferManager;
+    }
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ProfilingStateMachine.cpp b/profiling/client/src/ProfilingStateMachine.cpp
new file mode 100644
index 0000000..e002c05
--- /dev/null
+++ b/profiling/client/src/ProfilingStateMachine.cpp
@@ -0,0 +1,95 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ProfilingStateMachine.hpp"
+
+#include <common/include/ProfilingException.hpp>
+
+#include <sstream>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+namespace
+{
+
+void ThrowStateTransitionException(ProfilingState expectedState, ProfilingState newState)
+{
+    std::stringstream message; // text of the exception reporting the refused transition
+    message << "Cannot transition from state [" << GetProfilingStateName(expectedState)
+            << "] to state [" << GetProfilingStateName(newState) << "]";
+    throw arm::pipe::ProfilingException(message.str());
+}
+
+} // Anonymous namespace
+
+ProfilingState ProfilingStateMachine::GetCurrentState() const
+{
+    return m_State.load(std::memory_order_seq_cst); // same ordering as the defaulted argument
+}
+
+void ProfilingStateMachine::TransitionToState(ProfilingState newState)
+{
+    // Moves the machine to newState if the current state is an allowed predecessor;
+    // otherwise ThrowStateTransitionException raises a ProfilingException. When a
+    // compare-exchange fails, currentState holds the fresh value and is checked again.
+    ProfilingState currentState = m_State.load(std::memory_order_relaxed);
+
+    switch (newState)
+    {
+    case ProfilingState::Uninitialised:
+        do
+        {
+            if (!IsOneOfStates(currentState, ProfilingState::Uninitialised))
+            {
+                ThrowStateTransitionException(currentState, newState);
+            }
+        } while (!m_State.compare_exchange_strong(currentState, newState, std::memory_order_relaxed));
+        break;
+    case ProfilingState::NotConnected:
+        do
+        {
+            if (!IsOneOfStates(currentState, ProfilingState::Uninitialised, ProfilingState::NotConnected,
+                               ProfilingState::Active, ProfilingState::WaitingForAck))
+            {
+                ThrowStateTransitionException(currentState, newState);
+            }
+        } while (!m_State.compare_exchange_strong(currentState, newState, std::memory_order_relaxed));
+        break;
+    case ProfilingState::WaitingForAck:
+        do
+        {
+            if (!IsOneOfStates(currentState, ProfilingState::NotConnected, ProfilingState::WaitingForAck))
+            {
+                ThrowStateTransitionException(currentState, newState);
+            }
+        } while (!m_State.compare_exchange_strong(currentState, newState, std::memory_order_relaxed));
+        break;
+    case ProfilingState::Active:
+        do
+        {
+            if (!IsOneOfStates(currentState, ProfilingState::WaitingForAck, ProfilingState::Active))
+            {
+                ThrowStateTransitionException(currentState, newState);
+            }
+        } while (!m_State.compare_exchange_strong(currentState, newState, std::memory_order_relaxed));
+        break;
+    default:
+        // Any other requested state is ignored and m_State is left untouched
+        break;
+    }
+}
+
+void ProfilingStateMachine::Reset()
+{
+    m_State.store(ProfilingState::Uninitialised, std::memory_order_seq_cst); // unconditional, no transition check
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ProfilingStateMachine.hpp b/profiling/client/src/ProfilingStateMachine.hpp
new file mode 100644
index 0000000..b507f1a
--- /dev/null
+++ b/profiling/client/src/ProfilingStateMachine.hpp
@@ -0,0 +1,67 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <client/include/ProfilingState.hpp>
+
+#include <common/include/IgnoreUnused.hpp>
+
+#include <atomic>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class ProfilingStateMachine
+{
+public:
+    // Starts in the Uninitialised state unless an initial state is supplied.
+    ProfilingStateMachine() : m_State(ProfilingState::Uninitialised) {}
+    ProfilingStateMachine(ProfilingState state) : m_State(state) {}
+
+    // State query and updates; the definitions are not part of this header.
+    ProfilingState GetCurrentState() const;
+    void TransitionToState(ProfilingState newState);
+    void Reset();
+
+    // Terminating overload of the candidate search below: the list of
+    // candidates is exhausted, so state1 matched none of them.
+    bool IsOneOfStates(ProfilingState state1)
+    {
+        arm::pipe::IgnoreUnused(state1);
+        return false;
+    }
+
+    // Returns true if state1 equals state2 or any of the further candidates
+    // in args; the candidates are examined from left to right.
+    template<typename T, typename... Args >
+    bool IsOneOfStates(T state1, T state2, Args... args)
+    {
+        return (state1 == state2) || IsOneOfStates(state1, args...);
+    }
+
+private:
+    // Current state of the machine, held in a std::atomic.
+    std::atomic<ProfilingState> m_State;
+};
+
+constexpr char const* GetProfilingStateName(ProfilingState state)
+{
+    // Printable name of a profiling state. Any value that is not one of the
+    // four states tested below maps to "Unknown".
+    return
+        (state == ProfilingState::Uninitialised) ? "Uninitialised" :
+        (state == ProfilingState::NotConnected)  ? "NotConnected"  :
+        (state == ProfilingState::WaitingForAck) ? "WaitingForAck" :
+        (state == ProfilingState::Active)        ? "Active"        :
+                                                   "Unknown";
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/ProfilingUtils.cpp b/profiling/client/src/ProfilingUtils.cpp
new file mode 100644
index 0000000..2963a98
--- /dev/null
+++ b/profiling/client/src/ProfilingUtils.cpp
@@ -0,0 +1,647 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ProfilingUtils.hpp"
+
+#include <common/include/Assert.hpp>
+#include <common/include/CommonProfilingUtils.hpp>
+#include <common/include/NumericCast.hpp>
+#include <common/include/ProfilingException.hpp>
+#include <common/include/SwTrace.hpp>
+
+#include <armnn/Version.hpp>
+
+#include <fstream>
+#include <iostream>
+#include <limits>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void WriteBytes(const IPacketBufferPtr& packetBuffer, unsigned int offset,  const void* value, unsigned int valueSize)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto writableData = packetBuffer->GetWritableData();
+    WriteBytes(writableData, offset, value, valueSize); // raw-buffer overload does the actual copy
+}
+
+uint32_t ConstructHeader(uint32_t packetFamily, uint32_t packetId)
+{
+    const uint32_t familyBits = (packetFamily & 0x3Fu)  << 26; // 6-bit family placed in bits 26:31
+    const uint32_t idBits     = (packetId     & 0x3FFu) << 16; // 10-bit id placed in bits 16:25
+    return familyBits | idBits;
+}
+
+uint32_t ConstructHeader(uint32_t packetFamily, uint32_t packetClass, uint32_t packetType)
+{
+    const uint32_t familyBits = (packetFamily & 0x3Fu) << 26; // 6-bit family placed in bits 26:31
+    const uint32_t classBits  = (packetClass  & 0x7Fu) << 19; // 7-bit class placed in bits 19:25
+    return familyBits | classBits | ((packetType & 0x07u) << 16); // 3-bit type placed in bits 16:18
+}
+
+void WriteUint64(const std::unique_ptr<IPacketBuffer>& packetBuffer, unsigned int offset, uint64_t value)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto writableData = packetBuffer->GetWritableData();
+    WriteUint64(writableData, offset, value); // raw-buffer overload does the actual write
+}
+
+void WriteUint32(const IPacketBufferPtr& packetBuffer, unsigned int offset, uint32_t value)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto writableData = packetBuffer->GetWritableData();
+    WriteUint32(writableData, offset, value); // raw-buffer overload does the actual write
+}
+
+void WriteUint16(const IPacketBufferPtr& packetBuffer, unsigned int offset, uint16_t value)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto writableData = packetBuffer->GetWritableData();
+    WriteUint16(writableData, offset, value); // raw-buffer overload does the actual write
+}
+
+void WriteUint8(const IPacketBufferPtr& packetBuffer, unsigned int offset, uint8_t value)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto writableData = packetBuffer->GetWritableData();
+    WriteUint8(writableData, offset, value); // raw-buffer overload does the actual write
+}
+
+void ReadBytes(const IPacketBufferPtr& packetBuffer, unsigned int offset, unsigned int valueSize, uint8_t outValue[])
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto readableData = packetBuffer->GetReadableData();
+    ReadBytes(readableData, offset, valueSize, outValue); // raw-buffer overload fills outValue
+}
+
+uint64_t ReadUint64(const IPacketBufferPtr& packetBuffer, unsigned int offset)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto readableData = packetBuffer->GetReadableData();
+    return ReadUint64(readableData, offset); // raw-buffer overload does the actual read
+}
+
+uint32_t ReadUint32(const IPacketBufferPtr& packetBuffer, unsigned int offset)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto readableData = packetBuffer->GetReadableData();
+    return ReadUint32(readableData, offset); // raw-buffer overload does the actual read
+}
+
+uint16_t ReadUint16(const IPacketBufferPtr& packetBuffer, unsigned int offset)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto readableData = packetBuffer->GetReadableData();
+    return ReadUint16(readableData, offset); // raw-buffer overload does the actual read
+}
+
+uint8_t ReadUint8(const IPacketBufferPtr& packetBuffer, unsigned int offset)
+{
+    ARM_PIPE_ASSERT(packetBuffer); // the packet buffer pointer must be non-null
+    const auto readableData = packetBuffer->GetReadableData();
+    return ReadUint8(readableData, offset); // raw-buffer overload does the actual read
+}
+
+std::string GetProcessName()
+{
+    std::ifstream commFile("/proc/self/comm");
+    std::string processName;              // stays empty when the file cannot be read
+    std::getline(commFile, processName);  // only the first line of the file is taken
+    return processName;
+}
+
+/// Creates a timeline packet header; every field is masked to its bit width
+///
+/// \params
+///   packetFamily      Timeline Packet Family
+///   packetClass       Timeline Packet Class
+///   packetType        Timeline Packet Type
+///   streamId          Stream identifier (8 bits)
+///   sequenceNumbered  When non-zero the 4 bytes following the header is a u32 sequence number
+///   dataLength        Unsigned 24-bit integer. Length of data, in bytes. Zero is permitted
+///
+/// \returns
+///   Pair of uint32_t containing word0 and word1 of the header
+std::pair<uint32_t, uint32_t> CreateTimelinePacketHeader(uint32_t packetFamily,
+                                                         uint32_t packetClass,
+                                                         uint32_t packetType,
+                                                         uint32_t streamId,
+                                                         uint32_t sequenceNumbered,
+                                                         uint32_t dataLength)
+{
+    // Packet header word 0:
+    // 26:31 [6] packet_family: timeline Packet Family, value 0b000001
+    // 19:25 [7] packet_class: packet class
+    // 16:18 [3] packet_type: packet type
+    // 8:15  [8] reserved: all zeros
+    // 0:7   [8] stream_id: stream identifier
+    uint32_t packetHeaderWord0 = ((packetFamily & 0x0000003F) << 26) |
+                                 ((packetClass  & 0x0000007F) << 19) |
+                                 ((packetType   & 0x00000007) << 16) |
+                                 ((streamId     & 0x000000FF) <<  0);
+
+    // Packet header word 1:
+    // 25:31 [7]  reserved: all zeros
+    // 24    [1]  sequence_numbered: when non-zero the 4 bytes following the header is a u32 sequence number
+    // 0:23  [24] data_length: unsigned 24-bit integer. Length of data, in bytes. Zero is permitted
+    uint32_t packetHeaderWord1 = ((sequenceNumbered & 0x00000001) << 24) |
+                                 ((dataLength       & 0x00FFFFFF) <<  0);
+
+    return std::make_pair(packetHeaderWord0, packetHeaderWord1);
+}
+
+/// Builds the header shared by the timeline message packets:
+/// * declareLabel
+/// * declareEntity
+/// * declareEventClass
+/// * declareRelationship
+/// * declareEvent
+///
+/// \param
+///   dataLength The length of the message body in bytes
+///
+/// \returns
+///   Pair of uint32_t containing word0 and word1 of the header
+std::pair<uint32_t, uint32_t> CreateTimelineMessagePacketHeader(unsigned int dataLength)
+{
+    const uint32_t packetFamily     = 1;
+    const uint32_t packetClass      = 0;
+    const uint32_t packetType       = 1;
+    const uint32_t streamId         = 0;
+    const uint32_t sequenceNumbered = 0; // zero: the sequence_numbered bit of the header stays clear
+    return CreateTimelinePacketHeader(packetFamily, packetClass, packetType, streamId, sequenceNumbered, dataLength);
+}
+
+TimelinePacketStatus WriteTimelineLabelBinaryPacket(uint64_t profilingGuid,
+                                                    const std::string& label,
+                                                    unsigned char* buffer,
+                                                    unsigned int remainingBufferSize,
+                                                    unsigned int& numberOfBytesWritten)
+{
+    // Serialises a "declare label" timeline message into 'buffer':
+    //   u32 decl_id (0) | u64 profiling GUID | label as SWTrace string words
+    // numberOfBytesWritten is the message size on success and 0 otherwise.
+    // Returns Ok on success, Error if the label cannot be encoded and
+    // BufferExhaustion if 'buffer' is null, empty or too small.
+    numberOfBytesWritten = 0;
+
+    // A missing or empty buffer is treated like a buffer that is too small
+    if (buffer == nullptr || remainingBufferSize == 0)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // Field sizes in bytes
+    const unsigned int declIdSize = sizeof(uint32_t);
+    const unsigned int guidSize   = sizeof(uint64_t);
+    const unsigned int wordSize   = sizeof(uint32_t);
+
+    // Encode the label as a SWTrace string; a failed conversion is an error
+    std::vector<uint32_t> encodedLabel;
+    if (!arm::pipe::StringToSwTraceString<arm::pipe::SwTraceCharPolicy>(label, encodedLabel))
+    {
+        return TimelinePacketStatus::Error;
+    }
+
+    // Size of the encoded label and of the whole message (in bytes)
+    const unsigned int encodedLabelSize = arm::pipe::numeric_cast<unsigned int>(encodedLabel.size()) * wordSize;
+    const unsigned int messageSize      = declIdSize + guidSize + encodedLabelSize;
+
+    // Nothing is written unless the complete message fits
+    if (messageSize > remainingBufferSize)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // decl_id first
+    unsigned int writePos = 0;
+    WriteUint32(buffer, writePos, 0u);
+    writePos += declIdSize;
+
+    // then the profiling GUID
+    WriteUint64(buffer, writePos, profilingGuid);
+    writePos += guidSize;
+
+    // and finally the label, one SWTrace word at a time
+    for (const uint32_t word : encodedLabel)
+    {
+        WriteUint32(buffer, writePos, word);
+        writePos += wordSize;
+    }
+
+    // Report how many bytes were written
+    numberOfBytesWritten = messageSize;
+
+    return TimelinePacketStatus::Ok;
+}
+
+TimelinePacketStatus WriteTimelineEntityBinary(uint64_t profilingGuid,
+                                               unsigned char* buffer,
+                                               unsigned int remainingBufferSize,
+                                               unsigned int& numberOfBytesWritten)
+{
+    // Serialises a "declare entity" timeline message into 'buffer':
+    //   u32 decl_id (1) | u64 profiling GUID
+    // Returns Ok on success and BufferExhaustion if 'buffer' is null, empty
+    // or too small. numberOfBytesWritten is the message size on success and
+    // 0 otherwise.
+    numberOfBytesWritten = 0;
+
+    // A missing or empty buffer is treated like a buffer that is too small
+    if (buffer == nullptr || remainingBufferSize == 0)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // Field sizes and the resulting message size (in bytes)
+    const unsigned int declIdSize  = sizeof(uint32_t);
+    const unsigned int guidSize    = sizeof(uint64_t);
+    const unsigned int messageSize = declIdSize + guidSize;
+
+    // Nothing is written unless the complete message fits
+    if (messageSize > remainingBufferSize)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // decl_id first
+    unsigned int writePos = 0;
+    WriteUint32(buffer, writePos, 1u);
+    writePos += declIdSize;
+
+    // immediately followed by the profiling GUID
+    WriteUint64(buffer, writePos, profilingGuid);
+
+    // Report how many bytes were written
+    numberOfBytesWritten = messageSize;
+
+    return TimelinePacketStatus::Ok;
+}
+
+TimelinePacketStatus WriteTimelineRelationshipBinary(ProfilingRelationshipType relationshipType,
+                                                     uint64_t relationshipGuid,
+                                                     uint64_t headGuid,
+                                                     uint64_t tailGuid,
+                                                     uint64_t attributeGuid,
+                                                     unsigned char* buffer,
+                                                     unsigned int remainingBufferSize,
+                                                     unsigned int& numberOfBytesWritten)
+{
+    // Serialises a timeline relationship message into 'buffer':
+    //   u32 decl_id (3) | u32 relationship type | u64 relationship GUID |
+    //   u64 head GUID | u64 tail GUID | u64 attribute GUID
+    // Returns Ok on success and BufferExhaustion if 'buffer' is null, empty or
+    // too small. Throws InvalidArgumentException for an unknown relationship
+    // type; the type is only examined once the buffer has been accepted.
+    numberOfBytesWritten = 0;
+
+    // A missing or empty buffer is treated like a buffer that is too small
+    if (buffer == nullptr || remainingBufferSize == 0)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // Field sizes and the fixed size of the message (in bytes)
+    const unsigned int wordSize    = sizeof(uint32_t);
+    const unsigned int guidSize    = sizeof(uint64_t);
+    const unsigned int messageSize = 2 * wordSize + 4 * guidSize;
+
+    // Nothing is written unless the complete message fits
+    if (messageSize > remainingBufferSize)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // Numeric encoding of the relationship type
+    uint32_t encodedType = 0;
+    switch (relationshipType)
+    {
+        case ProfilingRelationshipType::RetentionLink:
+            encodedType = 0;
+            break;
+        case ProfilingRelationshipType::ExecutionLink:
+            encodedType = 1;
+            break;
+        case ProfilingRelationshipType::DataLink:
+            encodedType = 2;
+            break;
+        case ProfilingRelationshipType::LabelLink:
+            encodedType = 3;
+            break;
+        default:
+            throw arm::pipe::InvalidArgumentException("Unknown relationship type given.");
+    }
+
+    // decl_id of the timeline message
+    const uint32_t declId = 3;
+    unsigned int writePos = 0;
+    WriteUint32(buffer, writePos, declId);
+    writePos += wordSize;
+
+    // Relationship type
+    WriteUint32(buffer, writePos, encodedType);
+    writePos += wordSize;
+
+    // The four GUIDs in the order they are written: relationship, head, tail, attribute
+    const uint64_t guids[] = { relationshipGuid, headGuid, tailGuid, attributeGuid };
+    for (const uint64_t guid : guids)
+    {
+        WriteUint64(buffer, writePos, guid);
+        writePos += guidSize;
+    }
+
+    // Report how many bytes were written
+    numberOfBytesWritten = messageSize;
+
+    return TimelinePacketStatus::Ok;
+}
+
+TimelinePacketStatus WriteTimelineMessageDirectoryPackage(unsigned char* buffer,
+                                                          unsigned int remainingBufferSize,
+                                                          unsigned int& numberOfBytesWritten)
+{
+    // Writes the timeline message directory packet to buffer; numberOfBytesWritten is left at 0 unless Ok is returned
+    numberOfBytesWritten = 0;
+
+    // Check that the given buffer is valid
+    if (buffer == nullptr || remainingBufferSize == 0)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // Utils
+    unsigned int uint8_t_size  = sizeof(uint8_t);
+    unsigned int uint32_t_size = sizeof(uint32_t);
+    unsigned int uint64_t_size = sizeof(uint64_t);
+
+    // The payload/data of the packet consists of swtrace event definitions encoded according
+    // to the swtrace directory specification. The messages being the five defined below:
+    //
+    // |  decl_id  |     decl_name       |      ui_name          |  arg_types  |            arg_names                |
+    // |-----------|---------------------|-----------------------|-------------|-------------------------------------|
+    // |    0      |   declareLabel      |   declare label       |    ps       |  guid,value                         |
+    // |    1      |   declareEntity     |   declare entity      |    p        |  guid                               |
+    // |    2      | declareEventClass   |  declare event class  |    pp       |  guid,nameGuid                      |
+    // |    3      | declareRelationship | declare relationship  |    Ipppp    |  relationshipType,relationshipGuid, |
+    // |           |                     |                       |             |  headGuid,tailGuid,attributeGuid    |
+    // |    4      |   declareEvent      |   declare event       |    @tp      |  timestamp,threadId,eventGuid       |
+    std::vector<std::vector<std::string>> timelineDirectoryMessages
+    {
+        { "0", "declareLabel", "declare label", "ps", "guid,value" },
+        { "1", "declareEntity", "declare entity", "p", "guid" },
+        { "2", "declareEventClass", "declare event class", "pp", "guid,nameGuid" },
+        { "3", "declareRelationship", "declare relationship", "Ipppp",
+          "relationshipType,relationshipGuid,headGuid,tailGuid,attributeGuid" },
+        { "4", "declareEvent", "declare event", "@tp", "timestamp,threadId,eventGuid" }
+    };
+
+    // Build the message declarations
+    std::vector<uint32_t> swTraceBuffer;
+    for (const auto& directoryComponent : timelineDirectoryMessages)
+    {
+        // decl_id
+        uint32_t declId = 0;
+        try
+        {
+            declId = arm::pipe::numeric_cast<uint32_t>(std::stoul(directoryComponent[0]));
+        }
+        catch (const std::exception&)
+        {
+            return TimelinePacketStatus::Error;
+        }
+        swTraceBuffer.push_back(declId);
+
+        bool result = true;
+        result &= arm::pipe::ConvertDirectoryComponent<arm::pipe::SwTraceNameCharPolicy>(
+                      directoryComponent[1], swTraceBuffer); // decl_name
+        result &= arm::pipe::ConvertDirectoryComponent<arm::pipe::SwTraceCharPolicy>    (
+                      directoryComponent[2], swTraceBuffer); // ui_name
+        result &= arm::pipe::ConvertDirectoryComponent<arm::pipe::SwTraceTypeCharPolicy>(
+                      directoryComponent[3], swTraceBuffer); // arg_types
+        result &= arm::pipe::ConvertDirectoryComponent<arm::pipe::SwTraceCharPolicy>    (
+                      directoryComponent[4], swTraceBuffer); // arg_names
+        if (!result)
+        {
+            return TimelinePacketStatus::Error;
+        }
+    }
+
+    unsigned int dataLength = 3 * uint8_t_size + // Stream header (3 bytes)
+                              uint32_t_size +    // Number of declarations (written ahead of the directory)
+                              arm::pipe::numeric_cast<unsigned int>(swTraceBuffer.size()) * uint32_t_size; // Directory
+
+    // Calculate the timeline directory binary packet size (in bytes)
+    unsigned int timelineDirectoryPacketSize = 2 * uint32_t_size + // Header (2 words)
+                                               dataLength;         // Payload
+
+    // Check whether the timeline directory binary packet fits in the given buffer
+    if (timelineDirectoryPacketSize > remainingBufferSize)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // Create packet header
+    auto packetHeader = CreateTimelinePacketHeader(1, 0, 0, 0, 0, arm::pipe::numeric_cast<uint32_t>(dataLength));
+
+    // Initialize the offset for writing in the buffer
+    unsigned int offset = 0;
+
+    // Write the timeline binary packet header to the buffer
+    WriteUint32(buffer, offset, packetHeader.first);
+    offset += uint32_t_size;
+    WriteUint32(buffer, offset, packetHeader.second);
+    offset += uint32_t_size;
+
+    // Write the stream header
+    uint8_t streamVersion = 4;
+    uint8_t pointerBytes  = arm::pipe::numeric_cast<uint8_t>(uint64_t_size); // All GUIDs are uint64_t
+    uint8_t threadIdBytes = arm::pipe::numeric_cast<uint8_t>(ThreadIdSize);
+    switch (threadIdBytes)
+    {
+    case 4: // Typically Windows and Android
+    case 8: // Typically Linux
+        break; // Valid values
+    default:
+        return TimelinePacketStatus::Error; // Invalid value
+    }
+    WriteUint8(buffer, offset, streamVersion);
+    offset += uint8_t_size;
+    WriteUint8(buffer, offset, pointerBytes);
+    offset += uint8_t_size;
+    WriteUint8(buffer, offset, threadIdBytes);
+    offset += uint8_t_size;
+
+    // Write the SWTrace directory
+    uint32_t numberOfDeclarations = arm::pipe::numeric_cast<uint32_t>(timelineDirectoryMessages.size());
+    WriteUint32(buffer, offset, numberOfDeclarations); // Number of declarations
+    offset += uint32_t_size;
+    for (uint32_t i : swTraceBuffer)
+    {
+        WriteUint32(buffer, offset, i); // Message declarations
+        offset += uint32_t_size;
+    }
+
+    // Update the number of bytes written
+    numberOfBytesWritten = timelineDirectoryPacketSize;
+
+    return TimelinePacketStatus::Ok;
+}
+
+TimelinePacketStatus WriteTimelineEventClassBinary(uint64_t profilingGuid,
+                                                   uint64_t nameGuid,
+                                                   unsigned char* buffer,
+                                                   unsigned int remainingBufferSize,
+                                                   unsigned int& numberOfBytesWritten)
+{
+    // Serialises a "declare event class" timeline message (decl_id 2) at the start of the
+    // given buffer. The payload layout, in write order, is:
+    //
+    //   | decl_id (uint32_t) | profiling GUID (uint64_t) | name GUID (uint64_t) |
+    //
+    // Returns BufferExhaustion, leaving numberOfBytesWritten at 0, when the buffer is null,
+    // has no remaining space or is too small for the payload; otherwise returns Ok.
+
+    // Nothing has been written yet
+    numberOfBytesWritten = 0;
+
+    // Size of each field and of the whole payload (in bytes)
+    const unsigned int declIdBytes  = sizeof(uint32_t);
+    const unsigned int guidBytes    = sizeof(uint64_t);
+    const unsigned int payloadBytes = declIdBytes + (guidBytes * 2);
+
+    // Reject a missing or empty buffer, as well as one the payload does not fit in
+    const bool bufferUnusable = (buffer == nullptr) || (remainingBufferSize == 0);
+    if (bufferUnusable || payloadBytes > remainingBufferSize)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // Position of each field relative to the start of the buffer
+    const unsigned int declIdPos        = 0;
+    const unsigned int profilingGuidPos = declIdPos + declIdBytes;
+    const unsigned int nameGuidPos      = profilingGuidPos + guidBytes;
+
+    // decl_id of the timeline message
+    const uint32_t eventClassDeclId = 2;
+    // Write the timeline binary payload to the buffer
+    WriteUint32(buffer, declIdPos, eventClassDeclId);
+    WriteUint64(buffer, profilingGuidPos, profilingGuid);
+    WriteUint64(buffer, nameGuidPos, nameGuid);
+
+    // Report the number of bytes written
+    numberOfBytesWritten = payloadBytes;
+
+    return TimelinePacketStatus::Ok;
+}
+
+TimelinePacketStatus WriteTimelineEventBinary(uint64_t timestamp,
+                                              int threadId,
+                                              uint64_t profilingGuid,
+                                              unsigned char* buffer,
+                                              unsigned int remainingBufferSize,
+                                              unsigned int& numberOfBytesWritten)
+{
+    // Serialises a "declare event" timeline message (decl_id 4) at the start of the given
+    // buffer. The payload layout, in write order, is:
+    //
+    //   | decl_id (uint32_t) | timestamp (uint64_t) | thread id (ThreadIdSize bytes) | profiling GUID (uint64_t) |
+    //
+    // Returns BufferExhaustion, leaving numberOfBytesWritten at 0, when the buffer is null,
+    // has no remaining space or is too small for the payload; otherwise returns Ok.
+
+    // Nothing has been written yet
+    numberOfBytesWritten = 0;
+
+    // Size of each field and of the whole payload (in bytes)
+    const unsigned int declIdBytes    = sizeof(uint32_t);
+    const unsigned int timestampBytes = sizeof(uint64_t);
+    const unsigned int guidBytes      = sizeof(uint64_t);
+    const unsigned int payloadBytes   = declIdBytes + timestampBytes + ThreadIdSize + guidBytes;
+
+    // Reject a missing or empty buffer, as well as one the payload does not fit in
+    const bool bufferUnusable = (buffer == nullptr) || (remainingBufferSize == 0);
+    if (bufferUnusable || payloadBytes > remainingBufferSize)
+    {
+        return TimelinePacketStatus::BufferExhaustion;
+    }
+
+    // Position of each field relative to the start of the buffer
+    const unsigned int declIdPos    = 0;
+    const unsigned int timestampPos = declIdPos + declIdBytes;
+    const unsigned int threadIdPos  = timestampPos + timestampBytes;
+    const unsigned int guidPos      = threadIdPos + ThreadIdSize;
+
+    // decl_id of the timeline message
+    const uint32_t eventDeclId = 4;
+
+    // Write the timeline binary payload to the buffer
+    WriteUint32(buffer, declIdPos, eventDeclId);
+    WriteUint64(buffer, timestampPos, timestamp);
+    WriteBytes(buffer, threadIdPos, &threadId, ThreadIdSize); // ThreadIdSize bytes starting at &threadId
+    WriteUint64(buffer, guidPos, profilingGuid);
+
+    // Report the number of bytes written
+    numberOfBytesWritten = payloadBytes;
+
+    return TimelinePacketStatus::Ok;
+}
+
+uint64_t GetTimestamp()
+{
+    // Returns the nanoseconds elapsed since the epoch of the clock selected at build time
+#if USE_CLOCK_MONOTONIC_RAW
+    using TimestampClock = armnn::MonotonicClockRaw;
+#else
+    using TimestampClock = std::chrono::steady_clock;
+#endif
+    const auto sinceEpoch  = TimestampClock::now().time_since_epoch();
+    const auto nanoseconds = std::chrono::duration_cast<std::chrono::nanoseconds>(sinceEpoch);
+
+    return static_cast<uint64_t>(nanoseconds.count());
+}
+
+arm::pipe::Packet ReceivePacket(const unsigned char* buffer, uint32_t length)
+{
+    // Builds a Packet from a buffer holding metadata identifier (uint32_t), data length (uint32_t), then the data
+    if (buffer == nullptr)
+    {
+        throw arm::pipe::ProfilingException("data buffer is nullptr");
+    }
+    if (length < 8)
+    {
+        throw arm::pipe::ProfilingException("length of data buffer is less than 8");
+    }
+    uint32_t metadataIdentifier = 0;
+    std::memcpy(&metadataIdentifier, buffer, sizeof(metadataIdentifier));
+    uint32_t dataLength = 0;
+    std::memcpy(&dataLength, buffer + 4u, sizeof(dataLength));
+    if (dataLength > length - 8u) // The announced data must lie within the length bytes supplied by the caller
+    {
+        throw arm::pipe::ProfilingException("data length in packet header exceeds length of data buffer");
+    }
+    std::unique_ptr<unsigned char[]> packetData; // Stays empty when the packet carries no data
+    if (dataLength > 0)
+    {
+        packetData = std::make_unique<unsigned char[]>(dataLength);
+        std::memcpy(packetData.get(), buffer + 8u, dataLength);
+    }
+    return arm::pipe::Packet(metadataIdentifier, dataLength, packetData);
+}
+} // namespace pipe
+
+} // namespace arm
+
+namespace std
+{
+// True when the bytes of left match the leading left.size() bytes of the object representation of right
+bool operator==(const std::vector<uint8_t>& left, int right)
+{
+    // A vector larger than an int cannot match, and comparing it would read past the end of right
+    return left.empty() || (left.size() <= sizeof(right) && std::memcmp(left.data(), &right, left.size()) == 0);
+}
+} // namespace std
diff --git a/profiling/client/src/ProfilingUtils.hpp b/profiling/client/src/ProfilingUtils.hpp
new file mode 100644
index 0000000..cd5167c
--- /dev/null
+++ b/profiling/client/src/ProfilingUtils.hpp
@@ -0,0 +1,143 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IPacketBuffer.hpp"
+
+#include <client/include/ISendTimelinePacket.hpp>
+
+#include <common/include/ICounterDirectory.hpp>
+#include <common/include/Packet.hpp>
+#include <common/include/ProfilingException.hpp>
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+constexpr unsigned int ThreadIdSize = sizeof(int); // Bytes written per thread id; is platform dependent
+
+void WriteBytes(const IPacketBuffer& packetBuffer, unsigned int offset, const void* value, unsigned int valueSize);
+
+uint32_t ConstructHeader(uint32_t packetFamily, uint32_t packetId);
+
+uint32_t ConstructHeader(uint32_t packetFamily, uint32_t packetClass, uint32_t packetType);
+// Writers taking a packet buffer
+void WriteUint64(const IPacketBufferPtr& packetBuffer, unsigned int offset, uint64_t value);
+
+void WriteUint32(const IPacketBufferPtr& packetBuffer, unsigned int offset, uint32_t value);
+
+void WriteUint16(const IPacketBufferPtr& packetBuffer, unsigned int offset, uint16_t value);
+
+void WriteUint8(const IPacketBufferPtr& packetBuffer, unsigned int offset, uint8_t value);
+// Writers taking a raw buffer; the callers in ProfilingUtils.cpp pass the byte position within buffer as offset
+void WriteBytes(unsigned char* buffer, unsigned int offset, const void* value, unsigned int valueSize);
+
+void WriteUint64(unsigned char* buffer, unsigned int offset, uint64_t value);
+
+void WriteUint32(unsigned char* buffer, unsigned int offset, uint32_t value);
+
+void WriteUint16(unsigned char* buffer, unsigned int offset, uint16_t value);
+
+void WriteUint8(unsigned char* buffer, unsigned int offset, uint8_t value);
+// Readers taking a packet buffer
+void ReadBytes(const IPacketBufferPtr& packetBuffer, unsigned int offset, unsigned int valueSize, uint8_t outValue[]);
+
+uint64_t ReadUint64(const IPacketBufferPtr& packetBuffer, unsigned int offset);
+
+uint32_t ReadUint32(const IPacketBufferPtr& packetBuffer, unsigned int offset);
+
+uint16_t ReadUint16(const IPacketBufferPtr& packetBuffer, unsigned int offset);
+
+uint8_t ReadUint8(const IPacketBufferPtr& packetBuffer, unsigned int offset);
+// Readers taking a raw buffer
+void ReadBytes(const unsigned char* buffer, unsigned int offset, unsigned int valueSize, uint8_t outValue[]);
+
+uint64_t ReadUint64(unsigned const char* buffer, unsigned int offset);
+
+uint32_t ReadUint32(unsigned const char* buffer, unsigned int offset);
+
+uint16_t ReadUint16(unsigned const char* buffer, unsigned int offset);
+
+uint8_t ReadUint8(unsigned const char* buffer, unsigned int offset);
+// Returns the two header words of a timeline packet; the caller here writes .first and then .second to the buffer
+std::pair<uint32_t, uint32_t> CreateTimelinePacketHeader(uint32_t packetFamily,
+                                                         uint32_t packetClass,
+                                                         uint32_t packetType,
+                                                         uint32_t streamId,
+                                                         uint32_t sequenceNumbered,
+                                                         uint32_t dataLength);
+
+std::string GetProcessName();
+
+enum class TimelinePacketStatus // Outcome reported by the WriteTimeline* functions declared in this header
+{
+    Ok,              // The message or packet was written to the buffer
+    Error,           // The message or packet could not be built
+    BufferExhaustion // The buffer is null, has no remaining space or is too small for the data
+};
+
+TimelinePacketStatus WriteTimelineLabelBinaryPacket(uint64_t profilingGuid,
+                                                    const std::string& label,
+                                                    unsigned char* buffer,
+                                                    unsigned int bufferSize,
+                                                    unsigned int& numberOfBytesWritten);
+
+TimelinePacketStatus WriteTimelineEntityBinary(uint64_t profilingGuid,
+                                               unsigned char* buffer,
+                                               unsigned int bufferSize,
+                                               unsigned int& numberOfBytesWritten);
+// Writes a declareRelationship message (decl_id 3): the relationship type followed by the four GUIDs
+TimelinePacketStatus WriteTimelineRelationshipBinary(ProfilingRelationshipType relationshipType,
+                                                     uint64_t relationshipGuid,
+                                                     uint64_t headGuid,
+                                                     uint64_t tailGuid,
+                                                     uint64_t attributeGuid,
+                                                     unsigned char* buffer,
+                                                     unsigned int bufferSize,
+                                                     unsigned int& numberOfBytesWritten);
+// Writes the timeline message directory packet, which declares the five timeline messages (decl_id 0 to 4)
+TimelinePacketStatus WriteTimelineMessageDirectoryPackage(unsigned char* buffer,
+                                                          unsigned int bufferSize,
+                                                          unsigned int& numberOfBytesWritten);
+// Writes a declareEventClass message (decl_id 2): the profiling GUID followed by the name GUID
+TimelinePacketStatus WriteTimelineEventClassBinary(uint64_t profilingGuid,
+                                                   uint64_t nameGuid,
+                                                   unsigned char* buffer,
+                                                   unsigned int bufferSize,
+                                                   unsigned int& numberOfBytesWritten);
+// Writes a declareEvent message (decl_id 4): timestamp, ThreadIdSize bytes of threadId, then the profiling GUID
+TimelinePacketStatus WriteTimelineEventBinary(uint64_t timestamp,
+                                              int threadId,
+                                              uint64_t profilingGuid,
+                                              unsigned char* buffer,
+                                              unsigned int bufferSize,
+                                              unsigned int& numberOfBytesWritten);
+
+std::string CentreAlignFormatting(const std::string& stringToPass, const int spacingWidth);
+// Returns a clock reading in nanoseconds since the epoch of the clock selected at build time
+uint64_t GetTimestamp();
+// Builds a Packet from an 8-byte header followed by the data; throws ProfilingException if buffer is null or length < 8
+arm::pipe::Packet ReceivePacket(const unsigned char* buffer, uint32_t length);
+
+} // namespace pipe
+
+} // namespace arm
+
+namespace std
+{
+// Compares the bytes held by left with the leading bytes of the object representation of right
+bool operator==(const std::vector<uint8_t>& left, int right);
+
+} // namespace std
diff --git a/profiling/client/src/RegisterBackendCounters.cpp b/profiling/client/src/RegisterBackendCounters.cpp
new file mode 100644
index 0000000..79ffa20
--- /dev/null
+++ b/profiling/client/src/RegisterBackendCounters.cpp
@@ -0,0 +1,85 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RegisterBackendCounters.hpp"
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void RegisterBackendCounters::RegisterCategory(const std::string& categoryName)
+{
+    m_CounterDirectory.RegisterCategory(categoryName); // Forwarded unchanged to the counter registry
+}
+
+uint16_t RegisterBackendCounters::RegisterDevice(const std::string& deviceName,
+                                                 uint16_t cores,
+                                                 const arm::pipe::Optional<std::string>& parentCategoryName)
+{
+    // Forward to the counter registry and hand back the UID of the device it returns
+    return m_CounterDirectory.RegisterDevice(deviceName, cores, parentCategoryName)->m_Uid;
+}
+
+uint16_t RegisterBackendCounters::RegisterCounterSet(const std::string& counterSetName,
+                                                     uint16_t count,
+                                                     const arm::pipe::Optional<std::string>& parentCategoryName)
+{
+    // Forward to the counter registry and hand back the UID of the counter set it returns
+    return m_CounterDirectory.RegisterCounterSet(counterSetName, count, parentCategoryName)->m_Uid;
+}
+
+uint16_t RegisterBackendCounters::RegisterCounter(const uint16_t uid,
+                                                  const std::string& parentCategoryName,
+                                                  uint16_t counterClass,
+                                                  uint16_t interpolation,
+                                                  double multiplier,
+                                                  const std::string& name,
+                                                  const std::string& description,
+                                                  const arm::pipe::Optional<std::string>& units,
+                                                  const arm::pipe::Optional<uint16_t>& numberOfCores,
+                                                  const arm::pipe::Optional<uint16_t>& deviceUid,
+                                                  const arm::pipe::Optional<uint16_t>& counterSetUid)
+{
+    // Registers the counter with the counter registry, passing the incremented current maximum global
+    // counter ID, then maps every global ID in [m_Uid, m_MaxCounterUid] of the returned counter to the
+    // backend counter IDs uid, uid + 1, ... Returns the returned counter's m_MaxCounterUid.
+
+    ++m_CurrentMaxGlobalCounterID;
+    const Counter* counterPtr = m_CounterDirectory.RegisterCounter(m_BackendId,
+                                                                   m_CurrentMaxGlobalCounterID,
+                                                                   parentCategoryName,
+                                                                   counterClass,
+                                                                   interpolation,
+                                                                   multiplier,
+                                                                   name,
+                                                                   description,
+                                                                   units,
+                                                                   numberOfCores,
+                                                                   deviceUid,
+                                                                   counterSetUid);
+    m_CurrentMaxGlobalCounterID = counterPtr->m_MaxCounterUid;
+
+    // Register the mappings. The loop counts in 32 bits so that it also terminates when m_MaxCounterUid
+    // is the largest uint16_t value (a uint16_t loop counter would wrap around and never exceed it).
+    IRegisterCounterMapping& counterIdMap = m_ProfilingService.GetCounterMappingRegistry();
+    const uint32_t firstGlobalId = counterPtr->m_Uid;
+    const uint32_t lastGlobalId  = counterPtr->m_MaxCounterUid;
+    for (uint32_t globalId = firstGlobalId; globalId <= lastGlobalId; ++globalId)
+    {
+        // globalCounterId -> backendCounterId, m_BackendId
+        const uint32_t backendId = static_cast<uint32_t>(uid) + (globalId - firstGlobalId);
+        counterIdMap.RegisterMapping(static_cast<uint16_t>(globalId),
+                                     static_cast<uint16_t>(backendId),
+                                     m_BackendId);
+    }
+
+    return m_CurrentMaxGlobalCounterID;
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/RegisterBackendCounters.hpp b/profiling/client/src/RegisterBackendCounters.hpp
new file mode 100644
index 0000000..f48f1ca
--- /dev/null
+++ b/profiling/client/src/RegisterBackendCounters.hpp
@@ -0,0 +1,67 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <client/include/CounterIdMap.hpp>
+#include <client/include/IProfilingService.hpp>
+
+#include <client/include/backends/IBackendProfiling.hpp>
+
+#include <common/include/CounterDirectory.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class RegisterBackendCounters : public IRegisterBackendCounters
+{
+public:
+    // Forwards a backend's category, device, counter set and counter registrations to the counter registry
+    RegisterBackendCounters(
+        uint16_t currentMaxGlobalCounterID, const std::string& backendId, IProfilingService& profilingService)
+        : m_CurrentMaxGlobalCounterID(currentMaxGlobalCounterID),
+          m_BackendId(backendId),
+          m_ProfilingService(profilingService),
+          m_CounterDirectory(m_ProfilingService.GetCounterRegistry()) {}
+
+    ~RegisterBackendCounters() = default;
+
+    void RegisterCategory(const std::string& categoryName) override;
+
+    uint16_t RegisterDevice(const std::string& deviceName,
+                            uint16_t cores = 0,
+                            const arm::pipe::Optional<std::string>& parentCategoryName =
+                                arm::pipe::EmptyOptional()) override;
+
+    uint16_t RegisterCounterSet(const std::string& counterSetName,
+                                uint16_t count = 0,
+                                const arm::pipe::Optional<std::string>& parentCategoryName
+                                    = arm::pipe::EmptyOptional()) override;
+
+    uint16_t RegisterCounter(const uint16_t uid,
+                             const std::string& parentCategoryName,
+                             uint16_t counterClass,
+                             uint16_t interpolation,
+                             double multiplier,
+                             const std::string& name,
+                             const std::string& description,
+                             const arm::pipe::Optional<std::string>& units      = arm::pipe::EmptyOptional(),
+                             const arm::pipe::Optional<uint16_t>& numberOfCores = arm::pipe::EmptyOptional(),
+                             const arm::pipe::Optional<uint16_t>& deviceUid     = arm::pipe::EmptyOptional(),
+                             const arm::pipe::Optional<uint16_t>& counterSetUid = arm::pipe::EmptyOptional()) override;
+
+private:
+    uint16_t m_CurrentMaxGlobalCounterID;  // Incremented before, and updated after, each counter registration
+    const std::string m_BackendId;         // Owned copy, so a temporary or short-lived backendId cannot dangle
+    IProfilingService& m_ProfilingService; // Referenced, not owned: must outlive this object
+    ICounterRegistry& m_CounterDirectory;  // Obtained from m_ProfilingService.GetCounterRegistry()
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/RequestCounterDirectoryCommandHandler.cpp b/profiling/client/src/RequestCounterDirectoryCommandHandler.cpp
new file mode 100644
index 0000000..cf07c45
--- /dev/null
+++ b/profiling/client/src/RequestCounterDirectoryCommandHandler.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RequestCounterDirectoryCommandHandler.hpp"
+
+#include <fmt/format.h>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void RequestCounterDirectoryCommandHandler::operator()(const arm::pipe::Packet& packet)
+{
+    const ProfilingState currentState = m_StateMachine.GetCurrentState();
+    switch (currentState)
+    {
+    case ProfilingState::Active: // The only state in which the request is served
+        break;
+    case ProfilingState::Uninitialised:
+    case ProfilingState::NotConnected:
+    case ProfilingState::WaitingForAck:
+        throw arm::pipe::ProfilingException(fmt::format("Request Counter Directory Comand Handler invoked while in an "
+                                            "wrong state: {}",
+                                            GetProfilingStateName(currentState)));
+    default:
+        throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}",
+                                            static_cast<int>(currentState)));
+    }
+
+    // The packet must be a Request Counter Directory packet (family 0, id 3)
+    if (packet.GetPacketFamily() != 0u || packet.GetPacketId() != 3u)
+    {
+        throw arm::pipe::InvalidArgumentException(fmt::format("Expected Packet family = 0, id = 3 but "
+                                                              "received family = {}, id = {}",
+                                                              packet.GetPacketFamily(),
+                                                              packet.GetPacketId()));
+    }
+
+    // Send all the packet required for the handshake with the external profiling service
+    m_SendCounterPacket.SendCounterDirectoryPacket(m_CounterDirectory);
+    m_SendTimelinePacket.SendTimelineMessageDirectoryPackage();
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/RequestCounterDirectoryCommandHandler.hpp b/profiling/client/src/RequestCounterDirectoryCommandHandler.hpp
new file mode 100644
index 0000000..2582fb2
--- /dev/null
+++ b/profiling/client/src/RequestCounterDirectoryCommandHandler.hpp
@@ -0,0 +1,51 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ProfilingStateMachine.hpp"
+
+#include <client/include/ISendCounterPacket.hpp>
+#include <client/include/ISendTimelinePacket.hpp>
+
+#include <common/include/CommandHandlerFunctor.hpp>
+#include <common/include/Packet.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class RequestCounterDirectoryCommandHandler : public arm::pipe::CommandHandlerFunctor
+{
+    // Handler that replies to a packet by sending the counter directory and the timeline message directory
+public:
+    RequestCounterDirectoryCommandHandler(uint32_t familyId,
+                                          uint32_t packetId,
+                                          uint32_t version,
+                                          ICounterDirectory& counterDirectory,
+                                          ISendCounterPacket& sendCounterPacket,
+                                          ISendTimelinePacket& sendTimelinePacket,
+                                          ProfilingStateMachine& profilingStateMachine) :
+        CommandHandlerFunctor(familyId, packetId, version),
+        m_CounterDirectory(counterDirectory),
+        m_SendCounterPacket(sendCounterPacket),
+        m_SendTimelinePacket(sendTimelinePacket),
+        m_StateMachine(profilingStateMachine)
+    {}
+
+    void operator()(const arm::pipe::Packet& packet) override;
+
+private:
+    const ICounterDirectory& m_CounterDirectory; // Directory sent in the counter directory packet
+    ISendCounterPacket& m_SendCounterPacket;     // Sends the counter directory packet
+    ISendTimelinePacket& m_SendTimelinePacket;   // Sends the timeline message directory package
+    const ProfilingStateMachine& m_StateMachine; // Queried for the current profiling state
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/SendCounterPacket.cpp b/profiling/client/src/SendCounterPacket.cpp
new file mode 100644
index 0000000..96adb95
--- /dev/null
+++ b/profiling/client/src/SendCounterPacket.cpp
@@ -0,0 +1,923 @@
+//
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SendCounterPacket.hpp"
+
+#include <common/include/Assert.hpp>
+#include <common/include/Conversion.hpp>
+#include <common/include/Constants.hpp>
+#include <common/include/EncodeVersion.hpp>
+#include <common/include/Processes.hpp>
+#include <common/include/ProfilingException.hpp>
+#include <common/include/SwTrace.hpp>
+
+#include <fmt/format.h>
+
+#include <cstring>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+void SendCounterPacket::SendStreamMetaDataPacket() // reserves a buffer, writes the stream metadata packet, commits it
+{
+    const std::string info(m_SoftwareInfo);
+    const std::string hardwareVersion(m_HardwareVersion);
+    const std::string softwareVersion(m_SoftwareVersion);
+    const std::string processName = GetProcessName().substr(0, 60); // at most the first 60 characters are sent
+    // Each size is the string length plus one, so the memcpy calls below also copy the terminating '\0'.
+    const uint32_t infoSize =            arm::pipe::numeric_cast<uint32_t>(info.size()) + 1;
+    const uint32_t hardwareVersionSize = arm::pipe::numeric_cast<uint32_t>(hardwareVersion.size()) + 1;
+    const uint32_t softwareVersionSize = arm::pipe::numeric_cast<uint32_t>(softwareVersion.size()) + 1;
+    const uint32_t processNameSize =     arm::pipe::numeric_cast<uint32_t>(processName.size()) + 1;
+
+    const uint32_t sizeUint32 = sizeof(uint32_t);
+
+    const uint32_t headerSize = 2 * sizeUint32; // packet header: two 32-bit words
+    const uint32_t bodySize = 10 * sizeUint32;  // fixed body: the ten words written below (pipe_magic .. reserved)
+    const uint32_t packetVersionCountSize = sizeUint32; // one word holding the number of version entries
+
+    // Supported Packets
+    // Packet Encoding version 1.0.0
+    // Control packet family
+    //   Stream metadata packet (packet family=0; packet id=0)
+    //   Connection Acknowledged packet ( packet family=0, packet id=1) Version 1.0.0
+    //   Counter Directory packet (packet family=0; packet id=2) Version 1.0.0
+    //   Request Counter Directory packet ( packet family=0, packet id=3) Version 1.0.0
+    //   Periodic Counter Selection packet ( packet family=0, packet id=4) Version 1.0.0
+    //   Per Job Counter Selection packet ( packet family=0, packet id=5) Version 1.0.0
+    //   Activate Timeline Reporting (packet family = 0, packet id = 6) Version 1.0.0
+    //   Deactivate Timeline Reporting (packet family = 0, packet id = 7) Version 1.0.0
+    // Counter Packet Family
+    //   Periodic Counter Capture (packet_family = 3, packet_class = 0, packet_type = 0) Version 1.0.0
+    //   Per-Job Counter Capture (packet_family = 3, packet_class = 1, packet_type = 0,1) Version  1.0.0
+    // Timeline Packet Family
+    //   Timeline Message Directory (packet_family = 1, packet_class = 0, packet_type = 0) Version 1.0.0
+    //   Timeline Message (packet_family = 1, packet_class = 0, packet_type = 1) Version 1.0.0
+    std::vector<std::pair<uint32_t, uint32_t>> packetVersions; // (packet header word, encoded version) pairs
+    packetVersions.push_back(std::make_pair(ConstructHeader(0, 0), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(0, 1), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(0, 2), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(0, 3), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(0, 4), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(0, 5), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(0, 6), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(0, 7), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(3, 0, 0), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(3, 1, 0), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(3, 1, 1), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(1, 0, 0), arm::pipe::EncodeVersion(1, 0, 0)));
+    packetVersions.push_back(std::make_pair(ConstructHeader(1, 0, 1), arm::pipe::EncodeVersion(1, 0, 0)));
+    uint32_t numberOfVersions = arm::pipe::numeric_cast<uint32_t>(packetVersions.size());
+    uint32_t packetVersionSize = arm::pipe::numeric_cast<uint32_t>(numberOfVersions * 2 * sizeUint32); // 2 words/entry
+
+    const uint32_t payloadSize = arm::pipe::numeric_cast<uint32_t>(infoSize + hardwareVersionSize +
+                                                               softwareVersionSize + processNameSize +
+                                                               packetVersionCountSize + packetVersionSize);
+
+    const uint32_t totalSize = headerSize + bodySize + payloadSize;
+    uint32_t offset = 0;   // running write position in bytes from the start of the reserved buffer
+    uint32_t reserved = 0; // passed to Reserve(); compared against totalSize below
+
+    IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
+
+    if (writeBuffer == nullptr || reserved < totalSize)
+    {
+        CancelOperationAndThrow<arm::pipe::BufferExhaustion>(
+            writeBuffer,
+            fmt::format("No space left in buffer. Unable to reserve ({}) bytes.", totalSize));
+    }
+
+    try
+    {
+        // Create header
+
+        WriteUint32(writeBuffer, offset, 0); // header word 0 is written as all zeros
+        offset += sizeUint32;
+        WriteUint32(writeBuffer, offset, totalSize - headerSize); // data length: everything after the header
+
+        // Packet body
+
+        offset += sizeUint32;
+        WriteUint32(writeBuffer, offset, arm::pipe::PIPE_MAGIC); // pipe_magic
+        offset += sizeUint32;
+        WriteUint32(writeBuffer, offset, arm::pipe::EncodeVersion(1, 0, 0)); // stream_metadata_version
+        offset += sizeUint32;
+        WriteUint32(writeBuffer, offset, MAX_METADATA_PACKET_LENGTH); // max_data_length
+        offset += sizeUint32;
+        int pid = arm::pipe::GetCurrentProcessId();
+        WriteUint32(writeBuffer, offset, arm::pipe::numeric_cast<uint32_t>(pid)); // pid
+        offset += sizeUint32;
+        uint32_t poolOffset = bodySize; // pool offsets start at bodySize, i.e. they do not include the packet header
+        WriteUint32(writeBuffer, offset, poolOffset); // offset_info
+        offset += sizeUint32;
+        poolOffset += infoSize;
+        WriteUint32(writeBuffer, offset, poolOffset); // offset_hw_version
+        offset += sizeUint32;
+        poolOffset += hardwareVersionSize;
+        WriteUint32(writeBuffer, offset, poolOffset); // offset_sw_version
+        offset += sizeUint32;
+        poolOffset += softwareVersionSize;
+        WriteUint32(writeBuffer, offset, poolOffset); // offset_process_name
+        offset += sizeUint32;
+        poolOffset += processNameSize;
+        WriteUint32(writeBuffer, offset, poolOffset); // offset_packet_version_table
+        offset += sizeUint32;
+        WriteUint32(writeBuffer, offset, 0); // reserved
+        offset += sizeUint32;
+
+        // Pool
+
+        if (infoSize) // NOTE(review): infoSize is info.size() + 1, so this looks always true
+        {
+            memcpy(&writeBuffer->GetWritableData()[offset], info.c_str(), infoSize);
+            offset += infoSize;
+        }
+
+        memcpy(&writeBuffer->GetWritableData()[offset], hardwareVersion.c_str(), hardwareVersionSize);
+        offset += hardwareVersionSize;
+        memcpy(&writeBuffer->GetWritableData()[offset], softwareVersion.c_str(), softwareVersionSize);
+        offset += softwareVersionSize;
+        memcpy(&writeBuffer->GetWritableData()[offset], processName.c_str(), processNameSize);
+        offset += processNameSize;
+
+        if (!packetVersions.empty()) // NOTE(review): always true here, thirteen entries are pushed above
+        {
+            // Packet Version Count
+            WriteUint32(writeBuffer, offset, numberOfVersions << 16); // count is placed in the upper 16 bits
+            offset += sizeUint32;
+
+            // Packet Version Entries
+            for (std::pair<uint32_t, uint32_t>& packetVersion : packetVersions)
+            {
+                WriteUint32(writeBuffer, offset, packetVersion.first);
+                offset += sizeUint32;
+                WriteUint32(writeBuffer, offset, packetVersion.second);
+                offset += sizeUint32;
+            }
+        }
+    }
+    catch(...) // NOTE(review): the original exception is discarded and replaced by the fixed message below
+    {
+        CancelOperationAndThrow<arm::pipe::ProfilingException>(writeBuffer, "Error processing packet.");
+    }
+
+    m_BufferManager.Commit(writeBuffer, totalSize, false);
+}
+
+bool SendCounterPacket::CreateCategoryRecord(const CategoryPtr& category,    // in: category to encode (asserted)
+                                             const Counters& counters,       // in: looked up by counter UID
+                                             CategoryRecord& categoryRecord, // out: resized and filled on success
+                                             std::string& errorMessage)      // out: set when false is returned
+{   // Encodes one category as words: 3 fixed words, event pointer table, name, then the event records.
+    ARM_PIPE_ASSERT(category);
+
+    const std::string& categoryName = category->m_Name;
+    ARM_PIPE_ASSERT(!categoryName.empty());
+
+    // Keep only counters whose UID equals their max counter UID (presumably drops duplicates - TODO confirm)
+    std::vector<uint16_t> categoryCounters;
+    for (size_t counterIndex = 0; counterIndex < category->m_Counters.size(); ++counterIndex)
+    {
+        uint16_t counterUid = category->m_Counters.at(counterIndex);
+        auto it = counters.find(counterUid);
+        if (it == counters.end())
+        {
+            errorMessage = fmt::format("Counter ({}) not found in category ({})",
+                                       counterUid,
+                                       category->m_Name );
+            return false;
+        }
+
+        const CounterPtr& counter = it->second;
+
+        if (counterUid == counter->m_MaxCounterUid)
+        {
+            categoryCounters.emplace_back(counterUid);
+        }
+    }
+    if (categoryCounters.empty())
+    {
+        errorMessage = fmt::format("No valid counters found in category ({})", categoryName);
+        return false;
+    }
+
+    // Utils
+    const size_t uint32_t_size = sizeof(uint32_t);
+
+    // Convert the category name into a SWTrace namestring
+    std::vector<uint32_t> categoryNameBuffer;
+    if (!arm::pipe::StringToSwTraceString<arm::pipe::SwTraceNameCharPolicy>(categoryName, categoryNameBuffer))
+    {
+        errorMessage = fmt::format("Cannot convert the name of category ({}) to an SWTrace namestring",
+                                   categoryName);
+        return false;
+    }
+
+    // Category record word 1:
+    // 16:31 [16] event_count: number of events belonging to this category
+    // 0:15  [16] reserved: all zeros
+    const uint32_t categoryRecordWord1 = static_cast<uint32_t>(categoryCounters.size()) << 16;
+
+    // Category record word 2:
+    // 0:31 [32] event_pointer_table_offset: offset from the beginning of the category data pool to
+    //                                       the event_pointer_table
+    const uint32_t categoryRecordWord2 = static_cast<uint32_t>(3u * uint32_t_size);
+
+    // Process the event records
+    const size_t counterCount = categoryCounters.size();
+    std::vector<EventRecord> eventRecords(counterCount);
+    std::vector<uint32_t> eventRecordOffsets(counterCount, 0);
+    size_t eventRecordsSize = 0;
+    uint32_t eventRecordsOffset = arm::pipe::numeric_cast<uint32_t>(
+                    (eventRecords.size() + categoryNameBuffer.size()) * uint32_t_size); // pointer table + name, bytes
+    for (size_t counterIndex = 0, eventRecordIndex = 0, eventRecordOffsetIndex = 0;
+         counterIndex < counterCount;
+         counterIndex++, eventRecordIndex++, eventRecordOffsetIndex++)
+    {
+        uint16_t counterUid = categoryCounters.at(counterIndex);
+        auto it = counters.find(counterUid); // every UID in categoryCounters was already found in the loop above
+        const CounterPtr& counter = it->second;
+
+        EventRecord& eventRecord = eventRecords.at(eventRecordIndex);
+        if (!CreateEventRecord(counter, eventRecord, errorMessage))
+        {
+            return false;
+        }
+
+        // Update the total size in words of the event records
+        eventRecordsSize += eventRecord.size();
+
+        // Add the event record offset to the event pointer table offset field
+        eventRecordOffsets[eventRecordOffsetIndex] = eventRecordsOffset;
+        eventRecordsOffset += arm::pipe::numeric_cast<uint32_t>(eventRecord.size() * uint32_t_size);
+    }
+
+    // Category record word 3:
+    // 0:31 [32] name_offset (offset from the beginning of the category data pool to the name field)
+    const uint32_t categoryRecordWord3 = arm::pipe::numeric_cast<uint32_t>(
+            (3u + eventRecordOffsets.size()) * uint32_t_size);
+
+    // Calculate the size in words of the category record
+    const size_t categoryRecordSize = 3u +// The size of the fixed part (event_count + reserved +
+                                          // event_pointer_table_offset + name_offset)
+                                      eventRecordOffsets.size() + // The size of the variable part (
+                                      categoryNameBuffer.size() + // the event pointer table + the category name
+                                      eventRecordsSize;           // including the null-terminator + the event records)
+
+    // Allocate the necessary space for the category record
+    categoryRecord.resize(categoryRecordSize);
+
+    ARM_PIPE_NO_CONVERSION_WARN_BEGIN
+    // Create the category record
+    categoryRecord[0] = categoryRecordWord1; // event_count + reserved
+    categoryRecord[1] = categoryRecordWord2; // event_pointer_table_offset
+    categoryRecord[2] = categoryRecordWord3; // name_offset
+    auto offset = categoryRecord.begin() + 3u;
+    std::copy(eventRecordOffsets.begin(), eventRecordOffsets.end(), offset); // event_pointer_table
+    offset += eventRecordOffsets.size();
+    std::copy(categoryNameBuffer.begin(), categoryNameBuffer.end(), offset); // name
+    offset += categoryNameBuffer.size();
+    for (const EventRecord& eventRecord : eventRecords)
+    {
+        std::copy(eventRecord.begin(), eventRecord.end(), offset); // event_record
+        offset += eventRecord.size();
+    }
+    ARM_PIPE_NO_CONVERSION_WARN_END
+
+    return true;
+}
+
+bool SendCounterPacket::CreateDeviceRecord(const DevicePtr& device,    // in: device to encode (asserted)
+                                           DeviceRecord& deviceRecord, // out: resized and filled on success
+                                           std::string& errorMessage)  // out: set when false is returned
+{   // Encodes one device as words: uid/cores word, name offset word, then the SWTrace-encoded name.
+    ARM_PIPE_ASSERT(device);
+
+    uint16_t deviceUid = device->m_Uid;
+    const std::string& deviceName = device->m_Name;
+    uint16_t deviceCores = device->m_Cores;
+
+    ARM_PIPE_ASSERT(!deviceName.empty());
+
+    // Device record word 0:
+    // 16:31 [16] uid: the unique identifier for the device
+    // 0:15  [16] cores: the number of individual streams of counters for one or more cores of some device
+    const uint32_t deviceRecordWord0 = (static_cast<uint32_t>(deviceUid) << 16) |
+                                 (static_cast<uint32_t>(deviceCores));
+
+    // Device record word 1:
+    // 0:31 [32] name_offset: offset from the beginning of the device record pool to the name field
+    const uint32_t deviceRecordWord1 = 8u; // The offset is always eight here, as the name field is always
+                                           // the first (and only) item in the pool and there are two device words
+
+    // Convert the device name into a SWTrace string
+    std::vector<uint32_t> deviceNameBuffer;
+    if (!arm::pipe::StringToSwTraceString<arm::pipe::SwTraceCharPolicy>(deviceName, deviceNameBuffer))
+    {
+        errorMessage = fmt::format("Cannot convert the name of device {} ({}) to an SWTrace string",
+                                   deviceUid,
+                                   deviceName);
+        return false;
+    }
+
+    // Calculate the size in words of the device record
+    const size_t deviceRecordSize = 2u + // The size of the fixed part (uid + cores + name_offset)
+                              deviceNameBuffer.size(); // The size of the variable part (the device name including
+                                                       // the null-terminator)
+
+    // Allocate the necessary space for the device record
+    deviceRecord.resize(deviceRecordSize);
+
+    // Create the device record
+    deviceRecord[0] = deviceRecordWord0; // uid + cores
+    deviceRecord[1] = deviceRecordWord1; // name_offset
+    auto offset = deviceRecord.begin() + 2u; // the name starts right after the two fixed words
+    std::copy(deviceNameBuffer.begin(), deviceNameBuffer.end(), offset); // name
+
+    return true;
+}
+
+bool SendCounterPacket::CreateCounterSetRecord(const CounterSetPtr& counterSet,    // in: counter set to encode
+                                               CounterSetRecord& counterSetRecord, // out: resized and filled
+                                               std::string& errorMessage)          // out: set when false is returned
+{   // Encodes one counter set as words: uid/count word, name offset word, then the SWTrace-encoded name.
+    ARM_PIPE_ASSERT(counterSet);
+
+    uint16_t counterSetUid = counterSet->m_Uid;
+    const std::string& counterSetName = counterSet->m_Name;
+    uint16_t counterSetCount = counterSet->m_Count;
+
+    ARM_PIPE_ASSERT(!counterSetName.empty());
+
+    // Counter set record word 0:
+    // 16:31 [16] uid: the unique identifier for the counter_set
+    // 0:15  [16] count: the number of counters which can be active in this set at any one time
+    const uint32_t counterSetRecordWord0 = (static_cast<uint32_t>(counterSetUid) << 16) |
+                                           (static_cast<uint32_t>(counterSetCount));
+
+    // Counter set record word 1:
+    // 0:31 [32] name_offset: offset from the beginning of the counter set pool to the name field
+    const uint32_t counterSetRecordWord1 = 8u; // The offset is always eight here, as the name field is always
+                                               // the first (and only) item in the pool after the two counter set words
+
+    // Convert the counter set name into a SWTrace namestring
+    std::vector<uint32_t> counterSetNameBuffer;
+    if (!arm::pipe::StringToSwTraceString<arm::pipe::SwTraceNameCharPolicy>(counterSet->m_Name, counterSetNameBuffer))
+    {
+        errorMessage = fmt::format("Cannot convert the name of counter set {} ({}) to an SWTrace namestring",
+                                   counterSetUid,
+                                   counterSetName);
+        return false;
+    }
+
+    // Calculate the size in words of the counter set record
+    const size_t counterSetRecordSize = 2u + // The size of the fixed part (uid + count + name_offset)
+                                        counterSetNameBuffer.size(); // The size of the variable part (the counter set
+                                                                     // name including the null-terminator)
+
+    // Allocate the space for the counter set record
+    counterSetRecord.resize(counterSetRecordSize);
+
+    // Create the counter set record
+    counterSetRecord[0] = counterSetRecordWord0; // uid + count
+    counterSetRecord[1] = counterSetRecordWord1; // name_offset
+    auto offset = counterSetRecord.begin() + 2u; // the name starts right after the two fixed words
+    std::copy(counterSetNameBuffer.begin(), counterSetNameBuffer.end(), offset); // name
+
+    return true;
+}
+
+bool SendCounterPacket::CreateEventRecord(const CounterPtr& counter, // in: counter to encode (asserted)
+                                          EventRecord& eventRecord,  // out: resized and filled on success
+                                          std::string& errorMessage) // out: set when false is returned
+{   // Encodes one counter as words: 8 fixed words, then the name, the description and (optionally) the units.
+    ARM_PIPE_ASSERT(counter);
+
+    uint16_t           counterUid           = counter->m_Uid;
+    uint16_t           maxCounterUid        = counter->m_MaxCounterUid;
+    uint16_t           deviceUid            = counter->m_DeviceUid;
+    uint16_t           counterSetUid        = counter->m_CounterSetUid;
+    uint16_t           counterClass         = counter->m_Class;
+    uint16_t           counterInterpolation = counter->m_Interpolation;
+    double             counterMultiplier    = counter->m_Multiplier;
+    const std::string& counterName          = counter->m_Name;
+    const std::string& counterDescription   = counter->m_Description;
+    const std::string& counterUnits         = counter->m_Units;
+
+    ARM_PIPE_ASSERT(counterClass == 0 || counterClass == 1);
+    ARM_PIPE_ASSERT(counterInterpolation == 0 || counterInterpolation == 1);
+    ARM_PIPE_ASSERT(counterMultiplier);
+
+    // Utils
+    const size_t uint32_t_size = sizeof(uint32_t);
+    // eventRecordBlockSize is the size of the fixed part
+    // (counter_uid + max_counter_uid + device +
+    // counter_set + class + interpolation +
+    // multiplier + name_offset + description_offset +
+    // units_offset)
+    const size_t eventRecordBlockSize = 8u;
+
+    // Event record word 0:
+    // 16:31 [16] max_counter_uid: if the device this event is associated with has more than one core and there
+    //                             is one of these counters per core this value will be set to
+    //                             (counter_uid + cores (from device_record)) - 1.
+    //                             If there is only a single core then this value will be the same as
+    //                             the counter_uid value
+    // 0:15  [16] counter_uid: unique ID for the counter. Must be unique across all counters in all categories
+    const uint32_t eventRecordWord0 = (static_cast<uint32_t>(maxCounterUid) << 16) |
+                                      (static_cast<uint32_t>(counterUid));
+
+    // Event record word 1:
+    // 16:31 [16] device: UID of the device this event is associated with. Set to zero if the event is NOT
+    //                    associated with a device
+    // 0:15  [16] counter_set: UID of the counter_set this event is associated with. Set to zero if the event
+    //                         is NOT associated with a counter_set
+    const uint32_t eventRecordWord1 = (static_cast<uint32_t>(deviceUid) << 16) |
+                                      (static_cast<uint32_t>(counterSetUid));
+
+    // Event record word 2:
+    // 16:31 [16] class: type describing how to treat each data point in a stream of data points
+    // 0:15  [16] interpolation: type describing how to interpolate each data point in a stream of data points
+    const uint32_t eventRecordWord2 = (static_cast<uint32_t>(counterClass) << 16) |
+                                      (static_cast<uint32_t>(counterInterpolation));
+
+    // Event record word 3-4:
+    // 0:63 [64] multiplier: internal data stream is represented as integer values, this allows scaling of
+    //                       those values as if they are fixed point numbers. Zero is not a valid value
+    uint32_t multiplier[2] = { 0u, 0u };
+    ARM_PIPE_ASSERT(sizeof(counterMultiplier) == sizeof(multiplier));
+    std::memcpy(multiplier, &counterMultiplier, sizeof(multiplier)); // copy the raw bytes of the double
+    const uint32_t eventRecordWord3 = multiplier[0];
+    const uint32_t eventRecordWord4 = multiplier[1];
+
+    // Event record word 5:
+    // 0:31 [32] name_offset: offset from the beginning of the event record pool to the name field
+    const uint32_t eventRecordWord5 = static_cast<uint32_t>(eventRecordBlockSize * uint32_t_size);
+
+    // Convert the counter name into a SWTrace string
+    std::vector<uint32_t> counterNameBuffer;
+    if (!arm::pipe::StringToSwTraceString<arm::pipe::SwTraceCharPolicy>(counterName, counterNameBuffer))
+    {
+        errorMessage = fmt::format("Cannot convert the name of counter {} (name: {}) to an SWTrace string",
+                                   counterUid,
+                                   counterName);
+        return false;
+    }
+
+    // Event record word 6:
+    // 0:31 [32] description_offset: offset from the beginning of the event record pool to the description field
+    // i.e. the fixed block plus the encoded name, in bytes
+    uint32_t eventRecordWord6 =
+            static_cast<uint32_t>((counterNameBuffer.size() + eventRecordBlockSize) * uint32_t_size);
+
+    // Convert the counter description into a SWTrace string
+    std::vector<uint32_t> counterDescriptionBuffer;
+    if (!arm::pipe::StringToSwTraceString<arm::pipe::SwTraceCharPolicy>(counterDescription, counterDescriptionBuffer))
+    {
+        errorMessage = fmt::format("Cannot convert the description of counter {} (description: {}) "
+                                   "to an SWTrace string",
+                                   counterUid,
+                                   counterDescription); // report the text that failed to convert, not the name
+        return false;
+    }
+
+    // Event record word 7:
+    // 0:31 [32] units_offset: (optional) offset from the beginning of the event record pool to the units field.
+    //                         An offset value of zero indicates this field is not provided
+    bool includeUnits = !counterUnits.empty();
+    // description_offset plus the encoded description, in bytes; zero when no units are provided
+    const uint32_t eventRecordWord7 = includeUnits ?
+                                eventRecordWord6 +
+                                arm::pipe::numeric_cast<uint32_t>(counterDescriptionBuffer.size()
+                                * uint32_t_size) :
+                                0;
+
+    // Convert the counter units into a SWTrace namestring (optional)
+    std::vector<uint32_t> counterUnitsBuffer;
+    if (includeUnits)
+    {
+        // The buffer stays empty (and adds no words to the record) when the counter has no units
+        if (!arm::pipe::StringToSwTraceString<arm::pipe::SwTraceNameCharPolicy>(counterUnits, counterUnitsBuffer))
+        {
+            errorMessage = fmt::format("Cannot convert the units of counter {} (units: {}) to an SWTrace string",
+                                       counterUid,
+                                       counterUnits); // report the text that failed to convert, not the name
+            return false;
+        }
+    }
+
+    // Calculate the size in words of the event record
+    const size_t eventRecordSize = eventRecordBlockSize +
+                                   counterNameBuffer.size() +        // The size of the variable part (the counter name,
+                                   counterDescriptionBuffer.size() + // description and units
+                                   counterUnitsBuffer.size();        // including the null-terminator)
+
+    // Allocate the space for the event record
+    eventRecord.resize(eventRecordSize);
+
+    ARM_PIPE_NO_CONVERSION_WARN_BEGIN
+    // Create the event record
+    eventRecord[0] = eventRecordWord0; // max_counter_uid + counter_uid
+    eventRecord[1] = eventRecordWord1; // device + counter_set
+    eventRecord[2] = eventRecordWord2; // class + interpolation
+    eventRecord[3] = eventRecordWord3; // multiplier
+    eventRecord[4] = eventRecordWord4; // multiplier
+    eventRecord[5] = eventRecordWord5; // name_offset
+    eventRecord[6] = eventRecordWord6; // description_offset
+    eventRecord[7] = eventRecordWord7; // units_offset
+    auto offset = eventRecord.begin() + 8u;
+    std::copy(counterNameBuffer.begin(), counterNameBuffer.end(), offset); // name
+    offset += counterNameBuffer.size();
+    std::copy(counterDescriptionBuffer.begin(), counterDescriptionBuffer.end(), offset); // description
+    if (includeUnits)
+    {
+        offset += counterDescriptionBuffer.size();
+        std::copy(counterUnitsBuffer.begin(), counterUnitsBuffer.end(), offset); // units
+    }
+    ARM_PIPE_NO_CONVERSION_WARN_END
+
+    return true;
+}
+
+void SendCounterPacket::SendCounterDirectoryPacket(const ICounterDirectory& counterDirectory)
+{
+    // Get the amount of data that needs to be put into the packet
+    const uint16_t categoryCount    = counterDirectory.GetCategoryCount();
+    const uint16_t deviceCount      = counterDirectory.GetDeviceCount();
+    const uint16_t counterSetCount  = counterDirectory.GetCounterSetCount();
+
+    // Utils
+    const size_t uint32_t_size = sizeof(uint32_t);
+    const size_t packetHeaderSize = 2u;
+    const size_t bodyHeaderSize = 6u;
+    const uint32_t bodyHeaderSizeBytes = bodyHeaderSize * uint32_t_size;
+
+    // Initialize the offset for the pointer tables
+    uint32_t pointerTableOffset = 0;
+
+    // --------------
+    // Device records
+    // --------------
+
+    // Process device records
+    std::vector<DeviceRecord> deviceRecords(deviceCount);
+    const Devices& devices = counterDirectory.GetDevices();
+    std::vector<uint32_t> deviceRecordOffsets(deviceCount, 0); // device_records_pointer_table
+    size_t deviceRecordsSize = 0;
+    size_t deviceIndex = 0;
+    size_t deviceRecordOffsetIndex = 0;
+
+    pointerTableOffset = arm::pipe::numeric_cast<uint32_t>(deviceCount * uint32_t_size +
+                                                       counterSetCount * uint32_t_size +
+                                                       categoryCount   * uint32_t_size);
+    for (auto it = devices.begin(); it != devices.end(); it++)
+    {
+        const DevicePtr& device = it->second;
+        DeviceRecord& deviceRecord = deviceRecords.at(deviceIndex);
+
+        std::string errorMessage;
+        if (!CreateDeviceRecord(device, deviceRecord, errorMessage))
+        {
+            CancelOperationAndThrow<arm::pipe::ProfilingException>(errorMessage);
+        }
+
+        // Update the total size in words of the device records
+        deviceRecordsSize += deviceRecord.size();
+
+        // Add the device record offset to the device records pointer table offset field
+        deviceRecordOffsets[deviceRecordOffsetIndex] = pointerTableOffset;
+        pointerTableOffset += arm::pipe::numeric_cast<uint32_t>(deviceRecord.size() * uint32_t_size);
+
+        deviceIndex++;
+        deviceRecordOffsetIndex++;
+    }
+
+    // -------------------
+    // Counter set records
+    // -------------------
+
+    // Process counter set records
+    std::vector<CounterSetRecord> counterSetRecords(counterSetCount);
+    const CounterSets& counterSets = counterDirectory.GetCounterSets();
+    std::vector<uint32_t> counterSetRecordOffsets(counterSetCount, 0); // counter_set_records_pointer_table
+    size_t counterSetRecordsSize = 0;
+    size_t counterSetIndex = 0;
+    size_t counterSetRecordOffsetIndex = 0;
+
+    pointerTableOffset -= arm::pipe::numeric_cast<uint32_t>(deviceCount * uint32_t_size);
+    for (auto it = counterSets.begin(); it != counterSets.end(); it++)
+    {
+        const CounterSetPtr& counterSet = it->second;
+        CounterSetRecord& counterSetRecord = counterSetRecords.at(counterSetIndex);
+
+        std::string errorMessage;
+        if (!CreateCounterSetRecord(counterSet, counterSetRecord, errorMessage))
+        {
+            CancelOperationAndThrow<arm::pipe::ProfilingException>(errorMessage);
+        }
+
+        // Update the total size in words of the counter set records
+        counterSetRecordsSize += counterSetRecord.size();
+
+        // Add the counter set record offset to the counter set records pointer table offset field
+        counterSetRecordOffsets[counterSetRecordOffsetIndex] = pointerTableOffset;
+        pointerTableOffset += arm::pipe::numeric_cast<uint32_t>(counterSetRecord.size() * uint32_t_size);
+
+        counterSetIndex++;
+        counterSetRecordOffsetIndex++;
+    }
+
+    // ----------------
+    // Category records
+    // ----------------
+
+    // Process category records
+    std::vector<CategoryRecord> categoryRecords(categoryCount);
+    const Categories& categories = counterDirectory.GetCategories();
+    std::vector<uint32_t> categoryRecordOffsets(categoryCount, 0); // category_records_pointer_table
+    size_t categoryRecordsSize = 0;
+    size_t categoryIndex = 0;
+    size_t categoryRecordOffsetIndex = 0;
+
+    pointerTableOffset -= arm::pipe::numeric_cast<uint32_t>(counterSetCount * uint32_t_size);
+    for (auto it = categories.begin(); it != categories.end(); it++)
+    {
+        const CategoryPtr& category = *it;
+        CategoryRecord& categoryRecord = categoryRecords.at(categoryIndex);
+
+        std::string errorMessage;
+        if (!CreateCategoryRecord(category, counterDirectory.GetCounters(), categoryRecord, errorMessage))
+        {
+            CancelOperationAndThrow<arm::pipe::ProfilingException>(errorMessage);
+        }
+
+        // Update the total size in words of the category records
+        categoryRecordsSize += categoryRecord.size();
+
+        // Add the category record offset to the category records pointer table offset field
+        categoryRecordOffsets[categoryRecordOffsetIndex] = pointerTableOffset;
+        pointerTableOffset += arm::pipe::numeric_cast<uint32_t>(categoryRecord.size() * uint32_t_size);
+
+        categoryIndex++;
+        categoryRecordOffsetIndex++;
+    }
+
+    // Calculate the length in words of the counter directory packet's data (excludes the packet header size)
+    const size_t counterDirectoryPacketDataLength =
+                 bodyHeaderSize +                 // The size of the body header
+                 deviceRecordOffsets.size() +     // The size of the device records pointer table
+                 counterSetRecordOffsets.size() + // The size of counter set pointer table
+                 categoryRecordOffsets.size() +   // The size of category records pointer table
+                 deviceRecordsSize +              // The total size of the device records
+                 counterSetRecordsSize +          // The total size of the counter set records
+                 categoryRecordsSize;             // The total size of the category records
+
+    // Calculate the size in words of the counter directory packet (the data length plus the packet header size)
+    const size_t counterDirectoryPacketSize = packetHeaderSize +                // The size of the packet header
+                                              counterDirectoryPacketDataLength; // The data length
+
+    // Allocate the necessary space for the counter directory packet
+    std::vector<uint32_t> counterDirectoryPacket(counterDirectoryPacketSize, 0);
+
+    // -------------
+    // Packet header
+    // -------------
+
+    // Packet header word 0:
+    // 26:31 [6]  packet_family: control Packet Family
+    // 16:25 [10] packet_id: packet identifier
+    // 8:15  [8]  reserved: all zeros
+    // 0:7   [8]  reserved: all zeros
+    uint32_t packetFamily = 0;
+    uint32_t packetId = 2;
+    uint32_t packetHeaderWord0 = ((packetFamily & 0x3F) << 26) | ((packetId & 0x3FF) << 16);
+
+    // Packet header word 1:
+    // 0:31 [32] data_length: length of data, in bytes
+    uint32_t packetHeaderWord1 = arm::pipe::numeric_cast<uint32_t>(
+            counterDirectoryPacketDataLength * uint32_t_size);
+
+    // Create the packet header
+    uint32_t packetHeader[2]
+    {
+        packetHeaderWord0, // packet_family + packet_id + reserved + reserved
+        packetHeaderWord1  // data_length
+    };
+
+    // -----------
+    // Body header
+    // -----------
+
+    // Body header word 0:
+    // 16:31 [16] device_records_count: number of entries in the device_records_pointer_table
+    // 0:15  [16] reserved: all zeros
+    const uint32_t bodyHeaderWord0 = static_cast<uint32_t>(deviceCount) << 16;
+
+    // Body header word 1:
+    // 0:31 [32] device_records_pointer_table_offset: offset to the device_records_pointer_table
+    const uint32_t bodyHeaderWord1 = bodyHeaderSizeBytes; // The offset is always the bodyHeaderSize,
+                                                          // as the device record pointer table field
+                                                          // is always the first item in the pool
+
+    // Body header word 2:
+    // 16:31 [16] counter_set_count: number of entries in the counter_set_pointer_table
+    // 0:15  [16] reserved: all zeros
+    const uint32_t bodyHeaderWord2 = static_cast<uint32_t>(counterSetCount) << 16;
+
+    // Body header word 3:
+    // 0:31 [32] counter_set_pointer_table_offset: offset to the counter_set_pointer_table
+    const uint32_t bodyHeaderWord3 = arm::pipe::numeric_cast<uint32_t>(deviceRecordOffsets.size() *
+                                                                   uint32_t_size +       // The size of the
+                                                                   bodyHeaderSizeBytes); // device records pointer table
+
+    // Body header word 4:
+    // 16:31 [16] categories_count: number of entries in the categories_pointer_table
+    // 0:15  [16] reserved: all zeros
+    const uint32_t bodyHeaderWord4 = static_cast<uint32_t>(categoryCount) << 16;
+
+    // Body header word 3:
+    // 0:31 [32] categories_pointer_table_offset: offset to the categories_pointer_table
+    const uint32_t bodyHeaderWord5 =
+                   arm::pipe::numeric_cast<uint32_t>(
+                       deviceRecordOffsets.size() * uint32_t_size +     // The size of the device records
+                       counterSetRecordOffsets.size() * uint32_t_size   // pointer table, plus the size of
+                       +  bodyHeaderSizeBytes);                         // the counter set pointer table
+
+    // Create the body header
+    const uint32_t bodyHeader[bodyHeaderSize]
+    {
+        bodyHeaderWord0, // device_records_count + reserved
+        bodyHeaderWord1, // device_records_pointer_table_offset
+        bodyHeaderWord2, // counter_set_count + reserved
+        bodyHeaderWord3, // counter_set_pointer_table_offset
+        bodyHeaderWord4, // categories_count + reserved
+        bodyHeaderWord5  // categories_pointer_table_offset
+    };
+
+    ARM_PIPE_NO_CONVERSION_WARN_BEGIN
+    // Create the counter directory packet
+    auto counterDirectoryPacketOffset = counterDirectoryPacket.begin();
+    // packet_header
+    std::copy(packetHeader, packetHeader + packetHeaderSize, counterDirectoryPacketOffset);
+    counterDirectoryPacketOffset += packetHeaderSize;
+    // body_header
+    std::copy(bodyHeader, bodyHeader + bodyHeaderSize, counterDirectoryPacketOffset);
+    counterDirectoryPacketOffset += bodyHeaderSize;
+    // device_records_pointer_table
+    std::copy(deviceRecordOffsets.begin(), deviceRecordOffsets.end(), counterDirectoryPacketOffset);
+    counterDirectoryPacketOffset += deviceRecordOffsets.size();
+    // counter_set_pointer_table
+    std::copy(counterSetRecordOffsets.begin(), counterSetRecordOffsets.end(), counterDirectoryPacketOffset);
+    counterDirectoryPacketOffset += counterSetRecordOffsets.size();
+    // category_pointer_table
+    std::copy(categoryRecordOffsets.begin(), categoryRecordOffsets.end(), counterDirectoryPacketOffset);
+    counterDirectoryPacketOffset += categoryRecordOffsets.size();
+    // device_records
+    for (const DeviceRecord& deviceRecord : deviceRecords)
+    {
+        std::copy(deviceRecord.begin(), deviceRecord.end(), counterDirectoryPacketOffset); // device_record
+        counterDirectoryPacketOffset += deviceRecord.size();
+    }
+    // counter_set_records
+    for (const CounterSetRecord& counterSetRecord : counterSetRecords)
+    {
+        std::copy(counterSetRecord.begin(), counterSetRecord.end(), counterDirectoryPacketOffset); // counter_set_record
+        counterDirectoryPacketOffset += counterSetRecord.size();
+    }
+    // category_records
+    for (const CategoryRecord& categoryRecord : categoryRecords)
+    {
+        std::copy(categoryRecord.begin(), categoryRecord.end(), counterDirectoryPacketOffset); // category_record
+        counterDirectoryPacketOffset += categoryRecord.size();
+    }
+    ARM_PIPE_NO_CONVERSION_WARN_END
+
+    // Calculate the total size in bytes of the counter directory packet
+    uint32_t totalSize = arm::pipe::numeric_cast<uint32_t>(counterDirectoryPacketSize * uint32_t_size);
+
+    // Reserve space in the buffer for the packet
+    uint32_t reserved = 0;
+    IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
+
+    if (writeBuffer == nullptr || reserved < totalSize)
+    {
+        CancelOperationAndThrow<arm::pipe::BufferExhaustion>(
+            writeBuffer,
+            fmt::format("No space left in buffer. Unable to reserve ({}) bytes.", totalSize));
+    }
+
+    // Offset for writing to the buffer
+    uint32_t offset = 0;
+
+    // Write the counter directory packet to the buffer
+    for (uint32_t counterDirectoryPacketWord : counterDirectoryPacket)
+    {
+        WriteUint32(writeBuffer, offset, counterDirectoryPacketWord);
+        offset += arm::pipe::numeric_cast<uint32_t>(uint32_t_size);
+    }
+
+    m_BufferManager.Commit(writeBuffer, totalSize);
+}
+
+void SendCounterPacket::SendPeriodicCounterCapturePacket(uint64_t timestamp, const IndexValuePairsVector& values)
+{
+    // Sizes in bytes of the fixed-width fields that make up the packet
+    const uint32_t idBytes        = sizeof(uint16_t);
+    const uint32_t wordBytes      = sizeof(uint32_t);
+    const uint32_t timestampBytes = sizeof(uint64_t);
+
+    // Header word 0: packet_family [26:31] | packet_class [19:25] | packet_type [16:18]
+    const uint32_t family = 3;
+    const uint32_t klass  = 0;
+    const uint32_t type   = 0;
+    const uint32_t headerWord0 = ((family & 0x0000003F) << 26) |
+                                 ((klass  & 0x0000007F) << 19) |
+                                 ((type   & 0x00000007) << 16);
+
+    // The body is a 64-bit timestamp followed by one (16-bit id, 32-bit value) pair per entry of 'values'
+    const uint32_t pairCount = arm::pipe::numeric_cast<uint32_t>(values.size());
+    const uint32_t bodySize  = timestampBytes + pairCount * (idBytes + wordBytes);
+    const uint32_t totalSize = 2 * wordBytes + bodySize;
+
+    // Ask the buffer manager for room for the whole packet; give up if it cannot provide it
+    uint32_t reserved = 0;
+    IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
+    if (reserved < totalSize || writeBuffer == nullptr)
+    {
+        CancelOperationAndThrow<arm::pipe::BufferExhaustion>(
+            writeBuffer,
+            fmt::format("No space left in buffer. Unable to reserve ({}) bytes.", totalSize));
+    }
+
+    // Header: word 0 as built above, then the body length in bytes
+    uint32_t cursor = 0;
+    WriteUint32(writeBuffer, cursor, headerWord0);
+    cursor += wordBytes;
+    WriteUint32(writeBuffer, cursor, bodySize);
+    cursor += wordBytes;
+
+    // Body: the capture timestamp first...
+    WriteUint64(writeBuffer, cursor, timestamp);
+    cursor += timestampBytes;
+    // ...then each counter id immediately followed by its value
+    for (const auto& entry : values)
+    {
+        WriteUint16(writeBuffer, cursor, entry.counterId);
+        cursor += idBytes;
+        WriteUint32(writeBuffer, cursor, entry.counterValue);
+        cursor += wordBytes;
+    }
+    m_BufferManager.Commit(writeBuffer, totalSize);
+}
+
+void SendCounterPacket::SendPeriodicCounterSelectionPacket(uint32_t capturePeriod,
+                                                           const std::vector<uint16_t>& selectedCounterIds)
+{
+    // Sizes in bytes of the fixed-width fields that make up the packet
+    const uint32_t idBytes   = sizeof(uint16_t);
+    const uint32_t wordBytes = sizeof(uint32_t);
+
+    // Header word 0: packet_family [26:31] | packet_id [16:25]
+    const uint32_t family = 0;
+    const uint32_t ident  = 4;
+    const uint32_t headerWord0 = ((family & 0x3F) << 26) | ((ident & 0x3FF) << 16);
+
+    // The body is the 32-bit capture period followed by one 16-bit id per selected counter
+    const uint32_t idCount   = arm::pipe::numeric_cast<uint32_t>(selectedCounterIds.size());
+    const uint32_t bodySize  = wordBytes + idCount * idBytes;
+    const uint32_t totalSize = 2 * wordBytes + bodySize;
+
+    uint32_t reserved = 0;
+    IPacketBufferPtr writeBuffer = m_BufferManager.Reserve(totalSize, reserved);
+    if (reserved < totalSize || writeBuffer == nullptr)
+    {
+        CancelOperationAndThrow<arm::pipe::BufferExhaustion>(
+            writeBuffer,
+            fmt::format("No space left in buffer. Unable to reserve ({}) bytes.", totalSize));
+    }
+
+    // Header: word 0 as built above, then the body length in bytes
+    uint32_t cursor = 0;
+    WriteUint32(writeBuffer, cursor, headerWord0);
+    cursor += wordBytes;
+    WriteUint32(writeBuffer, cursor, bodySize);
+    cursor += wordBytes;
+    // Body: the capture period first, then every selected counter id in order
+    WriteUint32(writeBuffer, cursor, capturePeriod);
+    cursor += wordBytes;
+    for (const uint16_t selectedId : selectedCounterIds)
+    {
+        WriteUint16(writeBuffer, cursor, selectedId);
+        cursor += idBytes;
+    }
+    m_BufferManager.Commit(writeBuffer, totalSize);
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/SendCounterPacket.hpp b/profiling/client/src/SendCounterPacket.hpp
new file mode 100644
index 0000000..ace7ed1
--- /dev/null
+++ b/profiling/client/src/SendCounterPacket.hpp
@@ -0,0 +1,100 @@
+//
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IBufferManager.hpp"
+#include "ProfilingUtils.hpp"
+
+#include <client/include/ISendCounterPacket.hpp>
+
+#include <type_traits>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class SendCounterPacket : public ISendCounterPacket
+{
+public:
+    using CategoryRecord        = std::vector<uint32_t>;
+    using DeviceRecord          = std::vector<uint32_t>;
+    using CounterSetRecord      = std::vector<uint32_t>;
+    using EventRecord           = std::vector<uint32_t>;
+    using IndexValuePairsVector = std::vector<CounterValue>;
+
+    /// Keeps a reference to @p buffer (which must therefore outlive this object) and copies the three strings.
+    SendCounterPacket(IBufferManager& buffer,
+                      const std::string& softwareInfo,
+                      const std::string& softwareVersion,
+                      const std::string& hardwareVersion)
+        : m_BufferManager(buffer)
+        , m_SoftwareInfo(softwareInfo)
+        , m_SoftwareVersion(softwareVersion)
+        , m_HardwareVersion(hardwareVersion)
+    {}
+
+    // Overrides of the ISendCounterPacket interface
+    void SendStreamMetaDataPacket() override;
+
+    void SendCounterDirectoryPacket(const ICounterDirectory& counterDirectory) override;
+
+    void SendPeriodicCounterCapturePacket(uint64_t timestamp, const IndexValuePairsVector& values) override;
+
+    void SendPeriodicCounterSelectionPacket(uint32_t capturePeriod,
+                                            const std::vector<uint16_t>& selectedCounterIds) override;
+
+private:
+    /// Throws @p ExceptionType built from @p errorMessage; this overload has no buffer to release.
+    template <typename ExceptionType>
+    void CancelOperationAndThrow(const std::string& errorMessage)
+    {
+        throw ExceptionType(errorMessage);
+    }
+
+    /// Releases @p writerBuffer (when it is not null) and then throws @p ExceptionType built from
+    /// @p errorMessage. For arm::pipe::BufferExhaustion the buffer manager's read list is flushed first.
+    template <typename ExceptionType>
+    void CancelOperationAndThrow(IPacketBufferPtr& writerBuffer, const std::string& errorMessage)
+    {
+        const bool outOfBufferSpace = std::is_same<ExceptionType, arm::pipe::BufferExhaustion>::value;
+        if (outOfBufferSpace)
+        {
+            m_BufferManager.FlushReadList();
+        }
+
+        if (writerBuffer != nullptr)
+        {
+            m_BufferManager.Release(writerBuffer);
+        }
+
+        throw ExceptionType(errorMessage);
+    }
+
+    // Source of the packet buffers; only referenced, never owned
+    IBufferManager& m_BufferManager;
+
+protected:
+    // Record builders, protected (not private) so that tests can call them. SendCounterDirectoryPacket treats a
+    // false result from CreateCategoryRecord as a failure and throws with the errorMessage it was given back.
+    bool CreateCategoryRecord(const CategoryPtr& category, const Counters& counters,
+                              CategoryRecord& categoryRecord, std::string& errorMessage);
+    bool CreateDeviceRecord(const DevicePtr& device, DeviceRecord& deviceRecord, std::string& errorMessage);
+    bool CreateCounterSetRecord(const CounterSetPtr& counterSet, CounterSetRecord& counterSetRecord,
+                                std::string& errorMessage);
+    bool CreateEventRecord(const CounterPtr& counter, EventRecord& eventRecord, std::string& errorMessage);
+
+private:
+    // Copies of the strings handed to the constructor
+    std::string m_SoftwareInfo;
+    std::string m_SoftwareVersion;
+    std::string m_HardwareVersion;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/SendThread.cpp b/profiling/client/src/SendThread.cpp
new file mode 100644
index 0000000..7fb8e65
--- /dev/null
+++ b/profiling/client/src/SendThread.cpp
@@ -0,0 +1,272 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SendThread.hpp"
+#include "ProfilingUtils.hpp"
+
+#include <common/include/NumericCast.hpp>
+#include <common/include/ProfilingException.hpp>
+
+#include <cstring>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+// Stores the collaborators, clears every flag and registers this object as the buffer manager's consumer.
+SendThread::SendThread(ProfilingStateMachine& profilingStateMachine, IBufferManager& buffer,
+                       ISendCounterPacket& sendCounterPacket, int timeout)
+    : m_StateMachine(profilingStateMachine)
+    , m_BufferManager(buffer)
+    , m_SendCounterPacket(sendCounterPacket)
+    , m_Timeout(timeout)
+    , m_IsRunning(false), m_KeepRunning(false)
+    , m_ReadyToRead(false) // wait predicates: given a defined value up front, because
+    , m_PacketSent(false)  // WaitForPacketSent() can read m_PacketSent before Start() has ever run
+    , m_SendThreadException(nullptr)
+{
+    m_BufferManager.SetConsumer(this);
+}
+
+void SendThread::SetReadyToRead()
+{
+    // m_ReadyToRead is the predicate the send thread waits on, so it is changed here under m_WaitMutex
+    std::unique_lock<std::mutex> guard(m_WaitMutex);
+    m_ReadyToRead = true;
+    guard.unlock();
+
+    // The mutex has been released; wake the send thread so that it re-checks the predicate
+    m_WaitCondition.notify_one();
+}
+
+// Starts the send thread on profilingConnection, unless it is already marked as running.
+void SendThread::Start(IProfilingConnection& profilingConnection)
+{
+    // Nothing to do when the send thread is already marked as running
+    if (m_IsRunning)
+    {
+        return;
+    }
+
+    // A thread object left over from an earlier run has to be joined before it can be assigned to again
+    if (m_SendThread.joinable())
+    {
+        m_SendThread.join();
+    }
+
+    // Mark the thread as running and let the loop in Send() keep going until Stop() clears the flag
+    m_IsRunning   = true;
+    m_KeepRunning = true;
+
+    // Start with nothing to read, so that the send thread waits until it is signalled. No mutex is taken
+    // because the send thread cannot be running at this point
+    m_ReadyToRead = false;
+
+    // No packet has been sent by the new run yet
+    m_PacketSent = false;
+
+    // Launch Send() on its own thread; std::ref makes it receive profilingConnection itself rather than a copy
+    m_SendThread = std::thread(&SendThread::Send, this, std::ref(profilingConnection));
+}
+
+// Asks the send thread to stop and waits for it to finish. When rethrowSendThreadExceptions is true, an
+// exception stored by the send thread is rethrown to the caller, once.
+void SendThread::Stop(bool rethrowSendThreadExceptions)
+{
+    // Signal the send thread to stop
+    m_KeepRunning.store(false);
+
+    // Check that the send thread is running
+    if (m_SendThread.joinable())
+    {
+        // Kick the send thread out of the wait condition
+        SetReadyToRead();
+        // Wait for the send thread to complete operations
+        m_SendThread.join();
+    }
+
+    // Done, unless the caller wants a stored exception and there is one
+    if (!rethrowSendThreadExceptions || !m_SendThreadException)
+    {
+        return;
+    }
+
+    // std::rethrow_exception never returns, so anything placed after it is dead code. Take the exception
+    // out of the member first: clearing it afterwards would never happen, and every later Stop(true)
+    // would throw the same exception again
+    std::exception_ptr pending = m_SendThreadException;
+    m_SendThreadException = nullptr;
+
+    // Rethrow the send thread exception on the calling thread
+    std::rethrow_exception(pending);
+}
+
+// Body of the send thread. Each pass picks its action from the current profiling state; the loop ends when
+// m_KeepRunning is cleared, after which the buffer is flushed one last time.
+void SendThread::Send(IProfilingConnection& profilingConnection)
+{
+    // do/while: the body runs at least once, and 'continue' jumps to the m_KeepRunning check at the bottom
+    do
+    {
+        // Check the current state of the profiling service
+        ProfilingState currentState = m_StateMachine.GetCurrentState();
+        switch (currentState)
+        {
+        case ProfilingState::Uninitialised:
+        case ProfilingState::NotConnected:
+            // The send thread cannot be running when the profiling service is uninitialized or not connected,
+            // stop the thread immediately
+            m_KeepRunning.store(false);
+            m_IsRunning.store(false);
+
+            // Nothing is thrown on this thread: the exception is only stored here, and Stop() rethrows it
+            // on the thread that calls it (when asked to)
+            m_SendThreadException =
+                std::make_exception_ptr(arm::pipe::ProfilingException(
+                "The send thread should not be running with the profiling service not yet initialized or connected"));
+
+            // Leave the function directly; the final flush after the loop is skipped on this path
+            return;
+        case ProfilingState::WaitingForAck:
+            // Send out a StreamMetadata packet and wait for the profiling connection to be acknowledged.
+            // When a ConnectionAcknowledged packet is received, the profiling service state is expected to be
+            // updated by the command handler (that code is not part of this file)
+
+            // Prepare a StreamMetadata packet and write it to the Counter Stream buffer
+            m_SendCounterPacket.SendStreamMetaDataPacket();
+
+            // Flush the buffer manually to send the packet
+            FlushBuffer(profilingConnection);
+
+            // Wait for m_ReadyToRead to be set, for at most std::max(m_Timeout, 1000) milliseconds, i.e. for
+            // no less than one second and never indefinitely, even when m_Timeout is negative
+            // Wait condition lock scope - Begin
+            {
+                std::unique_lock<std::mutex> lock(m_WaitMutex);
+                // NOTE(review): wait_for with a predicate returns the predicate's final value, so despite its
+                // name 'timeout' is true when m_ReadyToRead was set and false when the wait expired
+                bool timeout = m_WaitCondition.wait_for(lock,
+                                                        std::chrono::milliseconds(std::max(m_Timeout, 1000)),
+                                                        [&]{ return m_ReadyToRead; });
+                // Flag set: go round again without clearing it and without the flush at the end of the loop
+                if(timeout)
+                {
+                    // NOTE(review): the original comments described this branch as the timed-out case; confirm
+                    continue;
+                }
+                // Wait expired: the flag is already false here, fall through to the flush below
+                m_ReadyToRead = false;
+            }
+            // Wait condition lock scope - End
+            break;
+        case ProfilingState::Active:
+        default:
+            // Wait condition lock scope - Begin
+            {
+                std::unique_lock<std::mutex> lock(m_WaitMutex);
+
+                // Normal working state for the send thread
+                // Check if the send thread is required to enforce a timeout wait policy
+                if (m_Timeout < 0)
+                {
+                    // Wait indefinitely until notified that something to read has become available in the buffer
+                    m_WaitCondition.wait(lock, [&] { return m_ReadyToRead; });
+                }
+                else
+                {
+                    // Wait until the thread is notified of something to read from the buffer,
+                    // or check anyway after the specified number of milliseconds
+                    m_WaitCondition.wait_for(lock, std::chrono::milliseconds(m_Timeout), [&] { return m_ReadyToRead; });
+                }
+
+                // Reset the condition variable predicate for next use
+                m_ReadyToRead = false;
+            }
+            // Wait condition lock scope - End
+            break;
+        }
+
+        // Send all the available packets in the buffer
+        FlushBuffer(profilingConnection);
+    } while (m_KeepRunning.load());
+
+    // Ensure that all readable data got written to the profiling connection before the thread is stopped
+    // (do not notify any watcher in this case, as this is just to wrap up things before shutting down the send thread)
+    FlushBuffer(profilingConnection, false);
+
+    // Mark the send thread as not running
+    m_IsRunning.store(false);
+}
+
+// Writes every readable buffer to profilingConnection, marking each one as read. When at least one packet
+// was written and notifyWatchers is true, m_PacketSent is set and one waiter is notified.
+// Buffers are consumed even when nothing can be written (no data, or the connection is closed).
+void SendThread::FlushBuffer(IProfilingConnection& profilingConnection, bool notifyWatchers)
+{
+    // Becomes true as soon as one packet has been handed to the connection
+    bool wroteSomething = false;
+
+    // Work through the readable buffers, one per iteration, until the buffer manager
+    // has none left to hand out
+    for (IPacketBufferPtr current = m_BufferManager.GetReadableBuffer();
+         current != nullptr;
+         current = m_BufferManager.GetReadableBuffer())
+    {
+        // The bytes to send from this buffer
+        const unsigned char* payload = current->GetReadableData();
+        unsigned int payloadSize = current->GetSize();
+
+        // A buffer with no data has nothing to send, and while the connection is closed the data is
+        // dropped silently; only in the remaining case is a packet written
+        const bool hasPayload = (payload != nullptr) && (payloadSize != 0);
+        if (hasPayload && profilingConnection.IsOpen())
+        {
+            // The outcome of the write is not checked: write errors are ignored on purpose
+            profilingConnection.WritePacket(payload, arm::pipe::numeric_cast<uint32_t>(payloadSize));
+
+            // At least one packet has been sent by this flush
+            wroteSomething = true;
+        }
+
+        // In every case the buffer is marked as read before the next one is fetched,
+        // so a buffer is never looked at twice
+        m_BufferManager.MarkRead(current);
+    }
+
+    // Watchers only hear about a flush that actually sent something,
+    // and only when the caller wants them to
+    if (!wroteSomething || !notifyWatchers)
+    {
+        return;
+    }
+
+    // Record that something was sent while holding the mutex that guards the flag
+    // (this waits for a watcher that currently holds it)...
+    std::unique_lock<std::mutex> guard(m_PacketSentWaitMutex);
+    m_PacketSent = true;
+    guard.unlock();
+
+    // ...and notify one watcher once the mutex has been released
+    m_PacketSentWaitCondition.notify_one();
+}
+
+// Blocks until a packet has been flagged as sent or 'timeout' milliseconds have elapsed.
+// Returns true when m_PacketSent was set in time and false when the wait expired; the flag is cleared either way.
+bool SendThread::WaitForPacketSent(uint32_t timeout = 1000)
+{
+    std::unique_lock<std::mutex> guard(m_PacketSentWaitMutex);
+    // wait_for with a predicate yields the predicate's final value: true means the flag was set
+    const bool packetSent = m_PacketSentWaitCondition.wait_for(
+        guard, std::chrono::milliseconds(timeout), [this] { return m_PacketSent; });
+
+    m_PacketSent = false;
+    return packetSent;
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/SendThread.hpp b/profiling/client/src/SendThread.hpp
new file mode 100644
index 0000000..b96a6d5
--- /dev/null
+++ b/profiling/client/src/SendThread.hpp
@@ -0,0 +1,77 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IBufferManager.hpp"
+#include "IConsumer.hpp"
+#include "ISendThread.hpp"
+#include "IProfilingConnection.hpp"
+#include "ProfilingStateMachine.hpp"
+#include "ProfilingUtils.hpp"
+
+#include <client/include/ISendCounterPacket.hpp>
+
+#include <common/include/ICounterDirectory.hpp>
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <type_traits>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class SendThread : public ISendThread, public IConsumer
+{
+public:
+    /// @param timeout wait period, in milliseconds, used by the send loop (1000 by default).
+    SendThread(ProfilingStateMachine& profilingStateMachine, IBufferManager& buffer,
+               ISendCounterPacket& sendCounterPacket, int timeout = 1000);
+    /// Stops the send thread; an exception stored by the send thread is deliberately not rethrown here.
+    ~SendThread() { Stop(false); }
+
+    /// Launches the send thread on @p profilingConnection unless it is already running.
+    void Start(IProfilingConnection& profilingConnection) override;
+
+    /// Stops the send thread, optionally rethrowing an exception that it stored.
+    void Stop(bool rethrowSendThreadExceptions = true) override;
+
+    /// Flags that there is something to read and wakes the send thread.
+    void SetReadyToRead() override;
+
+    /// True while the send thread is marked as running.
+    bool IsRunning() { return m_IsRunning; }
+
+    /// Waits up to @p timeout milliseconds for a packet to be flagged as sent.
+    bool WaitForPacketSent(uint32_t timeout);
+
+private:
+    void Send(IProfilingConnection& profilingConnection);
+    void FlushBuffer(IProfilingConnection& profilingConnection, bool notifyWatchers = true);
+
+    ProfilingStateMachine& m_StateMachine;
+    IBufferManager& m_BufferManager;
+    ISendCounterPacket& m_SendCounterPacket;
+    int m_Timeout;
+    std::mutex m_WaitMutex;
+    std::condition_variable m_WaitCondition;
+    std::thread m_SendThread;
+    std::atomic<bool> m_IsRunning;
+    std::atomic<bool> m_KeepRunning;
+    bool m_ReadyToRead; // guarded by m_WaitMutex
+    bool m_PacketSent;  // guarded by m_PacketSentWaitMutex
+    std::exception_ptr m_SendThreadException;
+    std::mutex m_PacketSentWaitMutex;
+    std::condition_variable m_PacketSentWaitCondition;
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/SendTimelinePacket.cpp b/profiling/client/src/SendTimelinePacket.cpp
new file mode 100644
index 0000000..62d1530
--- /dev/null
+++ b/profiling/client/src/SendTimelinePacket.cpp
@@ -0,0 +1,163 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SendTimelinePacket.hpp"
+
+namespace arm
+{
+
+namespace pipe
+{
+
+// Commits whatever has been written to the current buffer and returns the object to its idle state.
+void SendTimelinePacket::Commit()
+{
+    // Without a reserved buffer there is nothing to commit
+    if (m_WriteBuffer == nullptr)
+    {
+        return;
+    }
+
+    if (m_DirectoryPackage)
+    {
+        // No header is prepended to a directory package: the bytes written so far are committed as they are
+        m_DirectoryPackage = false;
+        m_BufferManager.Commit(m_WriteBuffer, m_Offset);
+    }
+    else
+    {
+        // The first two words of the buffer were left free for the header, so the data length is
+        // everything that was written beyond them
+        const auto headerBytes = m_uint32_t_size * 2;
+        m_PacketDataLength = m_Offset - headerBytes;
+
+        // Go back to the start of the buffer and fill in the header now that the data length is known
+        m_Offset = 0;
+        m_PacketHeader = CreateTimelinePacketHeader(1, 0, 1, 0, 0, m_PacketDataLength);
+        WriteUint32(m_WriteBuffer->GetWritableData(), m_Offset, m_PacketHeader.first);
+        m_Offset += m_uint32_t_size;
+        WriteUint32(m_WriteBuffer->GetWritableData(), m_Offset, m_PacketHeader.second);
+
+        // The committed size covers the header as well as the data
+        m_BufferManager.Commit(m_WriteBuffer, m_PacketDataLength + headerBytes);
+    }
+
+    // Let go of the buffer and restore the idle state: offset just past the header slot, no space left
+    m_WriteBuffer.reset(nullptr);
+    m_Offset = 8;
+    m_RemainingBufferSize = 0;
+}
+
+// Reserves the write buffer on first use and works out how much of it is available for payload.
+// Throws arm::pipe::BufferExhaustion when no buffer is available or the reservation is smaller than m_Offset.
+void SendTimelinePacket::ReserveBuffer()
+{
+    if (m_WriteBuffer != nullptr)
+    {
+        // Buffer already reserved
+        return;
+    }
+
+    // Reserve the buffer
+    uint32_t reserved = 0;
+    m_WriteBuffer = m_BufferManager.Reserve(MAX_METADATA_PACKET_LENGTH, reserved);
+
+    if (m_WriteBuffer == nullptr)
+    {
+        throw arm::pipe::BufferExhaustion("No free buffers left", LOCATION());
+    }
+    if (reserved < m_Offset)
+    {
+        // Give the unusable buffer back before reporting the failure: keeping it would leak the reservation
+        // and make every later call return early with m_RemainingBufferSize still unset
+        m_BufferManager.Release(m_WriteBuffer);
+        m_WriteBuffer.reset(nullptr);
+        throw arm::pipe::BufferExhaustion("Reserved space too small for use", LOCATION());
+    }
+
+    // A directory package is written from offset 0 and may use the whole reservation; every other packet
+    // loses 8 bytes to the header that Commit() prepends
+    m_RemainingBufferSize = m_DirectoryPackage ? reserved : reserved - 8;
+}
+
+// Forwards profilingGuid to WriteTimelineEntityBinary through ForwardWriteBinaryFunction.
+void SendTimelinePacket::SendTimelineEntityBinaryPacket(uint64_t profilingGuid)
+{
+    ForwardWriteBinaryFunction(WriteTimelineEntityBinary, profilingGuid);
+}
+
+// Forwards the event's timestamp, thread id and GUID to WriteTimelineEventBinary
+// through ForwardWriteBinaryFunction.
+void SendTimelinePacket::SendTimelineEventBinaryPacket(uint64_t timestamp,
+                                                       int threadId,
+                                                       uint64_t profilingGuid)
+{
+    ForwardWriteBinaryFunction(WriteTimelineEventBinary,
+                               timestamp, threadId, profilingGuid);
+}
+
+// Forwards the event class GUID and its name GUID to WriteTimelineEventClassBinary
+// through ForwardWriteBinaryFunction.
+void SendTimelinePacket::SendTimelineEventClassBinaryPacket(uint64_t profilingGuid, uint64_t nameGuid)
+{
+    ForwardWriteBinaryFunction(WriteTimelineEventClassBinary, profilingGuid, nameGuid);
+}
+
+// Forwards the label GUID and the label text to WriteTimelineLabelBinaryPacket
+// through ForwardWriteBinaryFunction.
+void SendTimelinePacket::SendTimelineLabelBinaryPacket(uint64_t profilingGuid, const std::string& label)
+{
+    ForwardWriteBinaryFunction(WriteTimelineLabelBinaryPacket, profilingGuid, label);
+}
+
+// Forwards the relationship type and the relationship, head, tail and attribute GUIDs,
+// in that order, to WriteTimelineRelationshipBinary through ForwardWriteBinaryFunction.
+void SendTimelinePacket::SendTimelineRelationshipBinaryPacket(ProfilingRelationshipType relationshipType,
+                                                              uint64_t relationshipGuid,
+                                                              uint64_t headGuid,
+                                                              uint64_t tailGuid,
+                                                              uint64_t attributeGuid)
+{
+    ForwardWriteBinaryFunction(WriteTimelineRelationshipBinary,
+                               relationshipType,
+                               relationshipGuid,
+                               headGuid, tailGuid, attributeGuid);
+}
+
+// Writes the directory package and commits it; on failure the idle state is restored, then ProfilingException thrown.
+void SendTimelinePacket::SendTimelineMessageDirectoryPackage()
+{
+    try
+    {
+        m_DirectoryPackage = true; // tells ReserveBuffer() and Commit() that no header is prepended
+        ReserveBuffer();
+        unsigned int numberOfBytesWritten = 0;
+        const TimelinePacketStatus result = WriteTimelineMessageDirectoryPackage(
+            m_WriteBuffer->GetWritableData(), m_RemainingBufferSize, numberOfBytesWritten);
+        if (result != TimelinePacketStatus::Ok)
+        {
+            throw arm::pipe::ProfilingException("Error processing TimelineMessageDirectoryPackage", LOCATION());
+        }
+        m_Offset               = numberOfBytesWritten; // written from offset 0, no 8-byte header slot in front
+        m_RemainingBufferSize -= numberOfBytesWritten;
+        Commit();
+    }
+    catch (...)
+    {
+        if (m_WriteBuffer != nullptr)
+        {
+            m_BufferManager.Release(m_WriteBuffer);
+            m_WriteBuffer.reset(nullptr);
+        }
+        m_DirectoryPackage    = false; // otherwise the next packet would be reserved and committed headerless
+        m_Offset              = 8;     // same idle state that Commit() leaves behind
+        m_RemainingBufferSize = 0;
+        throw arm::pipe::ProfilingException("Error processing TimelineMessageDirectoryPackage", LOCATION());
+    }
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/SendTimelinePacket.hpp b/profiling/client/src/SendTimelinePacket.hpp
new file mode 100644
index 0000000..f20671f
--- /dev/null
+++ b/profiling/client/src/SendTimelinePacket.hpp
@@ -0,0 +1,129 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IBufferManager.hpp"
+#include "ProfilingUtils.hpp"
+
+#include <client/include/ISendTimelinePacket.hpp>
+
+#include <common/include/Assert.hpp>
+
+#include <memory>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class SendTimelinePacket : public ISendTimelinePacket // ISendTimelinePacket implementation backed by an IBufferManager
+{
+public:
+    SendTimelinePacket(IBufferManager& bufferManager)
+      : m_BufferManager(bufferManager)
+      , m_WriteBuffer(nullptr)
+      , m_Offset(8u) // packets start 8 bytes into the buffer; presumably room for the header added on Commit()
+      , m_RemainingBufferSize(0u)
+      , m_PacketDataLength(0u)
+    {}
+
+    /// Commits the current buffer and resets the member variables
+    void Commit() override;
+
+    /// Create and write a TimelineEntityBinaryPacket from the parameters to the buffer.
+    void SendTimelineEntityBinaryPacket(uint64_t profilingGuid) override;
+
+    /// Create and write a TimelineEventBinaryPacket from the parameters to the buffer.
+    void SendTimelineEventBinaryPacket(uint64_t timestamp, int threadId, uint64_t profilingGuid) override;
+
+    /// Create and write a TimelineEventClassBinaryPacket from the parameters to the buffer.
+    void SendTimelineEventClassBinaryPacket(uint64_t profilingGuid, uint64_t nameGuid) override;
+
+    /// Create and write a TimelineLabelBinaryPacket from the parameters to the buffer.
+    void SendTimelineLabelBinaryPacket(uint64_t profilingGuid, const std::string& label) override;
+
+    /// Create and write a TimelineMessageDirectoryPackage in the buffer
+    void SendTimelineMessageDirectoryPackage() override;
+
+    /// Create and write a TimelineRelationshipBinaryPacket from the parameters to the buffer.
+    virtual void SendTimelineRelationshipBinaryPacket(ProfilingRelationshipType relationshipType,
+                                                      uint64_t relationshipGuid,
+                                                      uint64_t headGuid,
+                                                      uint64_t tailGuid,
+                                                      uint64_t attributeGuid) override;
+private:
+    /// Reserves maximum packet size from buffer
+    void ReserveBuffer();
+    /// Calls func(params..., write position, remaining size, bytes written) and retries on BufferExhaustion.
+    template <typename Func, typename ... Params>
+    void ForwardWriteBinaryFunction(Func& func, Params&& ... params);
+
+    IBufferManager&  m_BufferManager;       // held by reference, not owned
+    IPacketBufferPtr m_WriteBuffer;         // buffer currently written to; null after construction
+    unsigned int     m_Offset;              // index in m_WriteBuffer where the next packet is written
+    unsigned int     m_RemainingBufferSize; // space handed to the packet writers; reduced by each packet's size
+
+    const unsigned int m_uint32_t_size = sizeof(uint32_t);
+
+    std::pair<uint32_t, uint32_t> m_PacketHeader;
+    uint32_t                      m_PacketDataLength;
+
+    bool m_DirectoryPackage = false; // set to true by SendTimelineMessageDirectoryPackage() for Reserve & Commit()
+};
+
+/// Serialises one timeline packet through func, which is called as func(params..., destination, remainingSize,
+/// bytesWritten). On BufferExhaustion the buffer is committed, ReserveBuffer() is called again and the write retried.
+/// Throws arm::pipe::ProfilingException on TimelinePacketStatus::Error or on any unrecognised exception.
+template<typename Func, typename ... Params>
+void SendTimelinePacket::ForwardWriteBinaryFunction(Func& func, Params&& ... params)
+{
+    try
+    {
+        ReserveBuffer();
+        ARM_PIPE_ASSERT(m_WriteBuffer);
+        unsigned int numberOfBytesWritten = 0;
+        // Header will be prepended to the buffer on Commit()
+        while ( true )
+        {
+            TimelinePacketStatus result = func(std::forward<Params>(params)...,
+                                               &m_WriteBuffer->GetWritableData()[m_Offset],
+                                               m_RemainingBufferSize, numberOfBytesWritten);
+            switch ( result )
+            {
+                case TimelinePacketStatus::BufferExhaustion:
+                    Commit();
+                    ReserveBuffer();
+                    continue;
+                case TimelinePacketStatus::Error:
+                    throw arm::pipe::ProfilingException("Error processing while sending TimelineBinaryPacket",
+                                                        LOCATION());
+                default:
+                    m_Offset += numberOfBytesWritten;
+                    m_RemainingBufferSize -= numberOfBytesWritten;
+                    return;
+            }
+        }
+    }
+    catch (const arm::pipe::BufferExhaustion&)
+    {
+        // Rethrow the original exception object: `throw ex;` would copy it and slice any derived type
+        throw;
+    }
+    catch (const arm::pipe::ProfilingException&)
+    {
+        // don't swallow in the catch all block; rethrown unchanged for the same reason as above
+        throw;
+    }
+    catch ( ... )
+    {
+        throw arm::pipe::ProfilingException("Unknown Exception thrown while sending TimelineBinaryPacket", LOCATION());
+    }
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/SocketProfilingConnection.cpp b/profiling/client/src/SocketProfilingConnection.cpp
new file mode 100644
index 0000000..a211567
--- /dev/null
+++ b/profiling/client/src/SocketProfilingConnection.cpp
@@ -0,0 +1,225 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SocketProfilingConnection.hpp"
+
+#include <common/include/SocketConnectionException.hpp>
+
+#include <cerrno>
+#include <cstring>
+#include <fcntl.h>
+#include <string>
+
+
+namespace arm
+{
+namespace pipe
+{
+
+/// Creates the unix domain stream socket, connects it to m_GatorNamespace and makes it non blocking.
+/// Throws arm::pipe::SocketConnectionException as soon as one of these steps fails.
+SocketProfilingConnection::SocketProfilingConnection()
+{
+    arm::pipe::Initialize();
+    memset(m_Socket, 0, sizeof(m_Socket));
+    // Note: we're using Linux specific SOCK_CLOEXEC flag.
+    m_Socket[0].fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+    if (m_Socket[0].fd == -1)
+    {
+        throw arm::pipe::SocketConnectionException(
+            std::string("SocketProfilingConnection: Socket construction failed: ")  + strerror(errno),
+            m_Socket[0].fd, errno);
+    }
+    const int socketFd = m_Socket[0].fd; // Close() zeroes m_Socket: keep the descriptor for the error reports
+
+    // Connect to the named unix domain socket. server{} is value-initialised, i.e. already zero filled.
+    sockaddr_un server{};
+    // As m_GatorNamespace begins with a null character we need to ignore that when getting its length.
+    memcpy(server.sun_path, m_GatorNamespace, strlen(m_GatorNamespace + 1) + 1);
+    server.sun_family = AF_UNIX;
+    if (0 != connect(m_Socket[0].fd, reinterpret_cast<const sockaddr*>(&server), sizeof(sockaddr_un)))
+    {
+        const int savedErrno = errno; // Close() may overwrite errno before the message is built
+        Close();
+        throw arm::pipe::SocketConnectionException(
+            std::string("SocketProfilingConnection: Cannot connect to stream socket: ")  + strerror(savedErrno),
+            socketFd, savedErrno);
+    }
+
+    m_Socket[0].events = POLLIN; // Our socket will only be interested in polling reads.
+
+    // Make the socket non blocking.
+    if (!arm::pipe::SetNonBlocking(m_Socket[0].fd))
+    {
+        const int savedErrno = errno; // Close() may overwrite errno before the message is built
+        Close();
+        throw arm::pipe::SocketConnectionException(
+            std::string("SocketProfilingConnection: Failed to set socket as non blocking: ")  + strerror(savedErrno),
+            socketFd, savedErrno);
+    }
+}
+
+bool SocketProfilingConnection::IsOpen() const
+{
+    return 0 < m_Socket[0].fd; // Close() zeroes m_Socket, so a descriptor of 0 counts as "not open"
+}
+
+void SocketProfilingConnection::Close()
+{
+    const int closeStatus = arm::pipe::Close(m_Socket[0].fd);
+    if (closeStatus != 0)
+    {
+        throw arm::pipe::SocketConnectionException(
+            std::string("SocketProfilingConnection: Cannot close stream socket: ")  + strerror(errno),
+            m_Socket[0].fd, errno);
+    }
+    // Only a successful close wipes the poll entry; the descriptor then reads back as 0
+    memset(m_Socket, 0, sizeof(m_Socket));
+}
+
+bool SocketProfilingConnection::WritePacket(const unsigned char* buffer, uint32_t length)
+{
+    // A null buffer or an empty payload is refused without touching the socket
+    if (!buffer || length == 0u)
+    {
+        return false;
+    }
+    return -1 != arm::pipe::Write(m_Socket[0].fd, buffer, length);
+}
+
+/// Returns the next packet from the socket, waiting up to timeout (handed to Poll()) when fewer than 8 bytes are
+/// pending. Throws arm::pipe::TimeoutException or arm::pipe::SocketConnectionException when no packet can be read.
+arm::pipe::Packet SocketProfilingConnection::ReadPacket(uint32_t timeout)
+{
+    // Is there currently at least a header worth of data waiting to be read?
+    int bytes_available = 0;
+    arm::pipe::Ioctl(m_Socket[0].fd, FIONREAD, &bytes_available);
+    if (bytes_available >= 8)
+    {
+        return ReceivePacket(); // Yes there is. Read it.
+    }
+
+    // Poll for data on the socket or until timeout occurs
+    int pollResult = arm::pipe::Poll(&m_Socket[0], 1, static_cast<int>(timeout));
+    switch (pollResult)
+    {
+    case -1: // Error
+        throw arm::pipe::SocketConnectionException(
+            std::string("SocketProfilingConnection: Error occured while reading from socket: ") + strerror(errno),
+            m_Socket[0].fd, errno);
+
+    case 0: // Timeout
+        throw arm::pipe::TimeoutException("SocketProfilingConnection: Timeout while reading from socket");
+
+    default: // Normal poll return but it could still contain an error signal
+        // Check if the socket reported an error. Each branch below only fires when its flag is the sole bit set
+        // in revents, so a combination such as POLLIN | POLLHUP falls through to the POLLIN check further down.
+        if (m_Socket[0].revents & (POLLNVAL | POLLERR | POLLHUP))
+        {
+            const int polledFd = m_Socket[0].fd; // Close() zeroes m_Socket: keep the descriptor for the reports
+            if (m_Socket[0].revents == POLLNVAL)
+            {
+                // This is an unrecoverable error.
+                Close();
+                throw arm::pipe::SocketConnectionException(
+                    std::string("SocketProfilingConnection: Error occured while polling receiving socket: POLLNVAL."),
+                    polledFd);
+            }
+            if (m_Socket[0].revents == POLLERR)
+            {
+                throw arm::pipe::SocketConnectionException(
+                    std::string(
+                        "SocketProfilingConnection: Error occured while polling receiving socket: POLLERR: ")
+                        + strerror(errno),
+                    polledFd, errno);
+            }
+            if (m_Socket[0].revents == POLLHUP)
+            {
+                // This is an unrecoverable error.
+                Close();
+                throw arm::pipe::SocketConnectionException(
+                    std::string("SocketProfilingConnection: Connection closed by remote client: POLLHUP."),
+                    polledFd);
+            }
+        }
+
+        // Check if there is data to read
+        if (!(m_Socket[0].revents & (POLLIN)))
+        {
+            // This is a corner case. The socket has been woken up but not with any data.
+            // We'll throw a timeout exception to loop around again.
+            throw arm::pipe::TimeoutException(
+                "SocketProfilingConnection: File descriptor was polled but no data was available to receive.");
+        }
+
+        return ReceivePacket();
+    }
+}
+
+/// Reads one packet: an 8 byte header (stream_metadata_identifier, data_length) followed by data_length bytes.
+/// Throws arm::pipe::SocketConnectionException on EOF, on a read error or when either part arrives incomplete.
+arm::pipe::Packet SocketProfilingConnection::ReceivePacket()
+{
+    char header[8] = {};
+    long receiveResult = arm::pipe::Read(m_Socket[0].fd, &header, sizeof(header));
+    const int readErr = errno;           // Close() may overwrite errno ...
+    const int socketFd = m_Socket[0].fd; // ... and zeroes m_Socket, so keep both for the error reports
+    // We expect 8 as the result here. 0 means EOF, socket is closed. -1 means there been some other kind of error.
+    switch( receiveResult )
+    {
+        case 0:
+            // Socket has closed.
+            Close();
+            throw arm::pipe::SocketConnectionException(
+                std::string("SocketProfilingConnection: Remote socket has closed the connection."), socketFd);
+        case -1:
+            // There's been a socket error. We will presume it's unrecoverable.
+            Close();
+            throw arm::pipe::SocketConnectionException(
+                std::string("SocketProfilingConnection: Error occured while reading the packet: ") + strerror(readErr),
+                socketFd, readErr);
+        default:
+            if (receiveResult < 8)
+            {
+                throw arm::pipe::SocketConnectionException(
+                    std::string("SocketProfilingConnection: The received packet did not contains a valid PIPE header."),
+                    socketFd);
+            }
+            break;
+    }
+
+    // stream_metadata_identifier is the first 4 bytes
+    uint32_t metadataIdentifier = 0;
+    std::memcpy(&metadataIdentifier, header, sizeof(metadataIdentifier));
+
+    // data_length is the next 4 bytes
+    uint32_t dataLength = 0;
+    std::memcpy(&dataLength, header + 4u, sizeof(dataLength));
+
+    std::unique_ptr<unsigned char[]> packetData;
+    if (dataLength > 0)
+    {
+        packetData = std::make_unique<unsigned char[]>(dataLength);
+        long receivedLength = arm::pipe::Read(m_Socket[0].fd, packetData.get(), dataLength);
+        if (receivedLength < 0)
+        {
+            throw arm::pipe::SocketConnectionException(
+                std::string("SocketProfilingConnection: Error occured while reading the packet: ")  + strerror(errno),
+                m_Socket[0].fd, errno);
+        }
+        if (dataLength != static_cast<uint32_t>(receivedLength))
+        {
+            // A short read is not retried: the packet is rejected (how to recover is an open question).
+            throw arm::pipe::SocketConnectionException(
+                std::string("SocketProfilingConnection: Invalid PIPE packet."),
+                m_Socket[0].fd);
+        }
+    }
+
+    return arm::pipe::Packet(metadataIdentifier, dataLength, packetData);
+}
+
+} // namespace pipe
+} // namespace arm
diff --git a/profiling/client/src/SocketProfilingConnection.hpp b/profiling/client/src/SocketProfilingConnection.hpp
new file mode 100644
index 0000000..52616c9
--- /dev/null
+++ b/profiling/client/src/SocketProfilingConnection.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "IProfilingConnection.hpp"
+
+#include <common/include/NetworkSockets.hpp>
+
+#pragma once
+
+namespace arm
+{
+namespace pipe
+{
+
+class SocketProfilingConnection : public IProfilingConnection // IProfilingConnection over a unix domain stream socket
+{
+public:
+    SocketProfilingConnection(); // connects to m_GatorNamespace; throws SocketConnectionException on failure
+    bool IsOpen() const final; // true while m_Socket[0].fd is greater than 0
+    void Close() final; // closes the descriptor and zeroes m_Socket; throws if the close fails
+    bool WritePacket(const unsigned char* buffer, uint32_t length) final; // false for null/empty input or Write() == -1
+    arm::pipe::Packet ReadPacket(uint32_t timeout) final; // timeout is handed to Poll(); throws when nothing is read
+
+private:
+    // Helper used by ReadPacket() once data is known or reported to be available.
+    // Read a full packet from the socket.
+    arm::pipe::Packet ReceivePacket();
+
+#ifndef __APPLE__
+    // To indicate we want to use an abstract UDS ensure the first character of the address is 0.
+    const char* m_GatorNamespace = "\0gatord_namespace";
+#else
+    // MACOSX does not support abstract UDS
+    const char* m_GatorNamespace = "/tmp/gatord_namespace";
+#endif
+    arm::pipe::PollFd m_Socket[1]{}; // single poll entry holding the connection's descriptor
+};
+
+} // namespace pipe
+} // namespace arm
diff --git a/profiling/client/src/TimelinePacketWriterFactory.cpp b/profiling/client/src/TimelinePacketWriterFactory.cpp
new file mode 100644
index 0000000..78eda79
--- /dev/null
+++ b/profiling/client/src/TimelinePacketWriterFactory.cpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "TimelinePacketWriterFactory.hpp"
+
+#include "SendTimelinePacket.hpp"
+
+namespace arm
+{
+
+namespace pipe
+{
+
+std::unique_ptr<ISendTimelinePacket> TimelinePacketWriterFactory::GetSendTimelinePacket() const
+{
+    return std::unique_ptr<ISendTimelinePacket>(std::make_unique<SendTimelinePacket>(m_BufferManager));
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/TimelinePacketWriterFactory.hpp b/profiling/client/src/TimelinePacketWriterFactory.hpp
new file mode 100644
index 0000000..607770f
--- /dev/null
+++ b/profiling/client/src/TimelinePacketWriterFactory.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IBufferManager.hpp"
+
+#include <client/include/ISendTimelinePacket.hpp>
+
+#include <memory>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+class TimelinePacketWriterFactory // creates ISendTimelinePacket writers that share one IBufferManager
+{
+public:
+    TimelinePacketWriterFactory(IBufferManager& bufferManager) : m_BufferManager(bufferManager) {}
+    /// Returns a newly created writer (a SendTimelinePacket) bound to the buffer manager given at construction.
+    std::unique_ptr<ISendTimelinePacket> GetSendTimelinePacket() const;
+
+private:
+    IBufferManager& m_BufferManager; // held by reference, not owned
+};
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/TimelineUtilityMethods.cpp b/profiling/client/src/TimelineUtilityMethods.cpp
new file mode 100644
index 0000000..27c13ca
--- /dev/null
+++ b/profiling/client/src/TimelineUtilityMethods.cpp
@@ -0,0 +1,423 @@
+//
+// Copyright © 2019 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "ProfilingUtils.hpp"
+
+#include <client/include/TimelineUtilityMethods.hpp>
+
+#include <common/include/LabelsAndEventClasses.hpp>
+#include <common/include/Threads.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+std::unique_ptr<TimelineUtilityMethods> TimelineUtilityMethods::GetTimelineUtils(IProfilingService& profilingService)
+{
+    // Utilities are only handed out while the service is active and timeline reporting is enabled
+    const bool isActive = profilingService.GetCurrentState() == ProfilingState::Active;
+    if (!isActive || !profilingService.IsTimelineReportingEnabled())
+    {
+        // Otherwise the caller receives an empty pointer
+        return std::unique_ptr<TimelineUtilityMethods>();
+    }
+
+    std::unique_ptr<ISendTimelinePacket> packetWriter = profilingService.GetSendTimelinePacket();
+    return std::make_unique<TimelineUtilityMethods>(packetWriter);
+}
+
+
+/// Sends the well known labels and the two life cycle event classes through timelinePacket, then commits it.
+/// Every send call throws in case of error; the remaining packets and the final Commit() are then skipped
+/// because nothing in this function catches the exception.
+void TimelineUtilityMethods::SendWellKnownLabelsAndEventClasses(ISendTimelinePacket& timelinePacket)
+{
+    // Shorthand for the class holding the well known GUIDs and label strings
+    using Known = LabelsAndEventClasses;
+
+    // Forwards one GUID/label pair to the packet writer. The parameter types mirror those of
+    // ISendTimelinePacket::SendTimelineLabelBinaryPacket, so the arguments convert exactly as in a direct call.
+    const auto sendLabel = [&timelinePacket](uint64_t guid, const std::string& label)
+    {
+        timelinePacket.SendTimelineLabelBinaryPacket(guid, label);
+    };
+
+    // The "name" label
+    sendLabel(Known::NAME_GUID, Known::NAME_LABEL);
+
+    // The "type" label
+    sendLabel(Known::TYPE_GUID, Known::TYPE_LABEL);
+
+    // The "index" label
+    sendLabel(Known::INDEX_GUID, Known::INDEX_LABEL);
+
+    // The "backendId" label
+    sendLabel(Known::BACKENDID_GUID, Known::BACKENDID_LABEL);
+
+    // The "child" label
+    sendLabel(Known::CHILD_GUID, Known::CHILD_LABEL);
+
+    // The "execution_of" label
+    sendLabel(Known::EXECUTION_OF_GUID, Known::EXECUTION_OF_LABEL);
+
+    // The "process_id" label
+    sendLabel(Known::PROCESS_ID_GUID, Known::PROCESS_ID_LABEL);
+
+    // The "layer" label
+    sendLabel(Known::LAYER_GUID, Known::LAYER);
+
+    // The "workload" label
+    sendLabel(Known::WORKLOAD_GUID, Known::WORKLOAD);
+
+    // The "network" label
+    sendLabel(Known::NETWORK_GUID, Known::NETWORK);
+
+    // The "connection" label
+    sendLabel(Known::CONNECTION_GUID, Known::CONNECTION);
+
+    // The "inference" label
+    sendLabel(Known::INFERENCE_GUID, Known::INFERENCE);
+
+    // The "workload_execution" label
+    sendLabel(Known::WORKLOAD_EXECUTION_GUID, Known::WORKLOAD_EXECUTION);
+
+    // The "start of life" event class: the label carrying its name goes out first,
+    // followed by the event class that refers to that label
+    sendLabel(Known::ARMNN_PROFILING_SOL_EVENT_CLASS_NAME_GUID, Known::ARMNN_PROFILING_SOL_EVENT_CLASS_NAME);
+    timelinePacket.SendTimelineEventClassBinaryPacket(Known::ARMNN_PROFILING_SOL_EVENT_CLASS,
+                                                      Known::ARMNN_PROFILING_SOL_EVENT_CLASS_NAME_GUID);
+
+    // The "end of life" event class, sent the same way
+    sendLabel(Known::ARMNN_PROFILING_EOL_EVENT_CLASS_NAME_GUID, Known::ARMNN_PROFILING_EOL_EVENT_CLASS_NAME);
+    timelinePacket.SendTimelineEventClassBinaryPacket(Known::ARMNN_PROFILING_EOL_EVENT_CLASS,
+                                                      Known::ARMNN_PROFILING_EOL_EVENT_CLASS_NAME_GUID);
+
+    // Everything has been written: commit the packet writer
+    timelinePacket.Commit();
+}
+
+ProfilingDynamicGuid TimelineUtilityMethods::CreateNamedTypedEntity(const std::string& name, const std::string& type)
+{
+    // An entity needs a non-empty name...
+    if (name.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity name, the entity name cannot be empty");
+    }
+
+    // ...as well as a non-empty type
+    if (type.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity type, the entity type cannot be empty");
+    }
+
+    // A dynamic GUID is only drawn for valid input; the GUID-taking overload then sends the packets
+    const ProfilingDynamicGuid newEntityGuid = IProfilingService::GetNextGuid();
+    CreateNamedTypedEntity(newEntityGuid, name, type);
+
+    // Hand the generated GUID back to the caller
+    return newEntityGuid;
+}
+
+void TimelineUtilityMethods::CreateNamedTypedEntity(ProfilingGuid entityGuid,
+                                                    const std::string& name,
+                                                    const std::string& type)
+{
+    // Reject an empty entity name before anything is sent
+    if (name.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity name, the entity name cannot be empty");
+    }
+
+    // Reject an empty entity type before anything is sent
+    if (type.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity type, the entity type cannot be empty");
+    }
+
+    // Send Entity Binary Packet of the entity to the external profiling service
+    m_SendTimelinePacket->SendTimelineEntityBinaryPacket(entityGuid);
+
+    // Declare the name label and link it to the entity (NameEntity -> MarkEntityWithLabel with NAME_GUID)
+    NameEntity(entityGuid, name);
+
+    // Declare the type label and link it to the entity (TypeEntity -> MarkEntityWithLabel with TYPE_GUID)
+    TypeEntity(entityGuid, type);
+}
+
+void TimelineUtilityMethods::CreateNamedTypedEntity(ProfilingGuid entityGuid,
+                                                    const std::string& name,
+                                                    ProfilingStaticGuid typeGuid)
+{
+    // Reject an empty entity name before anything is sent (the type arrives as a GUID and is not checked here)
+    if (name.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity name, the entity name cannot be empty");
+    }
+
+    // Send Entity Binary Packet of the entity to the external profiling service
+    m_SendTimelinePacket->SendTimelineEntityBinaryPacket(entityGuid);
+
+    // Declare the name label and link it to the entity (NameEntity -> MarkEntityWithLabel with NAME_GUID)
+    NameEntity(entityGuid, name);
+
+    // Link the entity to the given type GUID; unlike the string overload no type label is declared here
+    MarkEntityWithType(entityGuid, typeGuid);
+}
+
+ProfilingStaticGuid TimelineUtilityMethods::DeclareLabel(const std::string& labelName)
+{
+    // An empty label name cannot be declared
+    const bool hasName = !labelName.empty();
+    if (!hasName)
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid label name, the label name cannot be empty");
+    }
+
+    // The static GUID is obtained from the label name itself
+    const ProfilingStaticGuid staticLabelGuid = IProfilingService::GetStaticId(labelName);
+
+    // Publish the label to the external profiling service, this call throws in case of error
+    m_SendTimelinePacket->SendTimelineLabelBinaryPacket(staticLabelGuid, labelName);
+
+    return staticLabelGuid;
+}
+
+void TimelineUtilityMethods::MarkEntityWithLabel(ProfilingGuid entityGuid,
+                                                 const std::string& labelName,
+                                                 ProfilingStaticGuid labelTypeGuid)
+{
+    // An empty label name is rejected (note that the message speaks of the entity name)
+    if (labelName.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity name, the entity name cannot be empty");
+    }
+
+    // Declare the label first, this call throws in case of error
+    const ProfilingStaticGuid declaredLabelGuid = DeclareLabel(labelName);
+
+    // Each label link gets a dynamic GUID of its own
+    const ProfilingDynamicGuid linkGuid = IProfilingService::GetNextGuid();
+
+    // Link entity (head) and label (tail) under the given label type, this call throws in case of error
+    const ProfilingRelationshipType linkType = ProfilingRelationshipType::LabelLink;
+    m_SendTimelinePacket->SendTimelineRelationshipBinaryPacket(linkType,
+                                                               linkGuid,
+                                                               entityGuid,
+                                                               declaredLabelGuid,
+                                                               labelTypeGuid);
+}
+
+void TimelineUtilityMethods::MarkEntityWithType(ProfilingGuid entityGuid,
+                                                ProfilingStaticGuid typeNameGuid)
+{
+    // The type is attached as a label link whose attribute is TYPE_GUID;
+    // the link itself is identified by a dynamic GUID of its own
+    const ProfilingDynamicGuid linkGuid = IProfilingService::GetNextGuid();
+    const ProfilingRelationshipType linkType = ProfilingRelationshipType::LabelLink;
+
+    // Send the link to the external profiling service, this call throws in case of error
+    m_SendTimelinePacket->SendTimelineRelationshipBinaryPacket(linkType, linkGuid,
+                                                               entityGuid, typeNameGuid,
+                                                               LabelsAndEventClasses::TYPE_GUID);
+}
+
+void TimelineUtilityMethods::NameEntity(ProfilingGuid entityGuid, const std::string& name)
+{
+    MarkEntityWithLabel(entityGuid, name, LabelsAndEventClasses::NAME_GUID); // label link typed as NAME_GUID
+}
+
+void TimelineUtilityMethods::TypeEntity(ProfilingGuid entityGuid, const std::string& type)
+{
+    MarkEntityWithLabel(entityGuid, type, LabelsAndEventClasses::TYPE_GUID); // label link typed as TYPE_GUID
+}
+
+ProfilingDynamicGuid TimelineUtilityMethods::CreateNamedTypedChildEntity(ProfilingGuid parentEntityGuid,
+                                                                         const std::string& entityName,
+                                                                         const std::string& entityType)
+{
+    // The child entity needs a non-empty name...
+    if (entityName.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity name, the entity name cannot be empty");
+    }
+
+    // ...as well as a non-empty type
+    if (entityType.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity type, the entity type cannot be empty");
+    }
+
+    // Create the child under a newly generated GUID, this call throws in case of error
+    const ProfilingDynamicGuid childGuid = CreateNamedTypedEntity(entityName, entityType);
+
+    // The retention link gets a dynamic GUID of its own
+    const ProfilingDynamicGuid linkGuid = IProfilingService::GetNextGuid();
+
+    // Tie the child (tail) to its parent (head), this call throws in case of error.
+    // NOTE(review): this overload passes EMPTY_GUID as attribute whereas the overloads taking a child GUID
+    // pass CHILD_GUID - confirm that the difference is intended.
+    m_SendTimelinePacket->SendTimelineRelationshipBinaryPacket(ProfilingRelationshipType::RetentionLink,
+                                                               linkGuid,
+                                                               parentEntityGuid,
+                                                               childGuid,
+                                                               LabelsAndEventClasses::EMPTY_GUID);
+
+    return childGuid;
+}
+
+void TimelineUtilityMethods::CreateNamedTypedChildEntity(ProfilingGuid childEntityGuid,
+                                                         ProfilingGuid parentEntityGuid,
+                                                         const std::string& entityName,
+                                                         const std::string& entityType)
+{
+    // The child entity needs a non-empty name...
+    if (entityName.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity name, the entity name cannot be empty");
+    }
+
+    // ...as well as a non-empty type
+    if (entityType.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity type, the entity type cannot be empty");
+    }
+
+    // Create the child under the GUID supplied by the caller, this call throws in case of error
+    CreateNamedTypedEntity(childEntityGuid, entityName, entityType);
+
+    // The retention link gets a dynamic GUID of its own
+    const ProfilingDynamicGuid linkGuid = IProfilingService::GetNextGuid();
+    const ProfilingRelationshipType linkType = ProfilingRelationshipType::RetentionLink;
+
+    // Tie the child (tail) to its parent (head) with CHILD_GUID as attribute,
+    // this call throws in case of error
+    m_SendTimelinePacket->SendTimelineRelationshipBinaryPacket(linkType,
+                                                               linkGuid,
+                                                               parentEntityGuid,
+                                                               childEntityGuid,
+                                                               LabelsAndEventClasses::CHILD_GUID);
+}
+
+void TimelineUtilityMethods::CreateNamedTypedChildEntity(ProfilingGuid childEntityGuid,
+                                                         ProfilingGuid parentEntityGuid,
+                                                         const std::string& entityName,
+                                                         ProfilingStaticGuid typeGuid)
+{
+    // The child entity needs a non-empty name (its type arrives as a GUID and is not checked here)
+    if (entityName.empty())
+    {
+        throw arm::pipe::InvalidArgumentException("Invalid entity name, the entity name cannot be empty");
+    }
+
+    // Create the child under the GUID supplied by the caller, this call throws in case of error
+    CreateNamedTypedEntity(childEntityGuid, entityName, typeGuid);
+
+    // The retention link gets a dynamic GUID of its own
+    const ProfilingDynamicGuid linkGuid = IProfilingService::GetNextGuid();
+    const ProfilingRelationshipType linkType = ProfilingRelationshipType::RetentionLink;
+
+    // Tie the child (tail) to its parent (head) with CHILD_GUID as attribute, this call throws in case of error
+    m_SendTimelinePacket->SendTimelineRelationshipBinaryPacket(linkType,
+                                                               linkGuid,
+                                                               parentEntityGuid,
+                                                               childEntityGuid,
+                                                               LabelsAndEventClasses::CHILD_GUID);
+}
+
+ProfilingDynamicGuid TimelineUtilityMethods::CreateRelationship(ProfilingRelationshipType relationshipType,
+                                                                ProfilingGuid headGuid,
+                                                                ProfilingGuid tailGuid,
+                                                                ProfilingGuid relationshipCategory)
+{
+    // Every relationship is identified by a dynamic GUID of its own
+    const ProfilingDynamicGuid newRelationshipGuid = IProfilingService::GetNextGuid();
+
+    // Send the relationship, with the category as its attribute, this call throws in case of error
+    m_SendTimelinePacket->SendTimelineRelationshipBinaryPacket(relationshipType,
+                                                               newRelationshipGuid,
+                                                               headGuid,
+                                                               tailGuid,
+                                                               relationshipCategory);
+    return newRelationshipGuid;
+}
+
+// Sends a relationship in the CONNECTION category between headGuid and tailGuid and
+// returns the GUID generated for it.
+ProfilingDynamicGuid TimelineUtilityMethods::CreateConnectionRelationship(ProfilingRelationshipType relationshipType,
+                                                                          ProfilingGuid headGuid,
+                                                                          ProfilingGuid tailGuid)
+{
+    // A new GUID identifies the connection relationship
+    ProfilingDynamicGuid connectionGuid = IProfilingService::GetNextGuid();
+
+    // Forward the relationship to the external profiling service, this call throws in case of error
+    m_SendTimelinePacket->SendTimelineRelationshipBinaryPacket(
+        relationshipType, connectionGuid, headGuid, tailGuid, LabelsAndEventClasses::CONNECTION_GUID);
+
+    return connectionGuid;
+}
+
+void TimelineUtilityMethods::CreateTypedEntity(ProfilingGuid entityGuid, ProfilingStaticGuid entityTypeGuid)
+{
+    // Announce the entity itself to the external profiling service
+    m_SendTimelinePacket->SendTimelineEntityBinaryPacket(entityGuid);
+
+    // Then tag it: MarkEntityWithType sends the relationship between the entity and entityTypeGuid
+    MarkEntityWithType(entityGuid, entityTypeGuid);
+}
+
+// Records an event against entityGuid: emits a timestamped event packet, then an execution
+// link tying the entity to that event under eventClassGuid.
+// Returns the GUID of the event.
+ProfilingDynamicGuid TimelineUtilityMethods::RecordEvent(ProfilingGuid entityGuid, ProfilingStaticGuid eventClassGuid)
+{
+    // Capture the time and the calling thread before anything else is done
+    uint64_t eventTime = GetTimestamp();
+    int callerThreadId = arm::pipe::GetCurrentThreadId();
+
+    // The event gets its own GUID and is sent to the external profiling service first,
+    // this call throws in case of error
+    ProfilingDynamicGuid newEventGuid = IProfilingService::GetNextGuid();
+    m_SendTimelinePacket->SendTimelineEventBinaryPacket(eventTime, callerThreadId, newEventGuid);
+
+    // The execution link is a relationship with a GUID of its own (generated after the event's),
+    // this call throws in case of error
+    ProfilingDynamicGuid linkGuid = IProfilingService::GetNextGuid();
+    m_SendTimelinePacket->SendTimelineRelationshipBinaryPacket(ProfilingRelationshipType::ExecutionLink,
+                                                               linkGuid,
+                                                               entityGuid,
+                                                               newEventGuid,
+                                                               eventClassGuid);
+
+    // Callers receive the event GUID, not the link GUID
+    return newEventGuid;
+}
+
+// Creates a workload-execution entity, links it to both the inference (CHILD) and the workload
+// (EXECUTION_OF), records its start-of-life event and returns the new entity's GUID.
+ProfilingDynamicGuid TimelineUtilityMethods::RecordWorkloadInferenceAndStartOfLifeEvent(ProfilingGuid workloadGuid,
+                                                                                        ProfilingGuid inferenceGuid)
+{
+    ProfilingDynamicGuid executionGuid = IProfilingService::GetNextGuid();
+    CreateTypedEntity(executionGuid, LabelsAndEventClasses::WORKLOAD_EXECUTION_GUID);
+
+    // Both links are retention links with the new entity at the tail
+    const ProfilingRelationshipType linkType = ProfilingRelationshipType::RetentionLink;
+    CreateRelationship(linkType, inferenceGuid, executionGuid, LabelsAndEventClasses::CHILD_GUID);
+    CreateRelationship(linkType, workloadGuid, executionGuid, LabelsAndEventClasses::EXECUTION_OF_GUID);
+
+    RecordEvent(executionGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
+    return executionGuid;
+}
+
+void TimelineUtilityMethods::RecordEndOfLifeEvent(ProfilingGuid entityGuid)
+{   // Records an event of class ARMNN_PROFILING_EOL_EVENT_CLASS for entityGuid; the event GUID is not returned
+    RecordEvent(entityGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
+}
+
+} // namespace pipe
+
+} // namespace arm
diff --git a/profiling/client/src/backends/BackendProfiling.cpp b/profiling/client/src/backends/BackendProfiling.cpp
new file mode 100644
index 0000000..c33d5a7
--- /dev/null
+++ b/profiling/client/src/backends/BackendProfiling.cpp
@@ -0,0 +1,97 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BackendProfiling.hpp"
+#include <client/src/RegisterBackendCounters.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+std::unique_ptr<IRegisterBackendCounters>
+    BackendProfiling::GetCounterRegistrationInterface(uint16_t currentMaxGlobalCounterID)
+{
+    // Construct the registrar in place on the heap; no temporary RegisterBackendCounters is built and copied
+    return std::make_unique<RegisterBackendCounters>(currentMaxGlobalCounterID, m_BackendId, m_ProfilingService);
+}
+
+std::unique_ptr<ISendTimelinePacket> BackendProfiling::GetSendTimelinePacket()
+{   // Delegates to the profiling service and hands its result straight back to the caller
+    return m_ProfilingService.GetSendTimelinePacket();
+}
+
+IProfilingGuidGenerator& BackendProfiling::GetProfilingGuidGenerator()
+{
+    // No separate generator object exists: the profiling service itself is returned as the GUID generator.
+    return m_ProfilingService;
+}
+
+// Sends one Periodic Counter Capture packet per timestamp, with every backend-local counter id
+// translated to its global id first. The caller's data is left untouched (a copy is translated).
+void BackendProfiling::ReportCounters(const std::vector<Timestamp>& timestamps)
+{
+    for (const auto& timestampInfo : timestamps)
+    {
+        // Work on a copy: timestamps is const and must keep its backend-local ids
+        std::vector<CounterValue> globalCounterValues = timestampInfo.counterValues;
+        for (CounterValue& counterValue : globalCounterValues)
+        {
+            counterValue.counterId =
+                m_ProfilingService.GetCounterMappings().GetGlobalId(counterValue.counterId, m_BackendId);
+        }
+        m_ProfilingService.GetSendCounterPacket().SendPeriodicCounterCapturePacket(
+            timestampInfo.timestamp, globalCounterValues);
+    }
+}
+
+// Reports whether a backend counter is currently being captured. The status carries both the
+// backend-local and the global id; the sampling rate is 0 unless the counter is enabled.
+CounterStatus BackendProfiling::GetCounterStatus(uint16_t backendCounterId)
+{
+    // Translate the backend-local id, then take the current capture data
+    const uint16_t globalId = m_ProfilingService.GetCounterMappings().GetGlobalId(backendCounterId, m_BackendId);
+    CaptureData capture = m_ProfilingService.GetCaptureData();
+
+    // A counter is enabled exactly when its global id appears in the capture data
+    if (!capture.IsCounterIdInCaptureData(globalId))
+    {
+        return CounterStatus(backendCounterId, globalId, false, 0);
+    }
+    return CounterStatus(backendCounterId, globalId, true, capture.GetCapturePeriod());
+}
+
+// Lists the counters of this backend that appear in the current capture data. Each entry is
+// marked enabled and carries the capture period as its sampling rate.
+std::vector<CounterStatus> BackendProfiling::GetActiveCounters()
+{
+    CaptureData capture = m_ProfilingService.GetCaptureData();
+    const std::vector<uint16_t>& capturedIds = capture.GetCounterIds();
+
+    std::vector<CounterStatus> result;
+    for (auto globalId : capturedIds)
+    {
+        // Map the global id back to its (backend-local counter id, backend id) pair
+        const std::pair<uint16_t, std::string>& owner =
+            m_ProfilingService.GetCounterMappings().GetBackendId(globalId);
+        // Counters registered by other backends are skipped
+        if (owner.second != m_BackendId)
+        {
+            continue;
+        }
+        result.emplace_back(owner.first, globalId, true, capture.GetCapturePeriod());
+    }
+    return result;
+}
+
+bool BackendProfiling::IsProfilingEnabled() const
+{   // Pure pass-through: the answer comes from the profiling service, no state is kept here
+    return m_ProfilingService.IsProfilingEnabled();
+}
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/src/backends/BackendProfiling.hpp b/profiling/client/src/backends/BackendProfiling.hpp
new file mode 100644
index 0000000..4db02f9
--- /dev/null
+++ b/profiling/client/src/backends/BackendProfiling.hpp
@@ -0,0 +1,53 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <client/include/backends/IBackendProfiling.hpp>
+#include <client/include/IProfilingService.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+// IBackendProfiling implementation that forwards a backend's profiling requests to an
+// IProfilingService. The service is held by reference; options and backend id are copied.
+class BackendProfiling : public IBackendProfiling
+{
+public:
+    BackendProfiling(const ProfilingOptions& options,
+                     IProfilingService& profilingService,
+                     const std::string& backendId)
+        : m_Options(options)
+        , m_ProfilingService(profilingService)
+        , m_BackendId(backendId)
+    {
+    }
+
+    ~BackendProfiling() {}
+
+    // Counter registration and timeline access
+    std::unique_ptr<IRegisterBackendCounters>
+        GetCounterRegistrationInterface(uint16_t currentMaxGlobalCounterID) override;
+    std::unique_ptr<ISendTimelinePacket> GetSendTimelinePacket() override;
+    IProfilingGuidGenerator& GetProfilingGuidGenerator() override;
+
+    // Counter reporting and status queries
+    void ReportCounters(const std::vector<Timestamp>& timestamps) override;
+    CounterStatus GetCounterStatus(uint16_t backendCounterId) override;
+    std::vector<CounterStatus> GetActiveCounters() override;
+    bool IsProfilingEnabled() const override;
+
+private:
+    ProfilingOptions   m_Options;          // copy of the options supplied at construction
+    IProfilingService& m_ProfilingService; // not owned
+    std::string        m_BackendId;        // backend id used when translating counter ids
+};
+
+}    // namespace pipe
+
+}    // namespace arm
diff --git a/profiling/client/src/backends/IBackendProfiling.cpp b/profiling/client/src/backends/IBackendProfiling.cpp
new file mode 100644
index 0000000..f172be9
--- /dev/null
+++ b/profiling/client/src/backends/IBackendProfiling.cpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BackendProfiling.hpp"
+
+#include <client/include/backends/IBackendProfiling.hpp>
+
+namespace arm
+{
+
+namespace pipe
+{
+
+std::unique_ptr<IBackendProfiling> IBackendProfiling::CreateBackendProfiling(const ProfilingOptions& options,
+                                                                             IProfilingService& profilingService,
+                                                                             const std::string& backendId)
+{   // Factory: builds the concrete BackendProfiling and returns it through the interface pointer type
+    return std::make_unique<BackendProfiling>(options, profilingService, backendId);
+}
+
+} // namespace pipe
+} // namespace arm