IVGCVSW-6412 Implement CLBackend ICustomAllocator

 * Added implementation of ClBackendDefaultAllocator
 * Added back in some pure virtual functions that were mistakenly removed from the CustomMemoryAllocatorSample
 * Added a new Gralloc MemorySource for memory which originates from the GPU
 * Added unittests

Signed-off-by: David Monahan <David.Monahan@arm.com>
Change-Id: Id2abb33d82697df36426f5709756c616af3e8ed7
diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt
index 0005c81..3fef51c 100644
--- a/src/backends/cl/CMakeLists.txt
+++ b/src/backends/cl/CMakeLists.txt
@@ -23,6 +23,7 @@
         ClBackend.hpp
         ClBackendContext.cpp
         ClBackendContext.hpp
+        ClBackendDefaultAllocator.hpp
         ClBackendId.hpp
         ClBackendModelContext.cpp
         ClBackendModelContext.hpp
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index dd58e00..5c58269 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -5,6 +5,7 @@
 
 #include "ClBackend.hpp"
 #include "ClBackendContext.hpp"
+#include "ClBackendDefaultAllocator.hpp"
 #include "ClBackendId.hpp"
 #include "ClBackendModelContext.hpp"
 #include "ClImportTensorHandleFactory.hpp"
@@ -216,6 +217,11 @@
     return layerSupport;
 }
 
+std::unique_ptr<ICustomAllocator> ClBackend::GetDefaultAllocator() const
+{
+    return std::make_unique<ClBackendDefaultAllocator>();
+}
+
 OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                   const ModelOptions& modelOptions) const
 {
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index ffce800..7597d09 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -88,6 +88,8 @@
     IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(
         const ModelOptions& modelOptions) const override;
 
+    std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
+
     BackendCapabilities GetCapabilities() const override
     {
         return gpuAccCapabilities;
diff --git a/src/backends/cl/ClBackendDefaultAllocator.hpp b/src/backends/cl/ClBackendDefaultAllocator.hpp
new file mode 100644
index 0000000..300f560
--- /dev/null
+++ b/src/backends/cl/ClBackendDefaultAllocator.hpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+#include <cstddef>
+#include <memory>
+#include <armnn/MemorySources.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
+
+namespace armnn
+{
+/** Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator() */
+class ClBackendDefaultAllocator : public armnn::ICustomAllocator
+{
+public:
+    ClBackendDefaultAllocator() = default;
+
+    void* allocate(size_t size, size_t alignment = 0) override
+    {
+        IgnoreUnused(alignment);
+        cl_mem buf{ clCreateBuffer(arm_compute::CLScheduler::get().context().get(),
+                                   CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
+                                   size,
+                                   nullptr,
+                                   nullptr)};
+        return static_cast<void *>(buf);
+    }
+
+    void free(void* ptr) override
+    {
+        ARM_COMPUTE_ERROR_ON(ptr == nullptr);
+        clReleaseMemObject(static_cast<cl_mem>(ptr));
+    }
+
+    armnn::MemorySource GetMemorySourceType() override
+    {
+        return armnn::MemorySource::Gralloc;
+    }
+
+    void* GetMemoryRegionAtOffset(void* buffer, size_t offset, size_t alignment = 0) override
+    {
+        IgnoreUnused(alignment);
+        return static_cast<char*>(buffer) + offset;
+    }
+};
+} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index 48fb2f7..a24ab56 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -46,7 +46,7 @@
     ClImportTensorHandle(const TensorInfo& tensorInfo,
                          DataLayout dataLayout,
                          MemorySourceFlags importFlags)
-        : m_ImportFlags(importFlags)
+        : m_ImportFlags(importFlags), m_Imported(false)
     {
         armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
     }
@@ -139,6 +139,48 @@
                 return ClImport(importProperties, memory, true);
 
             }
+            // Case for importing memory allocated externally by OpenCL directly into the tensor
+            else if (source == MemorySource::Gralloc)
+            {
+                // m_Tensor not yet allocated
+                if (!m_Imported && !m_Tensor.buffer())
+                {
+                    // Importing memory allocated by OpenCL into the tensor directly.
+                    arm_compute::Status status =
+                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
+                    m_Imported = bool(status);
+                    if (!m_Imported)
+                    {
+                        throw MemoryImportException(status.error_description());
+                    }
+                    return m_Imported;
+                }
+
+                // m_Tensor.buffer() initially allocated with Allocate().
+                else if (!m_Imported && m_Tensor.buffer())
+                {
+                    throw MemoryImportException(
+                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
+                }
+
+                // m_Tensor.buffer() previously imported.
+                else if (m_Imported)
+                {
+                    // Importing memory allocated by OpenCL into the tensor directly.
+                    arm_compute::Status status =
+                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
+                    m_Imported = bool(status);
+                    if (!m_Imported)
+                    {
+                        throw MemoryImportException(status.error_description());
+                    }
+                    return m_Imported;
+                }
+                else
+                {
+                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
+                }
+            }
             else
             {
                 throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
@@ -276,6 +318,7 @@
 
     arm_compute::CLTensor m_Tensor;
     MemorySourceFlags m_ImportFlags;
+    bool m_Imported;
 };
 
 class ClImportSubTensorHandle : public IClImportTensorHandle
diff --git a/src/backends/cl/test/DefaultAllocatorTests.cpp b/src/backends/cl/test/DefaultAllocatorTests.cpp
index 196c0fb..3132bc2 100644
--- a/src/backends/cl/test/DefaultAllocatorTests.cpp
+++ b/src/backends/cl/test/DefaultAllocatorTests.cpp
@@ -13,6 +13,7 @@
 #include <doctest/doctest.h>
 #include <backendsCommon/DefaultAllocator.hpp>
 #include <backendsCommon/test/MockBackend.hpp>
+#include <cl/ClBackendDefaultAllocator.hpp>
 
 using namespace armnn;
 
@@ -118,6 +119,74 @@
     backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
 }
 
+}
+
+
+TEST_SUITE("ClDefaultAllocatorTests")
+{
+
+TEST_CASE("ClDefaultAllocatorTest")
+{
+    float number = 3;
+
+    TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
+
+    // Create ArmNN runtime
+    IRuntime::CreationOptions options; // default options
+    auto customAllocator = std::make_shared<ClBackendDefaultAllocator>();
+    options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
+    IRuntimePtr run = IRuntime::Create(options);
+
+    // Creates structures for input & output
+    unsigned int numElements = inputTensorInfo.GetNumElements();
+    size_t totalBytes = numElements * sizeof(float);
+
+    void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0);
+
+    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
+    std::fill_n(inputPtr, numElements, number);
+    CHECK(inputPtr[0] == 3);
+
+    auto& backendRegistry = armnn::BackendRegistryInstance();
+    backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
+}
+
+TEST_CASE("ClDefaultAllocatorTestMulti")
+{
+    float number = 3;
+
+    TensorInfo inputTensorInfo(TensorShape({2, 1}), DataType::Float32);
+
+    // Create ArmNN runtime
+    IRuntime::CreationOptions options; // default options
+    auto customAllocator = std::make_shared<ClBackendDefaultAllocator>();
+    options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
+    IRuntimePtr run = IRuntime::Create(options);
+
+    // Creates structures for input & output
+    unsigned int numElements = inputTensorInfo.GetNumElements();
+    size_t totalBytes = numElements * sizeof(float);
+
+    void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0);
+    void* alignedInputPtr2 = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0);
+
+    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
+    std::fill_n(inputPtr, numElements, number);
+    CHECK(inputPtr[0] == 3);
+    CHECK(inputPtr[1] == 3);
+
+    auto* inputPtr2 = reinterpret_cast<float*>(alignedInputPtr2);
+    std::fill_n(inputPtr2, numElements, number);
+    CHECK(inputPtr2[0] == 3);
+    CHECK(inputPtr2[1] == 3);
+
+    // No overlap
+    CHECK(inputPtr[0] == 3);
+    CHECK(inputPtr[1] == 3);
+
+    auto& backendRegistry = armnn::BackendRegistryInstance();
+    backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
+}
 
 }