IVGCVSW-6687 Implement ICLTensorProxy

 * Implement ICLTensorProxy and unit tests
 * Remove IClImportTensorHandle and use IClTensorHandle to access ICLTensor
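
   A minimal usage sketch of the proxy (the tensor names clTensor and
   importedClTensor are illustrative; the flow mirrors the
   ChangeICLTensorProxyExecutionTest added below):

       // Configure a layer against the proxy once, then retarget the proxy
       // to a different ICLTensor without calling configure() again.
       armnn::ICLTensorProxy proxy(&clTensor);

       arm_compute::CLActivationLayer relu;
       relu.configure(&proxy, nullptr,
                      arm_compute::ActivationLayerInfo(
                          arm_compute::ActivationLayerInfo::ActivationFunction::RELU));
       relu.run();                              // executes in place on clTensor
       arm_compute::CLScheduler::get().sync();

       proxy.set(&importedClTensor);            // swap the delegate tensor
       relu.run();                              // now executes on importedClTensor
       arm_compute::CLScheduler::get().sync();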

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I791d0f2c6f8bad841a56e39e196baf0e533c7124
diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt
index 3fef51c..845ba70 100644
--- a/src/backends/cl/CMakeLists.txt
+++ b/src/backends/cl/CMakeLists.txt
@@ -44,6 +44,8 @@
         ClTensorHandleFactory.hpp
         ClWorkloadFactory.cpp
         ClWorkloadFactory.hpp
+        IClTensorHandle.hpp
+        ICLTensorProxy.hpp
         OpenClTimer.cpp
         OpenClTimer.hpp
     )
diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index 18cd1ff..a236a70 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -19,22 +19,15 @@
 #include <arm_compute/core/TensorShape.h>
 #include <arm_compute/core/Coordinates.h>
 
+#include <cl/IClTensorHandle.hpp>
+
 #include <CL/cl_ext.h>
 #include <arm_compute/core/CL/CLKernelLibrary.h>
 
 namespace armnn
 {
 
-class IClImportTensorHandle : public IAclTensorHandle
-{
-public:
-    virtual arm_compute::ICLTensor& GetTensor() = 0;
-    virtual arm_compute::ICLTensor const& GetTensor() const = 0;
-    virtual arm_compute::DataType GetDataType() const = 0;
-    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0;
-};
-
-class ClImportTensorHandle : public IClImportTensorHandle
+class ClImportTensorHandle : public IClTensorHandle
 {
 public:
     ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
@@ -380,12 +373,12 @@
     bool m_Imported;
 };
 
-class ClImportSubTensorHandle : public IClImportTensorHandle
+class ClImportSubTensorHandle : public IClTensorHandle
 {
 public:
-    ClImportSubTensorHandle(IClImportTensorHandle* parent,
-                      const arm_compute::TensorShape& shape,
-                      const arm_compute::Coordinates& coords)
+    ClImportSubTensorHandle(IClTensorHandle* parent,
+                            const arm_compute::TensorShape& shape,
+                            const arm_compute::Coordinates& coords)
     : m_Tensor(&parent->GetTensor(), shape, coords)
     {
         parentHandle = parent;
diff --git a/src/backends/cl/ClImportTensorHandleFactory.cpp b/src/backends/cl/ClImportTensorHandleFactory.cpp
index 26d5f9c..29db522 100644
--- a/src/backends/cl/ClImportTensorHandleFactory.cpp
+++ b/src/backends/cl/ClImportTensorHandleFactory.cpp
@@ -50,7 +50,7 @@
     }
 
     return std::make_unique<ClImportSubTensorHandle>(
-        PolymorphicDowncast<IClImportTensorHandle*>(&parent), shape, coords);
+        PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
 }
 
 std::unique_ptr<ITensorHandle> ClImportTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp
index 6fccb8d..f63f1fa 100644
--- a/src/backends/cl/ClTensorHandle.hpp
+++ b/src/backends/cl/ClTensorHandle.hpp
@@ -18,19 +18,11 @@
 #include <arm_compute/core/TensorShape.h>
 #include <arm_compute/core/Coordinates.h>
 
+#include <cl/IClTensorHandle.hpp>
+
 namespace armnn
 {
 
-
-class IClTensorHandle : public IAclTensorHandle
-{
-public:
-    virtual arm_compute::ICLTensor& GetTensor() = 0;
-    virtual arm_compute::ICLTensor const& GetTensor() const = 0;
-    virtual arm_compute::DataType GetDataType() const = 0;
-    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0;
-};
-
 class ClTensorHandle : public IClTensorHandle
 {
 public:
diff --git a/src/backends/cl/ICLTensorProxy.hpp b/src/backends/cl/ICLTensorProxy.hpp
new file mode 100644
index 0000000..fff9c53
--- /dev/null
+++ b/src/backends/cl/ICLTensorProxy.hpp
@@ -0,0 +1,78 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <arm_compute/core/CL/ICLTensor.h>
+#include <arm_compute/core/ITensorInfo.h>
+
+namespace armnn
+{
+
+class ICLTensorProxy : public arm_compute::ICLTensor
+{
+public:
+    ICLTensorProxy(arm_compute::ICLTensor* iclTensor) : m_DelegateTensor(iclTensor) {}
+    ICLTensorProxy(const ICLTensorProxy&) = delete;
+    ICLTensorProxy& operator=(const ICLTensorProxy&) = delete;
+    ICLTensorProxy(ICLTensorProxy&&) = default;
+    ICLTensorProxy& operator=(ICLTensorProxy&&) = default;
+
+    void set(arm_compute::ICLTensor* iclTensor)
+    {
+        if(iclTensor != nullptr)
+        {
+            m_DelegateTensor = iclTensor;
+        }
+    }
+
+    // Inherited methods overridden:
+    arm_compute::ITensorInfo* info() const
+    {
+        ARM_COMPUTE_ERROR_ON(m_DelegateTensor == nullptr);
+        return m_DelegateTensor->info();
+    }
+
+    arm_compute::ITensorInfo* info()
+    {
+        ARM_COMPUTE_ERROR_ON(m_DelegateTensor == nullptr);
+        return m_DelegateTensor->info();
+    }
+
+    uint8_t* buffer() const
+    {
+        ARM_COMPUTE_ERROR_ON(m_DelegateTensor == nullptr);
+        return m_DelegateTensor->buffer();
+    }
+
+    arm_compute::CLQuantization quantization() const
+    {
+        ARM_COMPUTE_ERROR_ON(m_DelegateTensor == nullptr);
+        return m_DelegateTensor->quantization();
+    }
+
+    const cl::Buffer& cl_buffer() const
+    {
+        ARM_COMPUTE_ERROR_ON(m_DelegateTensor == nullptr);
+        return m_DelegateTensor->cl_buffer();
+    }
+
+protected:
+    uint8_t* do_map(cl::CommandQueue& q, bool blocking)
+    {
+        ARM_COMPUTE_ERROR_ON(m_DelegateTensor == nullptr);
+        m_DelegateTensor->map(q, blocking);
+        return m_DelegateTensor->buffer();
+    }
+    void do_unmap(cl::CommandQueue& q)
+    {
+        ARM_COMPUTE_ERROR_ON(m_DelegateTensor == nullptr);
+        m_DelegateTensor->unmap(q);
+    }
+
+private:
+    arm_compute::ICLTensor* m_DelegateTensor{ nullptr };
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/IClTensorHandle.hpp b/src/backends/cl/IClTensorHandle.hpp
new file mode 100644
index 0000000..48cf5f5
--- /dev/null
+++ b/src/backends/cl/IClTensorHandle.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <arm_compute/core/CL/ICLTensor.h>
+#include <arm_compute/runtime/MemoryGroup.h>
+
+namespace armnn
+{
+
+class IClTensorHandle : public IAclTensorHandle
+{
+public:
+    virtual arm_compute::ICLTensor& GetTensor() = 0;
+    virtual arm_compute::ICLTensor const& GetTensor() const = 0;
+    virtual arm_compute::DataType GetDataType() const = 0;
+    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt
index 9840b82..434cdb8 100644
--- a/src/backends/cl/test/CMakeLists.txt
+++ b/src/backends/cl/test/CMakeLists.txt
@@ -20,6 +20,7 @@
     ClWorkloadFactoryHelper.hpp
     DefaultAllocatorTests.cpp
     Fp16SupportTest.cpp
+    ICLTensorProxyTests.cpp
     OpenClTimerTest.cpp
 )
 
diff --git a/src/backends/cl/test/ICLTensorProxyTests.cpp b/src/backends/cl/test/ICLTensorProxyTests.cpp
new file mode 100644
index 0000000..4ee0b10
--- /dev/null
+++ b/src/backends/cl/test/ICLTensorProxyTests.cpp
@@ -0,0 +1,151 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>
+
+#include <armnnTestUtils/TensorCopyUtils.hpp>
+
+#include <cl/ClImportTensorHandle.hpp>
+#include <cl/ClImportTensorHandleFactory.hpp>
+#include <cl/ClTensorHandle.hpp>
+#include <cl/ClTensorHandleFactory.hpp>
+#include <cl/ICLTensorProxy.hpp>
+#include <cl/test/ClContextControlFixture.hpp>
+#include <cl/test/ClWorkloadFactoryHelper.hpp>
+
+#include <doctest/doctest.h>
+
+using namespace armnn;
+
+TEST_SUITE("ICLTensorProxyTests")
+{
+
+TEST_CASE_FIXTURE(ClContextControlFixture, "ICLTensorProxyTest")
+{
+    ClTensorHandleFactory handleFactory =
+        ClWorkloadFactoryHelper::GetTensorHandleFactory(ClWorkloadFactoryHelper::GetMemoryManager());
+
+    TensorInfo info({ 1, 3, 4, 1 }, DataType::Float32);
+
+    // Create a TensorHandle
+    auto handle = handleFactory.CreateTensorHandle(info, true);
+
+    std::vector<float> inputData
+    {
+        -5, -2, 1, 2,
+        3, 10, -20, 8,
+        0, -12, 7, -9
+    };
+
+    handle->Allocate();
+
+    CopyDataToITensorHandle(handle.get(), inputData.data());
+
+    // Get the underlying CLTensor
+    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClTensorHandle*>(handle.get())->GetTensor();
+    ICLTensorProxy iclTensorProxy(&tensor);
+
+    // Check that the ICLTensorProxy gets the correct information from the delegate tensor
+    CHECK((iclTensorProxy.info() == tensor.info()));
+    CHECK((iclTensorProxy.buffer() == tensor.buffer()));
+    CHECK((iclTensorProxy.cl_buffer() == tensor.cl_buffer()));
+    CHECK((iclTensorProxy.quantization().scale == tensor.quantization().scale));
+    CHECK((iclTensorProxy.quantization().offset == tensor.quantization().offset));
+}
+
+TEST_CASE_FIXTURE(ClContextControlFixture, "ChangeICLTensorProxyExecutionTest")
+{
+    // Start execution with a copied tensor
+    ClTensorHandleFactory handleFactory =
+        ClWorkloadFactoryHelper::GetTensorHandleFactory(ClWorkloadFactoryHelper::GetMemoryManager());
+
+    TensorInfo info({ 1, 3, 4, 1 }, DataType::Float32);
+    unsigned int numElements = info.GetNumElements();
+
+    // Create a TensorHandle
+    auto handle = handleFactory.CreateTensorHandle(info, true);
+
+    std::vector<float> inputData
+    {
+        -5, -2, 1, 2,
+        3, 10, -20, 8,
+        0, -12, 7, -9
+    };
+
+    std::vector<float> ExpectedOutput
+    {
+        0, 0, 1, 2,
+        3, 10, 0, 8,
+        0, 0, 7, 0
+    };
+
+    handle->Allocate();
+
+    CopyDataToITensorHandle(handle.get(), inputData.data());
+
+    // Get the underlying CLTensor
+    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClTensorHandle*>(handle.get())->GetTensor();
+
+    // Create a proxy that delegates to the allocated tensor
+    std::unique_ptr<ICLTensorProxy> iclTensorProxy;
+    iclTensorProxy = std::make_unique<ICLTensorProxy>(&tensor);
+
+    // Create and configure activation function
+    const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+    arm_compute::CLActivationLayer act_func;
+    act_func.configure(iclTensorProxy.get(), nullptr, act_info);
+
+    act_func.run();
+    arm_compute::CLScheduler::get().sync();
+
+    std::vector<float> actualOutput(info.GetNumElements());
+
+    CopyDataFromITensorHandle(actualOutput.data(), handle.get());
+
+    // Validate the result against the expected output
+    for(unsigned int i = 0; i < numElements; ++i)
+    {
+        CHECK((actualOutput[i] == ExpectedOutput[i]));
+    }
+
+    // Change to execute with imported tensor
+    ClImportTensorHandleFactory importHandleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
+                                                    static_cast<MemorySourceFlags>(MemorySource::Malloc));
+    // Create a TensorHandle for memory import
+    auto importHandle = importHandleFactory.CreateTensorHandle(info);
+
+    // Get the underlying CLTensor
+    arm_compute::CLTensor& importTensor = PolymorphicDowncast<ClImportTensorHandle*>(importHandle.get())->GetTensor();
+
+    // Allocate user memory
+    const size_t totalBytes = importTensor.info()->total_size();
+    const size_t alignment =
+        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+    size_t space = totalBytes + alignment + alignment;
+    auto testData = std::make_unique<uint8_t[]>(space);
+    void* alignedPtr = testData.get();
+    CHECK(std::align(alignment, totalBytes, alignedPtr, space));
+
+    // Import memory
+    CHECK(importHandle->Import(alignedPtr, armnn::MemorySource::Malloc));
+
+    // Fill the input with negative values
+    auto* typedPtr = reinterpret_cast<float*>(alignedPtr);
+    std::fill_n(typedPtr, numElements, -5.0f);
+
+    // Point the proxy at the imported tensor so the CLActivationLayer uses it without calling configure again
+    iclTensorProxy->set(&importTensor);
+
+    // Execute function and sync
+    act_func.run();
+    arm_compute::CLScheduler::get().sync();
+
+    // Validate the result: ReLU of the all-negative input should produce all zeros
+    for(unsigned int i = 0; i < numElements; ++i)
+    {
+        CHECK(typedPtr[i] == 0);
+    }
+}
+}
\ No newline at end of file