IVGCVSW-3307 Introduce RefTensorHandle

Use it for intermediate tensors on the reference backend.
Lays the groundwork for memory management in the reference backend.

Change-Id: I7d3ee132cac31bde70ae6e1b815f4f0b03d550a6
Signed-off-by: Matthew Bentham <Matthew.Bentham@arm.com>
diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp
index c293066..c21ef76 100644
--- a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp
+++ b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.cpp
@@ -26,4 +26,4 @@
 }
 
 
-} //namespace armnn
\ No newline at end of file
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
index 0824d5c..a660d2e 100644
--- a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
@@ -17,15 +17,16 @@
         : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
 {
     m_Weight = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight));
-    const TensorInfo& rFilterInfo = GetTensorInfo(m_Weight.get());
+    const TensorInfo& rFilterInfo = m_Weight->GetTensorInfo();
+
     m_FilterShape = rFilterInfo.GetShape();
     m_FilterDecoder = MakeDecoder<float>(rFilterInfo, m_Weight.get()->Map(true));
 
     if (descriptor.m_Parameters.m_BiasEnabled)
     {
         m_Bias = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias));
-        const TensorInfo& biasInfo = GetTensorInfo(m_Bias.get());
-        m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias.get()->Map(true));
+        const TensorInfo& biasInfo = m_Bias->GetTensorInfo();
+        m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias->Map(true));
     }
 }
 
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
index c7dc4af..48a20cf 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
@@ -20,15 +20,15 @@
         : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
 {
     m_Weight = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight));
-    const TensorInfo& rFilterInfo = GetTensorInfo(m_Weight.get());
+    const TensorInfo& rFilterInfo = m_Weight->GetTensorInfo();
     m_FilterShape = rFilterInfo.GetShape();
-    m_FilterDecoder = MakeDecoder<float>(rFilterInfo, m_Weight.get()->Map(true));
+    m_FilterDecoder = MakeDecoder<float>(rFilterInfo, m_Weight->Map(true));
 
     if (descriptor.m_Parameters.m_BiasEnabled)
     {
         m_Bias = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias));
-        const TensorInfo& biasInfo = GetTensorInfo(m_Bias.get());
-        m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias.get()->Map(true));
+        const TensorInfo& biasInfo = m_Bias->GetTensorInfo();
+        m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias->Map(true));
     }
 }
 
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
index db24cc5..b9817ba 100644
--- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
@@ -24,7 +24,7 @@
 
     const TensorInfo& boxEncodingsInfo = GetTensorInfo(m_Data.m_Inputs[0]);
     const TensorInfo& scoresInfo       = GetTensorInfo(m_Data.m_Inputs[1]);
-    const TensorInfo& anchorsInfo      = GetTensorInfo(m_Anchors.get());
+    const TensorInfo& anchorsInfo      = m_Anchors->GetTensorInfo();
 
     const TensorInfo& detectionBoxesInfo   = GetTensorInfo(m_Data.m_Outputs[0]);
     const TensorInfo& detectionClassesInfo = GetTensorInfo(m_Data.m_Outputs[1]);
diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
index dc7030e..c7a3d90 100644
--- a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
+++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
@@ -17,14 +17,14 @@
         : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info),
           m_Weight(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight)))
 {
-    const TensorInfo& rWeightInfo = GetTensorInfo(m_Weight.get());
+    const TensorInfo& rWeightInfo = m_Weight->GetTensorInfo();
     m_WeightShape = rWeightInfo.GetShape();
     m_WeightDecoder = MakeDecoder<float>(rWeightInfo, m_Weight->Map(true));
 
     if (descriptor.m_Parameters.m_BiasEnabled)
     {
         m_Bias = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias));
-        const TensorInfo& biasInfo = GetTensorInfo(m_Bias.get());
+        const TensorInfo& biasInfo = m_Bias->GetTensorInfo();
         m_BiasDecoder = MakeDecoder<float>(biasInfo, m_Bias->Map(true));
     }
 }
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp
index 9e44d16..c943eb8 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.cpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp
@@ -20,11 +20,11 @@
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute");
 
     const ITensorHandle*     src      = m_Data.m_Inputs[0];
-    const ITensorHandle*     dst      = m_Data.m_Outputs[0];
+    ITensorHandle*           dst      = m_Data.m_Outputs[0];
     const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
 
     armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings,
-                        GetConstCpuData<void>(src), GetCpuData<void>(dst), sizeof(T));
+                        src->Map(), dst->Map(), sizeof(T));
 }
 
 template class RefPermuteWorkload<DataType::Float16>;
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
index 50dafca..ec60030 100644
--- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
@@ -19,7 +19,7 @@
 {
     // set up weights decoder
     m_Weights = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Weight));
-    const TensorInfo& weightsInfo = GetTensorInfo(m_Weights.get());
+    const TensorInfo& weightsInfo = m_Weights->GetTensorInfo();
 
     m_WeightsDecoder = MakeDecoder<float>(weightsInfo, m_Weights.get()->Map(true));
     m_WeightsShape   = weightsInfo.GetShape();
@@ -28,7 +28,7 @@
     if (descriptor.m_Parameters.m_BiasEnabled)
     {
         m_Biases = std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Bias));
-        const TensorInfo& biasesInfo = GetTensorInfo(m_Biases.get());
+        const TensorInfo& biasesInfo = m_Biases->GetTensorInfo();
         m_BiasesDecoder = MakeDecoder<float>(biasesInfo, m_Biases.get()->Map(true));
     }
 }
diff --git a/src/backends/reference/workloads/RefWorkloadUtils.hpp b/src/backends/reference/workloads/RefWorkloadUtils.hpp
index ce79616..c3260c8 100644
--- a/src/backends/reference/workloads/RefWorkloadUtils.hpp
+++ b/src/backends/reference/workloads/RefWorkloadUtils.hpp
@@ -9,8 +9,10 @@
 
 #include <armnn/Tensor.hpp>
 #include <armnn/Types.hpp>
-#include <Half.hpp>
 
+#include <reference/RefTensorHandle.hpp>
+
+#include <Half.hpp>
 #include <boost/polymorphic_cast.hpp>
 
 namespace armnn
@@ -22,41 +24,24 @@
 
 inline const TensorInfo& GetTensorInfo(const ITensorHandle* tensorHandle)
 {
-    // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
-    const ConstCpuTensorHandle* cpuTensorHandle =
-        boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
-    return cpuTensorHandle->GetTensorInfo();
+    // We know that reference workloads use RefTensorHandles for inputs and outputs
+    const RefTensorHandle* refTensorHandle =
+        boost::polymorphic_downcast<const RefTensorHandle*>(tensorHandle);
+    return refTensorHandle->GetTensorInfo();
 }
 
-template <typename DataType>
-inline const DataType* GetConstCpuData(const ITensorHandle* tensorHandle)
-{
-    // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate.
-    const ConstCpuTensorHandle* cpuTensorHandle =
-        boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
-    return cpuTensorHandle->GetConstTensor<DataType>();
-}
-
-template <typename DataType>
-inline DataType* GetCpuData(const ITensorHandle* tensorHandle)
-{
-    // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
-    const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle);
-    return cpuTensorHandle->GetTensor<DataType>();
-};
-
 template <typename DataType, typename PayloadType>
 const DataType* GetInputTensorData(unsigned int idx, const PayloadType& data)
 {
     const ITensorHandle* tensorHandle = data.m_Inputs[idx];
-    return GetConstCpuData<DataType>(tensorHandle);
+    return reinterpret_cast<const DataType*>(tensorHandle->Map());
 }
 
 template <typename DataType, typename PayloadType>
 DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data)
 {
-    const ITensorHandle* tensorHandle = data.m_Outputs[idx];
-    return GetCpuData<DataType>(tensorHandle);
+    ITensorHandle* tensorHandle = data.m_Outputs[idx];
+    return reinterpret_cast<DataType*>(tensorHandle->Map());
 }
 
 template <typename PayloadType>
@@ -87,35 +72,6 @@
 /// u8 helpers
 ////////////////////////////////////////////
 
-inline const uint8_t* GetConstCpuU8Data(const ITensorHandle* tensorHandle)
-{
-    // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate.
-    const ConstCpuTensorHandle* cpuTensorHandle =
-        boost::polymorphic_downcast<const ConstCpuTensorHandle*>(tensorHandle);
-    return cpuTensorHandle->GetConstTensor<uint8_t>();
-};
-
-inline uint8_t* GetCpuU8Data(const ITensorHandle* tensorHandle)
-{
-    // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate.
-    const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast<const CpuTensorHandle*>(tensorHandle);
-    return cpuTensorHandle->GetTensor<uint8_t>();
-};
-
-template <typename PayloadType>
-const uint8_t* GetInputTensorDataU8(unsigned int idx, const PayloadType& data)
-{
-    const ITensorHandle* tensorHandle = data.m_Inputs[idx];
-    return GetConstCpuU8Data(tensorHandle);
-}
-
-template <typename PayloadType>
-uint8_t* GetOutputTensorDataU8(unsigned int idx, const PayloadType& data)
-{
-    const ITensorHandle* tensorHandle = data.m_Outputs[idx];
-    return GetCpuU8Data(tensorHandle);
-}
-
 template<typename T>
 std::vector<float> Dequantize(const T* quant, const TensorInfo& info)
 {