IVGCVSW-5843 Separate memory managers for WorkingMemHandles

 * Add inter-layer memory management to WorkingMemHandle
 * Change Const layers to be executed once, during LoadedNetwork construction,
   and share their tensorHandles between all WorkingMemHandles
 * Fix various reference workloads that read tensor memory through the queueDescriptor

Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: I69d4b3c5c84d2f5abe4540c3e624ab4f00d88226
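
The diffs below all apply one pattern: tensor memory is passed into the workload
implementations explicitly instead of being looked up through the shared queue
descriptor, whose m_Inputs/m_Outputs are fixed at load time and therefore cannot
differ between WorkingMemHandles. A minimal sketch of the before/after shape,
using simplified stand-ins for ITensorHandle and the descriptor (CopyBefore and
CopyAfter are illustrative names, not part of the real ArmNN API):

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // Simplified stand-in for armnn::ITensorHandle.
    struct ITensorHandle
    {
        virtual void* Map() = 0;
        virtual const void* Map() const = 0;
        virtual void Unmap() const = 0;
        virtual ~ITensorHandle() = default;
    };

    // Simplified stand-in for a queue descriptor: populated once at network
    // load and shared by every execution.
    struct QueueDescriptor
    {
        std::vector<ITensorHandle*> m_Inputs;
        std::vector<ITensorHandle*> m_Outputs;
    };

    // Before: implementations reached into data.m_Inputs[0]->Map(),
    // binding them to the load-time memory.
    void CopyBefore(const QueueDescriptor& data, std::size_t numBytes)
    {
        std::memcpy(data.m_Outputs[0]->Map(), data.m_Inputs[0]->Map(), numBytes);
    }

    // After: the caller passes the handles owned by the current
    // WorkingMemHandle, so concurrent executions each see their own memory.
    void CopyAfter(const ITensorHandle* input, ITensorHandle* output, std::size_t numBytes)
    {
        std::memcpy(output->Map(), input->Map(), numBytes);
    }
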
diff --git a/src/backends/reference/workloads/InstanceNorm.cpp b/src/backends/reference/workloads/InstanceNorm.cpp
index d628c03..b6e616a 100644
--- a/src/backends/reference/workloads/InstanceNorm.cpp
+++ b/src/backends/reference/workloads/InstanceNorm.cpp
@@ -16,10 +16,10 @@
 {
 
 void InstanceNorm(const InstanceNormalizationQueueDescriptor& data,
+                  const TensorInfo& inputInfo,
                   Decoder<float>& inputDecoder,
                   Encoder<float>& outputEncoder)
 {
-    const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
     const TensorShape inputShape = inputInfo.GetShape();
 
     armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
diff --git a/src/backends/reference/workloads/InstanceNorm.hpp b/src/backends/reference/workloads/InstanceNorm.hpp
index 2e3a18f..6a78373 100644
--- a/src/backends/reference/workloads/InstanceNorm.hpp
+++ b/src/backends/reference/workloads/InstanceNorm.hpp
@@ -14,6 +14,7 @@
 {
 
 void InstanceNorm(const InstanceNormalizationQueueDescriptor& data,
+                  const TensorInfo& inputInfo,
                   Decoder<float>& inputData,
                   Encoder<float>& outputData);
 
diff --git a/src/backends/reference/workloads/Pad.cpp b/src/backends/reference/workloads/Pad.cpp
index 1f8b674..f58dbae 100644
--- a/src/backends/reference/workloads/Pad.cpp
+++ b/src/backends/reference/workloads/Pad.cpp
@@ -38,6 +38,8 @@
 
 void Pad(const TensorInfo& inputInfo,
          const TensorInfo& outputInfo,
+         const ITensorHandle* inputHandle,
+         ITensorHandle* outputHandle,
          const PadQueueDescriptor& data)
 {
     auto padList  = data.m_Parameters.m_PadList;
@@ -66,15 +68,15 @@
     unsigned int outputHeight   = 0;
     unsigned int outputWidth    = 0;
 
-    auto inputData = MakeDecoder<float>(inputInfo, data.m_Inputs[0]->Map());
-    auto outData   = MakeEncoder<float>(outputInfo, data.m_Outputs[0]->Map());
+    auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
+    auto outData   = MakeEncoder<float>(outputInfo, outputHandle->Map());
 
     // Fill the output tensor with Pad value first
     if (outputInfo.IsQuantized())
     {
         // For Quantized types Pad Value should not be quantized with scale and offset of the tensor info
         auto temporaryInfo = TensorInfo(outputInfo.GetShape(), outputInfo.GetDataType(), 1.0f, 0);
-        auto outputData = MakeEncoder<float>(temporaryInfo, data.m_Outputs[0]->Map());
+        auto outputData = MakeEncoder<float>(temporaryInfo, outputHandle->Map());
         FillOutputWithPadValue(*outputData, padValue, numOutputElements);
     }
     else
diff --git a/src/backends/reference/workloads/Pad.hpp b/src/backends/reference/workloads/Pad.hpp
index e7be44e..65f64df 100644
--- a/src/backends/reference/workloads/Pad.hpp
+++ b/src/backends/reference/workloads/Pad.hpp
@@ -15,6 +15,8 @@
 
 void Pad(const TensorInfo& inputInfo,
          const TensorInfo& outputInfo,
+         const ITensorHandle* inputHandle,
+         ITensorHandle* outputHandle,
          const PadQueueDescriptor& data);
 
 } //namespace armnn
diff --git a/src/backends/reference/workloads/PreluImpl.cpp b/src/backends/reference/workloads/PreluImpl.cpp
index 458025b..6df259f 100644
--- a/src/backends/reference/workloads/PreluImpl.cpp
+++ b/src/backends/reference/workloads/PreluImpl.cpp
@@ -10,15 +10,13 @@
 namespace armnn
 {
 
-void PreluImpl(const PreluQueueDescriptor& data,
+void PreluImpl(const TensorInfo& inputInfo,
+               const TensorInfo& alphaInfo,
+               const TensorInfo& outputInfo,
                Decoder<float>& inputData,
                Decoder<float>& alphaData,
                Encoder<float>& outputData)
 {
-    const TensorInfo& inputInfo  = GetTensorInfo(data.m_Inputs[0]);
-    const TensorInfo& alphaInfo  = GetTensorInfo(data.m_Inputs[1]);
-    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
-
     const TensorShape& inputShape  = inputInfo.GetShape();
     const TensorShape& alphaShape  = alphaInfo.GetShape();
     const TensorShape& outputShape = outputInfo.GetShape();
diff --git a/src/backends/reference/workloads/PreluImpl.hpp b/src/backends/reference/workloads/PreluImpl.hpp
index 9299b1c..0b3d3b0 100644
--- a/src/backends/reference/workloads/PreluImpl.hpp
+++ b/src/backends/reference/workloads/PreluImpl.hpp
@@ -13,7 +13,9 @@
 namespace armnn
 {
 
-void PreluImpl(const PreluQueueDescriptor& data,
+void PreluImpl(const TensorInfo& inputInfo,
+               const TensorInfo& alphaInfo,
+               const TensorInfo& outputInfo,
                Decoder<float>& inputData,
                Decoder<float>& alphaData,
                Encoder<float>& outputData);
diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
index 77167a8..2d635bf 100644
--- a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
+++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
@@ -41,11 +41,11 @@
     const TensorInfo &outputTensorInfo = GetTensorInfo(outputs[0]);
 
     if (outputTensorInfo.GetDataType() == armnn::DataType::Signed32) {
-        int32_t *output = GetOutputTensorData<int32_t>(0, m_Data);
+        int32_t *output = GetOutputTensorData<int32_t>(outputs[0]);
         ArgMinMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Function,
                   m_Data.m_Parameters.m_Axis);
     } else {
-        int64_t *output = GetOutputTensorData<int64_t>(0, m_Data);
+        int64_t *output = GetOutputTensorData<int64_t>(outputs[0]);
         ArgMinMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Function,
                   m_Data.m_Parameters.m_Axis);
     }
diff --git a/src/backends/reference/workloads/RefGatherWorkload.cpp b/src/backends/reference/workloads/RefGatherWorkload.cpp
index 020c067..be3274f 100644
--- a/src/backends/reference/workloads/RefGatherWorkload.cpp
+++ b/src/backends/reference/workloads/RefGatherWorkload.cpp
@@ -34,7 +34,7 @@
     std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputInfo0, inputs[0]->Map());
     Decoder<float>& decoder = *decoderPtr;
 
-    const int32_t* indicesData = GetInputTensorData<int32_t>(1, m_Data);
+    const int32_t* indicesData = reinterpret_cast<int32_t*>(inputs[1]->Map());
 
     std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo, outputs[0]->Map());
     Encoder<float>& encoder = *encoderPtr;
diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
index daee97a..e642dc9 100644
--- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
@@ -37,8 +37,9 @@
                                                                        inputs[0]->Map());
     std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]),
                                                                        outputs[0]->Map());
+    const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
 
-    InstanceNorm(m_Data, *inputDecoder, *outputEncoder);
+    InstanceNorm(m_Data, inputInfo, *inputDecoder, *outputEncoder);
 }
 
 } // namespace armnn
diff --git a/src/backends/reference/workloads/RefPadWorkload.cpp b/src/backends/reference/workloads/RefPadWorkload.cpp
index ea515ca..f15306d 100644
--- a/src/backends/reference/workloads/RefPadWorkload.cpp
+++ b/src/backends/reference/workloads/RefPadWorkload.cpp
@@ -31,6 +31,8 @@
 
     armnn::Pad(inputInfo,
                outputInfo,
+               inputs[0],
+               outputs[0],
                m_Data);
 }
 
diff --git a/src/backends/reference/workloads/RefPreluWorkload.cpp b/src/backends/reference/workloads/RefPreluWorkload.cpp
index b298874..c1d8de2 100644
--- a/src/backends/reference/workloads/RefPreluWorkload.cpp
+++ b/src/backends/reference/workloads/RefPreluWorkload.cpp
@@ -32,6 +32,10 @@
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPreluWorkload_Execute");
 
+    const TensorInfo& inputInfo  = GetTensorInfo(inputs[0]);
+    const TensorInfo& alphaInfo  = GetTensorInfo(inputs[1]);
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+
     std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(inputs[0]),
                                                                       inputs[0]->Map());
     std::unique_ptr<Decoder<float>> alphaDecoder = MakeDecoder<float>(GetTensorInfo(inputs[1]),
@@ -39,7 +43,7 @@
     std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(outputs[0]),
                                                                        outputs[0]->Map());
 
-    PreluImpl(m_Data, *inputDecoder, *alphaDecoder, *outputEncoder);
+    PreluImpl(inputInfo, alphaInfo, outputInfo, *inputDecoder, *alphaDecoder, *outputEncoder);
 }
 
 } // namespace armnn
diff --git a/src/backends/reference/workloads/RefRankWorkload.hpp b/src/backends/reference/workloads/RefRankWorkload.hpp
index 237ae99..288dddd 100644
--- a/src/backends/reference/workloads/RefRankWorkload.hpp
+++ b/src/backends/reference/workloads/RefRankWorkload.hpp
@@ -32,7 +32,7 @@
     {
         const int32_t rank = static_cast<int32_t>(GetTensorInfo(inputs[0]).GetNumDimensions());
 
-        std::memcpy(GetOutputTensorData<void>(0, m_Data), &rank, sizeof(int32_t));
+        std::memcpy(outputs[0]->Map(), &rank, sizeof(int32_t));
         outputs[0]->Unmap();
     }
 };
diff --git a/src/backends/reference/workloads/RefStackWorkload.cpp b/src/backends/reference/workloads/RefStackWorkload.cpp
index 20cf3b3..31949e9 100644
--- a/src/backends/reference/workloads/RefStackWorkload.cpp
+++ b/src/backends/reference/workloads/RefStackWorkload.cpp
@@ -32,26 +32,6 @@
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefStackWorkload_Execute");
 
-    // Can perform a simple concatenation when axis == 0
-    if (!m_Data.m_Parameters.m_Axis)
-    {
-        float* output = GetOutputTensorData<float>(0, m_Data);
-        ARMNN_ASSERT(output != nullptr);
-
-        unsigned int numInputs = m_Data.m_Parameters.m_NumInputs;
-        unsigned int inputLength = GetTensorInfo(inputs[0]).GetNumElements();
-
-        for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
-        {
-            const float* input = GetInputTensorData<float>(inputIdx, m_Data);
-            for (unsigned int elmt=0; elmt<inputLength; ++elmt)
-            {
-                output[(inputIdx * inputLength) + elmt] = input[elmt];
-            }
-        }
-        return;
-    }
-
     std::vector<std::unique_ptr<Decoder<float>>> inputDecoders;
     for (unsigned int i=0; i<inputs.size(); ++i)
     {
diff --git a/src/backends/reference/workloads/RefWorkloadUtils.hpp b/src/backends/reference/workloads/RefWorkloadUtils.hpp
index dfde58f..0d839af 100644
--- a/src/backends/reference/workloads/RefWorkloadUtils.hpp
+++ b/src/backends/reference/workloads/RefWorkloadUtils.hpp
@@ -45,6 +45,12 @@
     return reinterpret_cast<DataType*>(tensorHandle->Map());
 }
 
+template <typename DataType>
+DataType* GetOutputTensorData(ITensorHandle* tensorHandle)
+{
+    return reinterpret_cast<DataType*>(tensorHandle->Map());
+}
+
 template <typename PayloadType>
 const float* GetInputTensorDataFloat(unsigned int idx, const PayloadType& data)
 {
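
The new overload added above lets call sites hand in the handle they receive on
the Execute(inputs, outputs) path directly; the descriptor-indexed overload
remains for code that has not been migrated. Side by side, with a stand-in
handle type (a sketch mirroring the utility, not the patch itself):

    #include <cstdint>
    #include <vector>

    struct ITensorHandle { virtual void* Map() = 0; virtual ~ITensorHandle() = default; };

    // Descriptor-indexed: resolves the handle through the shared descriptor,
    // i.e. through memory fixed at network load.
    template <typename DataType, typename PayloadType>
    DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data)
    {
        return reinterpret_cast<DataType*>(data.m_Outputs[idx]->Map());
    }

    // Handle-based: takes the per-execution handle directly, as added in
    // RefWorkloadUtils.hpp above.
    template <typename DataType>
    DataType* GetOutputTensorData(ITensorHandle* tensorHandle)
    {
        return reinterpret_cast<DataType*>(tensorHandle->Map());
    }

    // Migration as seen in RefArgMinMaxWorkload:
    //   int32_t* out = GetOutputTensorData<int32_t>(0, m_Data);   // before
    //   int32_t* out = GetOutputTensorData<int32_t>(outputs[0]);  // after
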
diff --git a/src/backends/reference/workloads/Stack.cpp b/src/backends/reference/workloads/Stack.cpp
index 386c899..f2bce54 100644
--- a/src/backends/reference/workloads/Stack.cpp
+++ b/src/backends/reference/workloads/Stack.cpp
@@ -24,6 +24,24 @@
 
     unsigned int axis = data.m_Parameters.m_Axis;
 
+    // Can perform a simple concatenation when axis == 0
+    if (!axis)
+    {
+        unsigned int numInputs = data.m_Parameters.m_NumInputs;
+        unsigned int inputLength = inputInfo.GetNumElements();
+
+        for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
+        {
+            for (unsigned int elmt=0; elmt<inputLength; ++elmt)
+            {
+                (*inputs[inputIdx])[elmt];               // operator[] repositions the decoder...
+                output[(inputIdx * inputLength) + elmt]; // ...and the encoder; these are not no-ops
+                output.Set(inputs[inputIdx]->Get());     // copy the element across
+            }
+        }
+        return;
+    }
+
     // Initialise output data
     unsigned int numOutputElements = 1;
     for (unsigned int i=0; i<outputNumDims; ++i)
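
The axis == 0 fast path moved from RefStackWorkload into Stack() so that it runs
through the per-execution decoders and encoders rather than through
GetOutputTensorData<float>(0, m_Data). Its loop body looks like a sequence of
discarded expressions but is not: ArmNN's decoder/encoder iterators reposition
on operator[] and transfer values via Get()/Set(). A stand-in iterator showing
the same idiom (a float-only sketch; FloatIterator is illustrative, not the
real Decoder/Encoder hierarchy):

    #include <cassert>

    class FloatIterator
    {
    public:
        explicit FloatIterator(float* start) : m_Start(start), m_Current(start) {}

        // Repositioning happens here: indexing moves the cursor and returns *this.
        FloatIterator& operator[](unsigned int index) { m_Current = m_Start + index; return *this; }

        float Get() const { return *m_Current; }
        void  Set(float value) { *m_Current = value; }

    private:
        float* m_Start;
        float* m_Current;
    };

    int main()
    {
        float in[2]  = { 1.0f, 2.0f };
        float out[2] = { 0.0f, 0.0f };
        FloatIterator decoder(in);
        FloatIterator encoder(out);

        for (unsigned int elmt = 0; elmt < 2; ++elmt)
        {
            decoder[elmt];              // repositions the decoder (a real side effect)
            encoder[elmt];              // repositions the encoder
            encoder.Set(decoder.Get()); // copies the element
        }
        assert(out[0] == 1.0f && out[1] == 2.0f);
        return 0;
    }
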