Fix graph copy memory spike

 * Change layer storage of ConstTensors to std::shared_ptr<ConstCpuTensorHandle>
 * Change clone to share ConstTensor rather than copy
 * Remove uses of non-const GetTensor() call
 * Reduce scope of non-optimized network in ExeNet, so memory can be released after use

Signed-off-by: Finn Williams <Finn.Williams@arm.com>

Change-Id: Ibb2c7309d12411d21405bd6024c76bcdf5404545
diff --git a/src/backends/reference/workloads/RefLstmWorkload.cpp b/src/backends/reference/workloads/RefLstmWorkload.cpp
index 70b3443..7c37301 100644
--- a/src/backends/reference/workloads/RefLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefLstmWorkload.cpp
@@ -110,27 +110,27 @@
 
     std::unique_ptr<Decoder<float>> inputToInputWeightsTensor;
     std::unique_ptr<Decoder<float>> inputToForgetWeightsTensor = MakeDecoder<float>(
-        m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetTensor<void>());
+        m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> inputToCellWeightsTensor = MakeDecoder<float>(
-        m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetTensor<void>());
+        m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> inputToOutputWeightsTensor = MakeDecoder<float>(
-        m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetTensor<void>());
+        m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetConstTensor<void>());
 
     std::unique_ptr<Decoder<float>> recurrentToInputWeightsTensor;
     std::unique_ptr<Decoder<float>> recurrentToForgetWeightsTensor = MakeDecoder<float>(
-        m_RecurrentToForgetWeightsTensor->GetTensorInfo(), m_RecurrentToForgetWeightsTensor->GetTensor<void>());
+        m_RecurrentToForgetWeightsTensor->GetTensorInfo(), m_RecurrentToForgetWeightsTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> recurrentToCellWeightsTensor = MakeDecoder<float>(
-        m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetTensor<void>());
+        m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> recurrentToOutputWeightsTensor = MakeDecoder<float>(
-        m_RecurrentToOutputWeightsTensor->GetTensorInfo(), m_RecurrentToOutputWeightsTensor->GetTensor<void>());
+        m_RecurrentToOutputWeightsTensor->GetTensorInfo(), m_RecurrentToOutputWeightsTensor->GetConstTensor<void>());
 
     std::unique_ptr<Decoder<float>> inputGateBiasTensor;
     std::unique_ptr<Decoder<float>> forgetGateBiasTensor = MakeDecoder<float>(
-        m_ForgetGateBiasTensor->GetTensorInfo(), m_ForgetGateBiasTensor->GetTensor<void>());
+        m_ForgetGateBiasTensor->GetTensorInfo(), m_ForgetGateBiasTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> cellBiasTensor = MakeDecoder<float>(
-        m_CellBiasTensor->GetTensorInfo(), m_CellBiasTensor->GetTensor<void>());
+        m_CellBiasTensor->GetTensorInfo(), m_CellBiasTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> outputGateBiasTensor = MakeDecoder<float>(
-        m_OutputGateBiasTensor->GetTensorInfo(), m_OutputGateBiasTensor->GetTensor<void>());
+        m_OutputGateBiasTensor->GetTensorInfo(), m_OutputGateBiasTensor->GetConstTensor<void>());
 
     std::unique_ptr<Decoder<float>> cellToInputWeightsTensor;
     std::unique_ptr<Decoder<float>> cellToForgetWeightsTensor;
@@ -149,48 +149,48 @@
         if (!useCifg)
         {
             inputLayerNormWeights = MakeDecoder<float>(
-                    m_InputLayerNormWeights->GetTensorInfo(), m_InputLayerNormWeights->GetTensor<void>());
+                    m_InputLayerNormWeights->GetTensorInfo(), m_InputLayerNormWeights->GetConstTensor<void>());
         }
         forgetLayerNormWeights = MakeDecoder<float>(
-                m_ForgetLayerNormWeights->GetTensorInfo(), m_ForgetLayerNormWeights->GetTensor<void>());
+                m_ForgetLayerNormWeights->GetTensorInfo(), m_ForgetLayerNormWeights->GetConstTensor<void>());
         cellLayerNormWeights = MakeDecoder<float>(
-                m_CellLayerNormWeights->GetTensorInfo(), m_CellLayerNormWeights->GetTensor<void>());
+                m_CellLayerNormWeights->GetTensorInfo(), m_CellLayerNormWeights->GetConstTensor<void>());
         outputLayerNormWeights = MakeDecoder<float>(
-                m_OutputLayerNormWeights->GetTensorInfo(), m_OutputLayerNormWeights->GetTensor<void>());
+                m_OutputLayerNormWeights->GetTensorInfo(), m_OutputLayerNormWeights->GetConstTensor<void>());
     }
 
     if (!useCifg)
     {
         inputToInputWeightsTensor = MakeDecoder<float>(
-            m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetTensor<void>());
+            m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetConstTensor<void>());
         inputGateBiasTensor = MakeDecoder<float>(
-            m_InputGateBiasTensor->GetTensorInfo(), m_InputGateBiasTensor->GetTensor<void>());
+            m_InputGateBiasTensor->GetTensorInfo(), m_InputGateBiasTensor->GetConstTensor<void>());
         recurrentToInputWeightsTensor = MakeDecoder<float>(
-            m_RecurrentToInputWeightsTensor->GetTensorInfo(), m_RecurrentToInputWeightsTensor->GetTensor<void>());
+            m_RecurrentToInputWeightsTensor->GetTensorInfo(), m_RecurrentToInputWeightsTensor->GetConstTensor<void>());
     }
 
     if (usePeephole)
     {
         cellToForgetWeightsTensor = MakeDecoder<float>(
-            m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetTensor<void>());
+            m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetConstTensor<void>());
         cellToOutputWeightsTensor = MakeDecoder<float>(
-            m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetTensor<void>());
+            m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetConstTensor<void>());
     }
 
     if (!useCifg && usePeephole)
     {
         cellToInputWeightsTensor = MakeDecoder<float>(
-            m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetTensor<void>());
+            m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetConstTensor<void>());
     }
 
     if (m_Data.m_Parameters.m_ProjectionEnabled)
     {
         projectionWeightsTensor = MakeDecoder<float>(
-            m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetTensor<void>());
+            m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetConstTensor<void>());
         if (m_ProjectionBiasTensor)
         {
             projectionBiasTensor = MakeDecoder<float>(
-                m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetTensor<void>());
+                m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetConstTensor<void>());
         }
     }
 
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp
index e11ea55..bcd6a62 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp
@@ -101,18 +101,20 @@
 
     // Weights decoders
     std::unique_ptr<Decoder<float>> inputToForgetWeightsDecoder = MakeDecoder<float>(
-            m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetTensor<void>());
+            m_InputToForgetWeightsTensor->GetTensorInfo(), m_InputToForgetWeightsTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> inputToCellWeightsDecoder = MakeDecoder<float>(
-            m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetTensor<void>());
+            m_InputToCellWeightsTensor->GetTensorInfo(), m_InputToCellWeightsTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> inputToOutputWeightsDecoder = MakeDecoder<float>(
-            m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetTensor<void>());
+            m_InputToOutputWeightsTensor->GetTensorInfo(), m_InputToOutputWeightsTensor->GetConstTensor<void>());
 
     std::unique_ptr<Decoder<float>> recurrentToForgetWeightsDecoder = MakeDecoder<float>(
-            m_RecurrentToForgetWeightsTensor->GetTensorInfo(), m_RecurrentToForgetWeightsTensor->GetTensor<void>());
+            m_RecurrentToForgetWeightsTensor->GetTensorInfo(),
+            m_RecurrentToForgetWeightsTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> recurrentToCellWeightsDecoder = MakeDecoder<float>(
-            m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetTensor<void>());
+            m_RecurrentToCellWeightsTensor->GetTensorInfo(), m_RecurrentToCellWeightsTensor->GetConstTensor<void>());
     std::unique_ptr<Decoder<float>> recurrentToOutputWeightsDecoder = MakeDecoder<float>(
-            m_RecurrentToOutputWeightsTensor->GetTensorInfo(), m_RecurrentToOutputWeightsTensor->GetTensor<void>());
+            m_RecurrentToOutputWeightsTensor->GetTensorInfo(),
+            m_RecurrentToOutputWeightsTensor->GetConstTensor<void>());
 
     // Optional CIFG params
     std::unique_ptr<Decoder<float>> inputToInputWeightsDecoder;
@@ -198,9 +200,9 @@
     if (!cifgEnabled)
     {
         inputToInputWeightsDecoder = MakeDecoder<float>(
-                m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetTensor<void>());
-        recurrentToInputWeightsDecoder = MakeDecoder<float>(
-                m_RecurrentToInputWeightsTensor->GetTensorInfo(), m_RecurrentToInputWeightsTensor->GetTensor<void>());
+                m_InputToInputWeightsTensor->GetTensorInfo(), m_InputToInputWeightsTensor->GetConstTensor<void>());
+        recurrentToInputWeightsDecoder = MakeDecoder<float>(m_RecurrentToInputWeightsTensor->GetTensorInfo(),
+                                                            m_RecurrentToInputWeightsTensor->GetConstTensor<void>());
     }
 
     if (peepholeEnabled)
@@ -208,22 +210,22 @@
         if (!cifgEnabled)
         {
             cellToInputWeightsDecoder = MakeDecoder<float>(
-                    m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetTensor<void>());
+                    m_CellToInputWeightsTensor->GetTensorInfo(), m_CellToInputWeightsTensor->GetConstTensor<void>());
         }
         cellToForgetWeightsDecoder = MakeDecoder<float>(
-                m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetTensor<void>());
+                m_CellToForgetWeightsTensor->GetTensorInfo(), m_CellToForgetWeightsTensor->GetConstTensor<void>());
         cellToOutputWeightsDecoder = MakeDecoder<float>(
-                m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetTensor<void>());
+                m_CellToOutputWeightsTensor->GetTensorInfo(), m_CellToOutputWeightsTensor->GetConstTensor<void>());
     }
 
     if (projectionEnabled)
     {
         projectionWeightsDecoder = MakeDecoder<float>(
-                m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetTensor<void>());
+                m_ProjectionWeightsTensor->GetTensorInfo(), m_ProjectionWeightsTensor->GetConstTensor<void>());
         if (m_ProjectionBiasTensor)
         {
             projectionBiasDecoder = MakeDecoder<float>(
-                    m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetTensor<void>());
+                    m_ProjectionBiasTensor->GetTensorInfo(), m_ProjectionBiasTensor->GetConstTensor<void>());
         }
     }
 
@@ -231,38 +233,40 @@
     {
         if (!cifgEnabled)
         {
-            inputLayerNormWeightsDecoder = MakeDecoder<float>(
-                    m_InputLayerNormWeightsTensor->GetTensorInfo(), m_InputLayerNormWeightsTensor->GetTensor<void>());
+            inputLayerNormWeightsDecoder = MakeDecoder<float>(m_InputLayerNormWeightsTensor->GetTensorInfo(),
+                                                              m_InputLayerNormWeightsTensor->GetConstTensor<void>());
 
             // Bias only used if layer norm enabled
             armnn::TensorInfo inputGateBiasTensorInfo({outputSize}, armnn::DataType::Signed32,
                     m_InputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
             inputGateBiasDecoder = MakeDecoder<float>(
-                    inputGateBiasTensorInfo, m_InputGateBiasTensor->GetTensor<void>());
+                    inputGateBiasTensorInfo, m_InputGateBiasTensor->GetConstTensor<void>());
         }
 
         forgetLayerNormWeightsDecoder = MakeDecoder<float>(
-                m_ForgetLayerNormWeightsTensor->GetTensorInfo(), m_ForgetLayerNormWeightsTensor->GetTensor<void>());
+                m_ForgetLayerNormWeightsTensor->GetTensorInfo(),
+                m_ForgetLayerNormWeightsTensor->GetConstTensor<void>());
         cellLayerNormWeightsDecoder = MakeDecoder<float>(
-                m_CellLayerNormWeightsTensor->GetTensorInfo(), m_CellLayerNormWeightsTensor->GetTensor<void>());
+                m_CellLayerNormWeightsTensor->GetTensorInfo(), m_CellLayerNormWeightsTensor->GetConstTensor<void>());
         outputLayerNormWeightsDecoder = MakeDecoder<float>(
-                m_OutputLayerNormWeightsTensor->GetTensorInfo(), m_OutputLayerNormWeightsTensor->GetTensor<void>());
+                m_OutputLayerNormWeightsTensor->GetTensorInfo(),
+                m_OutputLayerNormWeightsTensor->GetConstTensor<void>());
 
         // Bias only used if layer norm enabled
         armnn::TensorInfo forgetGateBiasTensorInfo({outputSize}, armnn::DataType::Signed32,
                 m_ForgetLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
         forgetGateBiasDecoder = MakeDecoder<float>(
-                forgetGateBiasTensorInfo, m_ForgetGateBiasTensor->GetTensor<void>());
+                forgetGateBiasTensorInfo, m_ForgetGateBiasTensor->GetConstTensor<void>());
 
         armnn::TensorInfo cellGateBiasTensorInfo({outputSize}, armnn::DataType::Signed32,
                 m_CellLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
         cellGateBiasDecoder = MakeDecoder<float>(
-                cellGateBiasTensorInfo, m_CellBiasTensor->GetTensor<void>());
+                cellGateBiasTensorInfo, m_CellBiasTensor->GetConstTensor<void>());
 
         armnn::TensorInfo outputGateBiasTensorInfo({outputSize}, armnn::DataType::Signed32,
                 m_OutputLayerNormWeightsTensor->GetTensorInfo().GetQuantizationScale() / 1024, 0);
         outputGateBiasDecoder = MakeDecoder<float>(
-                outputGateBiasTensorInfo, m_OutputGateBiasTensor->GetTensor<void>());
+                outputGateBiasTensorInfo, m_OutputGateBiasTensor->GetConstTensor<void>());
     }
 
     // Initialize internal state tensors with zeroes.