IVGCVSW-6552 Add support for aligned host memory

 * Add AllocatedData functions to OutputHandler
 * Enable importing aligned memory in ImportInputs
 * Enable importing aligned memory in ImportOutputs
 * Allow importing inputs and outputs if the memory is aligned
 * Implement Reconfigure function on ClConvolution2dWorkload
 * Add end-to-end tests on Ref and Cl to ensure that input and output memory
   is imported when aligned

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I9e5e4c26d1ac2f1d806803ade5f64c6479c51718
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 2752e72..95fb8a3 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -77,14 +77,16 @@
     return pRuntimeImpl->GetOutputTensorInfo(networkId, layerId);
 }
 
-std::vector<ImportedInputId> IRuntime::ImportInputs(NetworkId networkId, const InputTensors& inputTensors)
+std::vector<ImportedInputId> IRuntime::ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
+                                                    MemorySource forceImportMemorySource)
 {
-    return pRuntimeImpl->ImportInputs(networkId, inputTensors);
+    return pRuntimeImpl->ImportInputs(networkId, inputTensors, forceImportMemorySource);
 }
 
-std::vector<ImportedOutputId> IRuntime::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors)
+std::vector<ImportedOutputId> IRuntime::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
+                                                      MemorySource forceImportMemorySource)
 {
-    return pRuntimeImpl->ImportOutputs(networkId, outputTensors);
+    return pRuntimeImpl->ImportOutputs(networkId, outputTensors, forceImportMemorySource);
 }
 
 void IRuntime::ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds)
@@ -98,9 +100,12 @@
 
 Status IRuntime::EnqueueWorkload(NetworkId networkId,
                                  const InputTensors& inputTensors,
-                                 const OutputTensors& outputTensors)
+                                 const OutputTensors& outputTensors,
+                                 std::vector<ImportedInputId> preImportedInputIds,
+                                 std::vector<ImportedOutputId> preImportedOutputIds)
 {
-    return pRuntimeImpl->EnqueueWorkload(networkId, inputTensors, outputTensors);
+    return pRuntimeImpl->EnqueueWorkload(networkId, inputTensors, outputTensors,
+                                         preImportedInputIds, preImportedOutputIds);
 }
 
 Status IRuntime::Execute(IWorkingMemHandle& workingMemHandle,
@@ -566,14 +571,16 @@
     return GetLoadedNetworkPtr(networkId)->GetOutputTensorInfo(layerId);
 }
 
-std::vector<ImportedInputId> RuntimeImpl::ImportInputs(NetworkId networkId, const InputTensors& inputTensors)
+std::vector<ImportedInputId> RuntimeImpl::ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
+                                                       MemorySource forceImportMemorySource)
 {
-    return GetLoadedNetworkPtr(networkId)->ImportInputs(inputTensors);
+    return GetLoadedNetworkPtr(networkId)->ImportInputs(inputTensors, forceImportMemorySource);
 }
 
-std::vector<ImportedOutputId> RuntimeImpl::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors)
+std::vector<ImportedOutputId> RuntimeImpl::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
+                                                         MemorySource forceImportMemorySource)
 {
-    return GetLoadedNetworkPtr(networkId)->ImportOutputs(outputTensors);
+    return GetLoadedNetworkPtr(networkId)->ImportOutputs(outputTensors, forceImportMemorySource);
 }
 
 void RuntimeImpl::ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds)
@@ -587,7 +594,9 @@
 
 Status RuntimeImpl::EnqueueWorkload(NetworkId networkId,
                                 const InputTensors& inputTensors,
-                                const OutputTensors& outputTensors)
+                                const OutputTensors& outputTensors,
+                                std::vector<ImportedInputId> preImportedInputIds,
+                                std::vector<ImportedOutputId> preImportedOutputIds)
 {
     const auto startTime = armnn::GetTimeNow();
 
@@ -617,7 +626,8 @@
     }
     lastId=networkId;
 
-    auto status = loadedNetwork->EnqueueWorkload(inputTensors, outputTensors);
+    auto status = loadedNetwork->EnqueueWorkload(inputTensors, outputTensors,
+                                                 preImportedInputIds, preImportedOutputIds);
 
     ARMNN_LOG(info) << "Execution time: " << std::setprecision(2)
                     << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";