IVGCVSW-3687 Add INetworkProperties to LoadNetwork

 * Allows users to specify if Import/Export should be used

Signed-off-by: David Monahan <david.monahan@arm.com>
Change-Id: I64da26a6acbeb91ef72d31b6ccc01bb1447f624d
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 5b64085..1000ece 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -41,7 +41,8 @@
 } // anonymous
 
 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
-                                                                std::string & errorMessage)
+                                                                std::string& errorMessage,
+                                                                const INetworkProperties& networkProperties)
 {
     std::unique_ptr<LoadedNetwork> loadedNetwork;
 
@@ -55,7 +56,7 @@
 
     try
     {
-        loadedNetwork.reset(new LoadedNetwork(std::move(net)));
+        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties));
     }
     catch (const armnn::RuntimeException& error)
     {
@@ -73,8 +74,11 @@
     return loadedNetwork;
 }
 
-LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net)
-    : m_OptimizedNetwork(std::move(net))
+LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
+                             const INetworkProperties& networkProperties) :
+                             m_OptimizedNetwork(std::move(net)),
+                             m_IsImportEnabled(networkProperties.m_ImportEnabled),
+                             m_IsExportEnabled(networkProperties.m_ExportEnabled)
 {
     // Create a profiler and register it for the current thread.
     m_Profiler = std::make_shared<Profiler>();
@@ -392,7 +396,7 @@
     info.m_OutputTensorInfos.push_back(outputTensorInfo);
 
     MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
-    if (CheckFlag(importFlags, MemorySource::Malloc))  // Try import the input tensor
+    if (CheckFlag(importFlags, MemorySource::Malloc) && m_IsImportEnabled)  // Try import the input tensor
     {
         // This assumes a CPU Tensor handle
         void* mem = tensorHandle->Map(false);
@@ -402,13 +406,16 @@
             return; // No need for a workload since the import has been done.
         }
         tensorHandle->Unmap();
+        throw MemoryImportException("EnqueueInput: Memory Import failed");
     }
+    else
+    {
+        // Create a mem copy workload for input since we did not import
+        auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
 
-    // Create a mem copy workload for input since we could not import
-    auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
-
-    BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
-    m_InputQueue.push_back(move(inputWorkload));
+        BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
+        m_InputQueue.push_back(move(inputWorkload));
+    }
 }
 
 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
@@ -444,7 +451,8 @@
     // b) The tensor has zero padding
     // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
     // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
-    if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
+    if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input
+        && m_IsExportEnabled)
     {
         if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)
         {
@@ -467,17 +475,23 @@
 
                     return; //No need to add the output workload below
                 }
+                else
+                {
+                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
+                }
             }
         }
     }
+    else
+    {
+        // Export is disabled or the layer topology rules it out, so add an output workload which performs a memcopy.
+        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
+        info.m_InputTensorInfos.push_back(inputTensorInfo);
 
-    // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy.
-    outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
-    info.m_InputTensorInfos.push_back(inputTensorInfo);
-
-    auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
-    BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
-    m_OutputQueue.push_back(move(outputWorkload));
+        auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
+        BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
+        m_OutputQueue.push_back(move(outputWorkload));
+    }
 }
 
 void LoadedNetwork::AllocateWorkingMemory()
diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp
index 808a932..08c09b8 100644
--- a/src/armnn/LoadedNetwork.hpp
+++ b/src/armnn/LoadedNetwork.hpp
@@ -41,7 +41,8 @@
     Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors);
 
     static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
-                                                            std::string & errorMessage);
+                                                            std::string & errorMessage,
+                                                            const INetworkProperties& networkProperties);
 
     // NOTE we return by reference as the purpose of this method is only to provide
     // access to the private m_Profiler and in theory we should not need to increment
@@ -55,7 +56,7 @@
 private:
     void AllocateWorkingMemory();
 
-    LoadedNetwork(std::unique_ptr<OptimizedNetwork> net);
+    LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const INetworkProperties& networkProperties);
 
     void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
 
@@ -84,6 +85,8 @@
     mutable std::mutex m_WorkingMemMutex;
 
     bool m_IsWorkingMemAllocated=false;
+    bool m_IsImportEnabled=false;
+    bool m_IsExportEnabled=false;
 
     TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
 };
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 9e87484..e478356 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -49,7 +49,16 @@
 
 Status Runtime::LoadNetwork(NetworkId& networkIdOut,
                             IOptimizedNetworkPtr inNetwork,
-                            std::string & errorMessage)
+                            std::string& errorMessage)
+{
+    INetworkProperties networkProperties;
+    return LoadNetwork(networkIdOut, std::move(inNetwork), errorMessage, networkProperties);
+}
+
+Status Runtime::LoadNetwork(NetworkId& networkIdOut,
+                            IOptimizedNetworkPtr inNetwork,
+                            std::string& errorMessage,
+                            const INetworkProperties& networkProperties)
 {
     IOptimizedNetwork* rawNetwork = inNetwork.release();
 
@@ -62,7 +71,8 @@
 
     unique_ptr<LoadedNetwork> loadedNetwork = LoadedNetwork::MakeLoadedNetwork(
         std::unique_ptr<OptimizedNetwork>(boost::polymorphic_downcast<OptimizedNetwork*>(rawNetwork)),
-        errorMessage);
+        errorMessage,
+        networkProperties);
 
     if (!loadedNetwork)
     {
diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp
index 35684f1..a028c87 100644
--- a/src/armnn/Runtime.hpp
+++ b/src/armnn/Runtime.hpp
@@ -38,7 +38,12 @@
     /// @return armnn::Status
     virtual Status LoadNetwork(NetworkId& networkIdOut,
                                IOptimizedNetworkPtr network,
-                               std::string & errorMessage) override;
+                               std::string& errorMessage) override;
+
+    virtual Status LoadNetwork(NetworkId& networkIdOut,
+                               IOptimizedNetworkPtr network,
+                               std::string& errorMessage,
+                               const INetworkProperties& networkProperties) override;
 
     virtual TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const override;
     virtual TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const override;
diff --git a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
index 040782b..ecc8806 100644
--- a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
@@ -172,7 +172,7 @@
     }
 }
 
-inline void ImportNonAlignedPointerTest(std::vector<BackendId> backends)
+inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
 {
     using namespace armnn;
 
@@ -201,7 +201,10 @@
 
     // Loads it into the runtime.
     NetworkId netId;
-    runtime->LoadNetwork(netId, std::move(optNet));
+    std::string ignoredErrorMessage;
+    // Enable Importing
+    INetworkProperties networkProperties(true, true);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
 
     // Creates structures for input & output
     std::vector<float> inputData
@@ -214,12 +217,77 @@
 
     std::vector<float> outputData(5);
 
+    // Aligned output
+    float * alignedOutputData = outputData.data();
+
+    InputTensors inputTensors
+    {
+        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
+    };
+    OutputTensors outputTensors
+    {
+        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
+    };
+
+    // The result of the inference is not important; with importing enabled and a
+    // misaligned input pointer, the memory import should fail.
+    runtime->GetProfiler(netId)->EnableProfiling(true);
+
+    // Do the inference and expect it to fail with a MemoryImportException
+    BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
+}
+
+inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
+{
+    using namespace armnn;
+
+    // Create runtime in which test will run
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // build up the structure of the network
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input = net->AddInputLayer(0);
+
+    NormalizationDescriptor descriptor;
+    IConnectableLayer* norm = net->AddNormalizationLayer(descriptor);
+
+    IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(norm->GetInputSlot(0));
+    norm->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+    norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+
+    // Optimize the network
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    // Loads it into the runtime.
+    NetworkId netId;
+    std::string ignoredErrorMessage;
+    // Enable Importing
+    INetworkProperties networkProperties(true, true);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
+
+    // Creates structures for input & output
+    std::vector<float> inputData
+    {
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f
+    };
+
+    // Aligned input
+    float * alignedInputData = inputData.data();
+
+    std::vector<float> outputData(5);
+
     // Misaligned output
     float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
 
     InputTensors inputTensors
     {
-        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
+        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
     };
     OutputTensors outputTensors
     {
@@ -230,24 +298,8 @@
     // should not be CopyMemGeneric workloads.
     runtime->GetProfiler(netId)->EnableProfiling(true);
 
-    // Do the inference
-    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
-
-    // Retrieve the Profiler.Print() output to get the workload execution
-    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
-    std::stringstream ss;
-    profilerManager.GetProfiler()->Print(ss);;
-    std::string dump = ss.str();
-
-    // Contains RefNormalizationWorkload
-    std::size_t found = dump.find("RefNormalizationWorkload");
-    BOOST_TEST(found != std::string::npos);
-    // No Contains SyncMemGeneric (Created when importing the output tensor handle)
-    found = dump.find("SyncMemGeneric");
-    BOOST_TEST(found == std::string::npos);
-    // Contains CopyMemGeneric
-    found = dump.find("CopyMemGeneric");
-    BOOST_TEST(found != std::string::npos);
+    // Do the inference and expect it to fail with a MemoryExportException
+    BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
 }
 
 inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
@@ -279,7 +331,10 @@
 
     // Loads it into the runtime.
     NetworkId netId;
-    runtime->LoadNetwork(netId, std::move(optNet));
+    std::string ignoredErrorMessage;
+    // Enable Importing
+    INetworkProperties networkProperties(true, true);
+    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
 
     // Creates structures for input & output
     std::vector<float> inputData
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 6024f15..52454a2 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -973,7 +973,12 @@
 // Only run these tests on non Android platforms
 BOOST_AUTO_TEST_CASE(RefImportNonAlignedPointerTest)
 {
-    ImportNonAlignedPointerTest(defaultBackends);
+    ImportNonAlignedInputPointerTest(defaultBackends);
+}
+
+BOOST_AUTO_TEST_CASE(RefExportNonAlignedPointerTest)
+{
+    ImportNonAlignedOutputPointerTest(defaultBackends);
 }
 
 BOOST_AUTO_TEST_CASE(RefImportAlignedPointerTest)