Release 18.08
diff --git a/src/armnn/test/RuntimeTests.cpp b/src/armnn/test/RuntimeTests.cpp
index fcb0a1e..e29a1d4 100644
--- a/src/armnn/test/RuntimeTests.cpp
+++ b/src/armnn/test/RuntimeTests.cpp
@@ -32,33 +32,46 @@
 BOOST_AUTO_TEST_CASE(RuntimeUnloadNetwork)
 {
     // build 2 mock-networks and load them into the runtime
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
 
-    // mock network 1
+    // Mock network 1.
     armnn::NetworkId networkIdentifier1 = 1;
     armnn::INetworkPtr mockNetwork1(armnn::INetwork::Create());
     mockNetwork1->AddInputLayer(0, "test layer");
-    runtime->LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, runtime->GetDeviceSpec()));
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+    runtime->LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, backends, runtime->GetDeviceSpec()));
 
-    // mock network 2
+    // Mock network 2.
     armnn::NetworkId networkIdentifier2 = 2;
     armnn::INetworkPtr mockNetwork2(armnn::INetwork::Create());
     mockNetwork2->AddInputLayer(0, "test layer");
-    runtime->LoadNetwork(networkIdentifier2, Optimize(*mockNetwork2, runtime->GetDeviceSpec()));
+    runtime->LoadNetwork(networkIdentifier2, Optimize(*mockNetwork2, backends, runtime->GetDeviceSpec()));
 
-    // unload one by its networkID
+    // Unloads one by its networkID.
     BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Success);
 
     BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Failure);
 }
 
 // Note: the current builds we don't do valgrind and gperftools based leak checking at the same
-//       time, so in practice WITH_VALGRIND and ARMNN_LEAK_CHECKING_ENABLED are exclusive. In
-//       the future the gperftools based leak checking should stay and the valgrind based should
-//       be removed.
+//       time, so in practice WITH_VALGRIND and ARMNN_LEAK_CHECKING_ENABLED are exclusive. The
+//       valgrind tests can stay for x86 builds, but on hikey Valgrind is just way too slow
+//       to be integrated into the CI system.
 
-#if ARMNN_LEAK_CHECKING_ENABLED
-void CreateAndDropDummyNetwork(armnn::Runtime & runtime)
+#ifdef ARMNN_LEAK_CHECKING_ENABLED
+
+struct DisableGlobalLeakChecking
+{
+    DisableGlobalLeakChecking()
+    {
+        ARMNN_LOCAL_LEAK_CHECKING_ONLY();
+    }
+};
+
+BOOST_GLOBAL_FIXTURE(DisableGlobalLeakChecking);
+
+void CreateAndDropDummyNetwork(const std::vector<armnn::Compute>& backends, armnn::Runtime& runtime)
 {
     armnn::NetworkId networkIdentifier;
     {
@@ -74,12 +87,12 @@
         input->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
         layer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
 
-        // set the tensors in the network
+        // Sets the tensors in the network.
         input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
         layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
 
         // optimize the network
-        armnn::IOptimizedNetworkPtr optNet = Optimize(*network, runtime.GetDeviceSpec());
+        armnn::IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime.GetDeviceSpec());
 
         runtime.LoadNetwork(networkIdentifier, std::move(optNet));
     }
@@ -94,10 +107,13 @@
         ARMNN_SCOPED_LEAK_CHECKER("Sanity_Check_Outer");
         {
             ARMNN_SCOPED_LEAK_CHECKER("Sanity_Check_Inner");
+            BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE() == true);
             std::unique_ptr<char[]> dummyAllocation(new char[1000]);
-            BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE() == false);
-            BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() >= 1000);
-            BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() >= 1);
+            BOOST_CHECK_MESSAGE(ARMNN_NO_LEAKS_IN_SCOPE() == false,
+                "A leak of 1000 bytes is expected here. "
+                "Please make sure environment variable: HEAPCHECK=draconian is set!");
+            BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 1000);
+            BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 1);
         }
         BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
         BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
@@ -109,22 +125,24 @@
 BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksGpuAcc)
 {
     BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE());
-
-    armnn::Runtime runtime(armnn::Compute::GpuAcc);
+    armnn::IRuntime::CreationOptions options;
+    armnn::Runtime runtime(options);
     armnn::RuntimeLoadedNetworksReserve(&runtime);
 
+    std::vector<armnn::Compute> backends = {armnn::Compute::GpuAcc};
     {
         // Do a warmup of this so we make sure that all one-time
         // initialization happens before we do the leak checking.
-        CreateAndDropDummyNetwork(runtime);
+        CreateAndDropDummyNetwork(backends, runtime);
     }
 
     {
         ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkGpuAcc");
+        BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
         // In the second run we check for all remaining memory
         // in use after the network was unloaded. If there is any
         // then it will be treated as a memory leak.
-        CreateAndDropDummyNetwork(runtime);
+        CreateAndDropDummyNetwork(backends, runtime);
         BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
         BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
         BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0);
@@ -136,22 +154,24 @@
 BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksCpuAcc)
 {
     BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE());
-
-    armnn::Runtime runtime(armnn::Compute::CpuAcc);
+    armnn::IRuntime::CreationOptions options;
+    armnn::Runtime runtime(options);
     armnn::RuntimeLoadedNetworksReserve(&runtime);
 
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuAcc};
     {
         // Do a warmup of this so we make sure that all one-time
         // initialization happens before we do the leak checking.
-        CreateAndDropDummyNetwork(runtime);
+        CreateAndDropDummyNetwork(backends, runtime);
     }
 
     {
         ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkCpuAcc");
+        BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
         // In the second run we check for all remaining memory
         // in use after the network was unloaded. If there is any
         // then it will be treated as a memory leak.
-        CreateAndDropDummyNetwork(runtime);
+        CreateAndDropDummyNetwork(backends, runtime);
         BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
         BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
         BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0);
@@ -163,21 +183,24 @@
 {
     BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE());
 
-    armnn::Runtime runtime(armnn::Compute::CpuRef);
+    armnn::IRuntime::CreationOptions options;
+    armnn::Runtime runtime(options);
     armnn::RuntimeLoadedNetworksReserve(&runtime);
 
+    std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
     {
         // Do a warmup of this so we make sure that all one-time
         // initialization happens before we do the leak checking.
-        CreateAndDropDummyNetwork(runtime);
+        CreateAndDropDummyNetwork(backends, runtime);
     }
 
     {
         ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkCpuRef");
+        BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
         // In the second run we check for all remaining memory
         // in use after the network was unloaded. If there is any
         // then it will be treated as a memory leak.
-        CreateAndDropDummyNetwork(runtime);
+        CreateAndDropDummyNetwork(backends, runtime);
         BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
         BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
         BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0);
@@ -199,25 +222,28 @@
 
     // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at,
     // the programmer could, at least in principle, have freed it before program exit.
-    // We want to test this in case memory is not freed as early as it could have been
+    // We want to test this in case memory is not freed as early as it could have been.
     unsigned long reachableBefore = 0;
     unsigned long reachableAfter = 0;
 
-    // needed as out params but we don't test them
+    // Needed as out params but we don't test them.
     unsigned long dubious = 0;
     unsigned long suppressed = 0;
 
-    // ensure that runtime is large enough before checking for memory leaks
-    // otherwise when loading the network it will automatically reserve memory that won't be released until destruction
+    // Ensure that runtime is large enough before checking for memory leaks.
+    // Otherwise, when loading the network, it will automatically reserve memory that won't be released
+    // until destruction.
     armnn::NetworkId networkIdentifier;
-    armnn::Runtime runtime(armnn::Compute::GpuAcc);
+    armnn::IRuntime::CreationOptions options;
+    armnn::Runtime runtime(options);
     armnn::RuntimeLoadedNetworksReserve(&runtime);
 
-    // check for leaks before we load the network and record them so that we can see the delta after unloading
+    // Checks for leaks before we load the network and records them so that we can see the delta after unloading.
     VALGRIND_DO_QUICK_LEAK_CHECK;
     VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed);
 
     // build a mock-network and load it into the runtime
+    std::vector<armnn::Compute> backends = {armnn::Compute::GpuAcc};
     {
         armnn::TensorInfo inputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32);
         armnn::TensorInfo outputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32);
@@ -231,12 +257,12 @@
         input->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
         layer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
 
-        // set the tensors in the network
+        // Sets the tensors in the network.
         input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
         layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
 
         // optimize the network
-        armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, runtime.GetDeviceSpec());
+        armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, backends, runtime.GetDeviceSpec());
 
         runtime.LoadNetwork(networkIdentifier, std::move(optNet));
     }
@@ -246,16 +272,16 @@
     VALGRIND_DO_ADDED_LEAK_CHECK;
     VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed);
 
-    // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass
+    // If we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass.
     BOOST_TEST(leakedBefore == leakedAfter);
 
     // Add resonable threshold after and before running valgrind with the ACL clear cache function.
     // TODO Threshold set to 80k until the root cause of the memory leakage is found and fixed. Revert threshold
-    // value to 1024 when fixed
+    // value to 1024 when fixed.
     BOOST_TEST(static_cast<long>(reachableAfter) - static_cast<long>(reachableBefore) < 81920);
 
-    // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
-    // so they are assigned to, but still considered unused, causing a warning
+    // These are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
+    // so they are assigned to, but still considered unused, causing a warning.
     boost::ignore_unused(dubious);
     boost::ignore_unused(suppressed);
 }
@@ -263,7 +289,7 @@
 
 // Note: this part of the code is due to be removed when we fully trust the gperftools based results.
 #ifdef WITH_VALGRIND
-// run with the following command to get all the amazing output (in the devenv/build folder) :)
+// Run with the following command to get all the amazing output (in the devenv/build folder) :)
 // valgrind --leak-check=full --show-leak-kinds=all --log-file=Valgrind_Memcheck_Leak_Report.txt armnn/test/UnitTests
 BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak)
 {
@@ -276,11 +302,11 @@
 
     // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at,
     // the programmer could, at least in principle, have freed it before program exit.
-    // We want to test this in case memory is not freed as early as it could have been
+    // We want to test this in case memory is not freed as early as it could have been.
     unsigned long reachableBefore = 0;
     unsigned long reachableAfter = 0;
 
-    // needed as out params but we don't test them
+    // Needed as out params but we don't test them.
     unsigned long dubious = 0;
     unsigned long suppressed = 0;
 
@@ -288,14 +314,15 @@
 
     // ensure that runtime is large enough before checking for memory leaks
     // otherwise when loading the network it will automatically reserve memory that won't be released until destruction
-    armnn::Runtime runtime(armnn::Compute::CpuRef);
+    armnn::IRuntime::CreationOptions options;
+    armnn::Runtime runtime(options);
     armnn::RuntimeLoadedNetworksReserve(&runtime);
 
-    // check for leaks before we load the network and record them so that we can see the delta after unloading
+    // Checks for leaks before we load the network and record them so that we can see the delta after unloading.
     VALGRIND_DO_QUICK_LEAK_CHECK;
     VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed);
 
-    // build a mock-network and load it into the runtime
+    // Builds a mock-network and loads it into the runtime.
     {
         unsigned int inputShape[] = {1, 7, 1, 1};
         armnn::TensorInfo inputTensorInfo(4, inputShape, armnn::DataType::Float32);
@@ -303,10 +330,9 @@
         std::unique_ptr<armnn::Network> mockNetwork1 = std::make_unique<armnn::Network>();
         mockNetwork1->AddInputLayer(0, "test layer");
 
-        armnn::DeviceSpec device;
-        device.DefaultComputeDevice = armnn::Compute::CpuRef;
 
-        runtime.LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, device));
+        std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+        runtime.LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, backends, runtime.GetDeviceSpec()));
     }
 
     runtime.UnloadNetwork(networkIdentifier1);
@@ -314,7 +340,7 @@
     VALGRIND_DO_ADDED_LEAK_CHECK;
     VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed);
 
-    // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass
+    // If we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass.
     BOOST_TEST(leakedBefore == leakedAfter);
 
     #if defined(ARMCOMPUTECL_ENABLED)
@@ -329,11 +355,134 @@
 
     BOOST_TEST(reachableBefore >= reachableAfter);
 
-    // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
-    // so they are assigned to, but still considered unused, causing a warning
+    // These are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
+    // so they are assigned to, but still considered unused, causing a warning.
     boost::ignore_unused(dubious);
     boost::ignore_unused(suppressed);
 }
 #endif
 
+#if ARMCOMPUTENEON_ENABLED
+BOOST_AUTO_TEST_CASE(RuntimeValidateCpuAccDeviceSupportLayerNoFallback)
+{
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc };
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+    BOOST_CHECK(optNet);
+
+    // Load it into the runtime. It should succeed.
+    armnn::NetworkId netId;
+    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == armnn::Status::Success);
+}
+#endif // ARMCOMPUTENEON_ENABLED
+
+#if ARMCOMPUTECL_ENABLED
+BOOST_AUTO_TEST_CASE(RuntimeValidateGpuDeviceSupportLayerNoFallback)
+{
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::Compute> backends = { armnn::Compute::GpuAcc };
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+    BOOST_CHECK(optNet);
+
+    // Load it into the runtime. It should succeed.
+    armnn::NetworkId netId;
+    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == armnn::Status::Success);
+}
+#endif // ARMCOMPUTECL_ENABLED
+
+BOOST_AUTO_TEST_CASE(RuntimeCpuRef)
+{
+    using namespace armnn;
+
+    // Create runtime in which test will run
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // build up the structure of the network
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input = net->AddInputLayer(0);
+
+    // This layer configuration isn't supported by CpuAcc; it should fall back to CpuRef.
+    NormalizationDescriptor descriptor;
+    IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+    IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+    normalize->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+
+    // optimize the network
+    std::vector<armnn::Compute> backends = { armnn::Compute::CpuRef };
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    // Load it into the runtime. It should success.
+    armnn::NetworkId netId;
+    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success);
+}
+
+BOOST_AUTO_TEST_CASE(RuntimeFallbackToCpuRef)
+{
+    using namespace armnn;
+
+    // Create runtime in which test will run
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // build up the structure of the network
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input = net->AddInputLayer(0);
+
+    // This layer configuration isn't supported by CpuAcc; it should fall back to CpuRef.
+    NormalizationDescriptor descriptor;
+    IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+    IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+    normalize->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+
+    // Allow fallback to CpuRef.
+    std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
+    // optimize the network
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    // Load it into the runtime. It should succeed.
+    armnn::NetworkId netId;
+    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success);
+}
+
 BOOST_AUTO_TEST_SUITE_END()