IVGCVSW-5831 Add additional options to Arm NN External Delegate

 * Added enable-fast-math and number-of-threads options.
 * Added save-cached-network and cached-network-filepath options.
 * Added external_delegate Python tests for the new options.

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I7cf6522a6f895cd71ed8f369d94a5113d78594f9
diff --git a/delegate/python/test/test_external_delegate.py b/delegate/python/test/test_external_delegate.py
index 93d373d..f01a2d3 100644
--- a/delegate/python/test/test_external_delegate.py
+++ b/delegate/python/test/test_external_delegate.py
@@ -57,6 +57,115 @@
     # cleanup
     os.remove(tuning_file)
 
+@pytest.mark.GpuAccTest
+def test_external_delegate_options_gpu_cached_network(delegate_dir, test_data_folder, tmp_path):
+
+    binary_file = os.path.join(str(tmp_path), "test_binary.bin")
+    # cleanup previous test run if necessary
+    if os.path.exists(binary_file):
+        os.remove(binary_file)
+
+    # Create blank binary file to write to.
+    open(binary_file, 'a').close()
+    assert (os.path.exists(binary_file))
+    assert (os.stat(binary_file).st_size == 0)
+
+    # Run inference to save cached network.
+    armnn_delegate = tflite.load_delegate(
+        delegate_dir,
+        options={
+            "backends": "GpuAcc",
+            "save-cached-network": "1",
+            "cached-network-filepath": binary_file,
+            "logging-severity": "info"})
+
+    run_mock_model(armnn_delegate, test_data_folder)
+
+    # destroy delegate and check if file has been saved.
+    armnn_delegate.__del__()
+    assert (os.stat(binary_file).st_size != 0)
+
+    # Create second delegate to load in binary file created.
+    armnn_delegate2 = tflite.load_delegate(
+        delegate_dir,
+        options={
+            "backends": "GpuAcc",
+            "cached-network-filepath": binary_file,
+            "logging-severity": "info"})
+
+    run_mock_model(armnn_delegate2, test_data_folder)
+
+    # cleanup
+    os.remove(binary_file)
+
+@pytest.mark.GpuAccTest
+def test_external_delegate_gpu_fastmath(delegate_dir, test_data_folder):
+    # create armnn delegate with enable-fast-math
+    # fast-math is only enabled on Conv2d layer, so use conv2d model.
+    armnn_delegate = tflite.load_delegate(delegate_dir, options = {'backends': 'GpuAcc',
+                                                                   'enable-fast-math': '1',
+                                                                   "logging-severity": "info"})
+
+    model_file_name = 'conv2d.tflite'
+
+    inputShape = [ 1, 5, 5, 1 ]
+    outputShape = [ 1, 3, 3, 1 ]
+
+    inputValues = [ 1, 5, 2, 3, 5,
+                    8, 7, 3, 6, 3,
+                    3, 3, 9, 1, 9,
+                    4, 1, 8, 1, 3,
+                    6, 8, 1, 9, 2 ]
+
+    expectedResult = [ 28, 38, 29,
+                       96, 104, 53,
+                       31, 55, 24 ]
+
+    input = np.array(inputValues, dtype=np.float32).reshape(inputShape)
+    expected_output = np.array(expectedResult, dtype=np.float32).reshape(outputShape)
+
+    # run the inference
+    armnn_outputs = run_inference(test_data_folder, model_file_name, [input], [armnn_delegate])
+
+    # check results
+    compare_outputs(armnn_outputs, [expected_output])
+
+@pytest.mark.CpuAccTest
+def test_external_delegate_cpu_options(capfd, delegate_dir, test_data_folder):
+    # create armnn delegate with enable-fast-math and number-of-threads options
+    # fast-math is only enabled on Conv2d layer, so use conv2d model.
+    armnn_delegate = tflite.load_delegate(delegate_dir, options = {'backends': 'CpuAcc',
+                                                                   'enable-fast-math': '1',
+                                                                   'number-of-threads': '4',
+                                                                   "logging-severity": "info"})
+
+    model_file_name = 'conv2d.tflite'
+
+    inputShape = [ 1, 5, 5, 1 ]
+    outputShape = [ 1, 3, 3, 1 ]
+
+    inputValues = [ 1, 5, 2, 3, 5,
+                    8, 7, 3, 6, 3,
+                    3, 3, 9, 1, 9,
+                    4, 1, 8, 1, 3,
+                    6, 8, 1, 9, 2 ]
+
+    expectedResult = [ 28, 38, 29,
+                       96, 104, 53,
+                       31, 55, 24 ]
+
+    input = np.array(inputValues, dtype=np.float32).reshape(inputShape)
+    expected_output = np.array(expectedResult, dtype=np.float32).reshape(outputShape)
+
+    # run the inference
+    armnn_outputs = run_inference(test_data_folder, model_file_name, [input], [armnn_delegate])
+
+    # check results
+    compare_outputs(armnn_outputs, [expected_output])
+
+    captured = capfd.readouterr()
+    assert 'Set CPPScheduler to Linear mode, with 4 threads to use' in captured.out
+
 def test_external_delegate_options_wrong_logging_level(delegate_dir):
     with pytest.raises(ValueError):
         tflite.load_delegate(