Update tosa_verif_run_ref

Rename to tosa_verif_run_tests to match build_tests
Improve output and system under test (SUT) support
Improve xunit support
Add results checker
Add utilities json2numpy and json2fbbin (example usage below)
Add a set of Python tests
Update README.md
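
Example usage of the json2numpy utility (a minimal sketch; the file name is
illustrative and not part of this change):

    from json2numpy.json2numpy import main as json2numpy
    json2numpy(["test-result-0.npy"])   # writes test-result-0.json next to it
    json2numpy(["test-result-0.json"])  # converts it back to test-result-0.npy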

Signed-off-by: Jeremy Johnson <jeremy.johnson@arm.com>
Change-Id: Ia09f8e6fd126579b3ba1c1cda95c1326802417ca
diff --git a/verif/tests/mock_flatc.py b/verif/tests/mock_flatc.py
new file mode 100755
index 0000000..bdee0f8
--- /dev/null
+++ b/verif/tests/mock_flatc.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+"""Mocked flatc compiler for testing."""
+# Copyright (c) 2021-2022, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+from pathlib import Path
+
+
+def main(argv=None):
+    """Mock the required behaviour of the flatc compiler."""
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-o",
+        dest="output_dir",
+        type=Path,
+        help="output directory",
+    )
+    parser.add_argument(
+        "--json",
+        action="store_true",
+        help="convert to JSON",
+    )
+    parser.add_argument(
+        "--binary",
+        action="store_true",
+        help="convert to binary",
+    )
+    parser.add_argument(
+        "--raw-binary",
+        action="store_true",
+        help="convert from raw-binary",
+    )
+    parser.add_argument(
+        "path",
+        type=Path,
+        nargs="*",
+        default=[],
+        help="the path to the fbs schema and files to convert",
+    )
+
+    args = parser.parse_args(argv)
+    path = args.path
+    if len(path) == 0:
+        print("ERROR: Missing fbs schema and files to convert")
+        return 2
+    return 0
+
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/verif/tests/test_json2numpy.py b/verif/tests/test_json2numpy.py
new file mode 100644
index 0000000..aec555c
--- /dev/null
+++ b/verif/tests/test_json2numpy.py
@@ -0,0 +1,142 @@
+"""Tests for json2numpy.py."""
+# Copyright (c) 2021-2022, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+import json
+import os
+
+import numpy as np
+import pytest
+
+from json2numpy.json2numpy import main
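+# As exercised below, json2numpy's main() takes a single-entry list of file
+# paths and converts .npy to .json (or .json back to .npy) alongside the input.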
+
+
+@pytest.mark.parametrize(
+    "npy_filename,json_filename,data_type",
+    [
+        ("single_num.npy", "single_num.json", np.int8),
+        ("multiple_num.npy", "multiple_num.json", np.int8),
+        ("single_num.npy", "single_num.json", np.int16),
+        ("multiple_num.npy", "multiple_num.json", np.int16),
+        ("single_num.npy", "single_num.json", np.int32),
+        ("multiple_num.npy", "multiple_num.json", np.int32),
+        ("single_num.npy", "single_num.json", np.int64),
+        ("multiple_num.npy", "multiple_num.json", np.int64),
+        ("single_num.npy", "single_num.json", np.uint8),
+        ("multiple_num.npy", "multiple_num.json", np.uint8),
+        ("single_num.npy", "single_num.json", np.uint16),
+        ("multiple_num.npy", "multiple_num.json", np.uint16),
+        ("single_num.npy", "single_num.json", np.uint32),
+        ("multiple_num.npy", "multiple_num.json", np.uint32),
+        ("single_num.npy", "single_num.json", np.uint64),
+        ("multiple_num.npy", "multiple_num.json", np.uint64),
+        ("single_num.npy", "single_num.json", np.float16),
+        ("multiple_num.npy", "multiple_num.json", np.float16),
+        ("single_num.npy", "single_num.json", np.float32),
+        ("multiple_num.npy", "multiple_num.json", np.float32),
+        ("single_num.npy", "single_num.json", np.float64),
+        ("multiple_num.npy", "multiple_num.json", np.float64),
+        ("single_num.npy", "single_num.json", bool),
+        ("multiple_num.npy", "multiple_num.json", bool),
+    ],
+)
+def test_json2numpy_npy_file(npy_filename, json_filename, data_type):
+    """Test conversion to JSON."""
+    # Generate deterministic numpy data (ones) so float comparisons are stable.
+    if "single" in npy_filename:
+        npy_data = np.ones(shape=(1, 1), dtype=data_type)
+    elif "multiple" in npy_filename:
+        npy_data = np.ones(shape=(2, 3), dtype=data_type)
+
+    # Get filepaths
+    npy_file = os.path.join(os.path.dirname(__file__), npy_filename)
+    json_file = os.path.join(os.path.dirname(__file__), json_filename)
+
+    # Save npy data to file and reload it.
+    with open(npy_file, "wb") as f:
+        np.save(f, npy_data)
+    npy_data = np.load(npy_file)
+
+    args = [npy_file]
+    """Converts npy file to json"""
+    assert main(args) == 0
+
+    with open(json_file) as json_fd:
+        json_data = json.load(json_fd)
+    assert np.dtype(json_data["type"]) == npy_data.dtype
+    assert np.array(json_data["data"]).shape == npy_data.shape
+    assert (np.array(json_data["data"]) == npy_data).all()
+
+    # Remove files created
+    if os.path.exists(npy_file):
+        os.remove(npy_file)
+    if os.path.exists(json_file):
+        os.remove(json_file)
+
+
+@pytest.mark.parametrize(
+    "npy_filename,json_filename,data_type",
+    [
+        ("single_num.npy", "single_num.json", np.int8),
+        ("multiple_num.npy", "multiple_num.json", np.int8),
+        ("single_num.npy", "single_num.json", np.int16),
+        ("multiple_num.npy", "multiple_num.json", np.int16),
+        ("single_num.npy", "single_num.json", np.int32),
+        ("multiple_num.npy", "multiple_num.json", np.int32),
+        ("single_num.npy", "single_num.json", np.int64),
+        ("multiple_num.npy", "multiple_num.json", np.int64),
+        ("single_num.npy", "single_num.json", np.uint8),
+        ("multiple_num.npy", "multiple_num.json", np.uint8),
+        ("single_num.npy", "single_num.json", np.uint16),
+        ("multiple_num.npy", "multiple_num.json", np.uint16),
+        ("single_num.npy", "single_num.json", np.uint32),
+        ("multiple_num.npy", "multiple_num.json", np.uint32),
+        ("single_num.npy", "single_num.json", np.uint64),
+        ("multiple_num.npy", "multiple_num.json", np.uint64),
+        ("single_num.npy", "single_num.json", np.float16),
+        ("multiple_num.npy", "multiple_num.json", np.float16),
+        ("single_num.npy", "single_num.json", np.float32),
+        ("multiple_num.npy", "multiple_num.json", np.float32),
+        ("single_num.npy", "single_num.json", np.float64),
+        ("multiple_num.npy", "multiple_num.json", np.float64),
+        ("single_num.npy", "single_num.json", bool),
+        ("multiple_num.npy", "multiple_num.json", bool),
+    ],
+)
+def test_json2numpy_json_file(npy_filename, json_filename, data_type):
+    """Test conversion to binary."""
+    # Generate json data.
+    if "single" in npy_filename:
+        npy_data = np.ndarray(shape=(1, 1), dtype=data_type)
+    elif "multiple" in npy_filename:
+        npy_data = np.ndarray(shape=(2, 3), dtype=data_type)
+
+    # Generate json dictionary
+    list_data = npy_data.tolist()
+    json_data_type = str(npy_data.dtype)
+
+    json_data = {}
+    json_data["type"] = json_data_type
+    json_data["data"] = list_data
+
+    # Get filepaths
+    npy_file = os.path.join(os.path.dirname(__file__), npy_filename)
+    json_file = os.path.join(os.path.dirname(__file__), json_filename)
+
+    # Save json data to file and reload it.
+    with open(json_file, "w") as f:
+        json.dump(json_data, f)
+    with open(json_file) as json_fd:
+        json_data = json.load(json_fd)
+
+    args = [json_file]
+    """Converts json file to npy"""
+    assert main(args) == 0
+
+    npy_data = np.load(npy_file)
+    assert np.dtype(json_data["type"]) == npy_data.dtype
+    assert np.array(json_data["data"]).shape == npy_data.shape
+    assert (np.array(json_data["data"]) == npy_data).all()
+
+    # Remove files created
+    if os.path.exists(npy_file):
+        os.remove(npy_file)
+    if os.path.exists(json_file):
+        os.remove(json_file)
diff --git a/verif/tests/test_tosa_result_checker.py b/verif/tests/test_tosa_result_checker.py
new file mode 100644
index 0000000..bc8a2fc
--- /dev/null
+++ b/verif/tests/test_tosa_result_checker.py
@@ -0,0 +1,197 @@
+"""Tests for tosa_result_checker.py."""
+# Copyright (c) 2021-2022, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+import checker.tosa_result_checker as trc
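+# The checker's main() compares two npy files and returns a trc.TestResult
+# code (PASS, MISMATCH, MISSING_FILE and INCORRECT_FORMAT are used below).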
+
+
+def _create_data_file(name, npy_data):
+    """Create numpy data file."""
+    file = Path(__file__).parent / name
+    with open(file, "wb") as f:
+        np.save(f, npy_data)
+    return file
+
+
+def _create_empty_file(name):
+    """Create an empty file that is not valid numpy data."""
+    file = Path(__file__).parent / name
+    file.touch()
+    return file
+
+
+def _delete_data_file(file: Path):
+    """Delete numpy data file."""
+    file.unlink()
+
+
+@pytest.mark.parametrize(
+    "data_type,expected",
+    [
+        (np.int8, trc.TestResult.MISMATCH),
+        (np.int16, trc.TestResult.MISMATCH),
+        (np.int32, trc.TestResult.PASS),
+        (np.int64, trc.TestResult.PASS),
+        (np.uint8, trc.TestResult.MISMATCH),
+        (np.uint16, trc.TestResult.MISMATCH),
+        (np.uint32, trc.TestResult.MISMATCH),
+        (np.uint64, trc.TestResult.MISMATCH),
+        (np.float16, trc.TestResult.MISMATCH),
+        (np.float32, trc.TestResult.PASS),
+        (np.float64, trc.TestResult.MISMATCH),
+        (bool, trc.TestResult.PASS),
+    ],
+)
+def test_supported_types(data_type, expected):
+    """Check which data types are supported."""
+    # Generate deterministic data (ones) so float comparisons pass reliably.
+    npy_data = np.ones(shape=(2, 3), dtype=data_type)
+
+    # Save data as reference and result files to compare.
+    reference_file = _create_data_file("reference.npy", npy_data)
+    result_file = _create_data_file("result.npy", npy_data)
+
+    args = [str(reference_file), str(result_file)]
+    """Compares reference and result npy files, returns zero if it passes."""
+    assert trc.main(args) == expected
+
+    # Remove files created
+    _delete_data_file(reference_file)
+    _delete_data_file(result_file)
+
+
+@pytest.mark.parametrize(
+    "data_type,expected",
+    [
+        (np.int32, trc.TestResult.MISMATCH),
+        (np.int64, trc.TestResult.MISMATCH),
+        (np.float32, trc.TestResult.MISMATCH),
+        (bool, trc.TestResult.MISMATCH),
+    ],
+)
+def test_shape_mismatch(data_type, expected):
+    """Check that mismatch shapes do not pass."""
+    # Generate and save data as reference and result files to compare.
+    npy_data = np.ones(shape=(3, 2), dtype=data_type)
+    reference_file = _create_data_file("reference.npy", npy_data)
+    npy_data = np.ones(shape=(2, 3), dtype=data_type)
+    result_file = _create_data_file("result.npy", npy_data)
+
+    args = [str(reference_file), str(result_file)]
+    """Compares reference and result npy files, returns zero if it passes."""
+    assert trc.main(args) == expected
+
+    # Remove files created
+    _delete_data_file(reference_file)
+    _delete_data_file(result_file)
+
+
+@pytest.mark.parametrize(
+    "data_type,expected",
+    [
+        (np.int32, trc.TestResult.MISMATCH),
+        (np.int64, trc.TestResult.MISMATCH),
+        (np.float32, trc.TestResult.MISMATCH),
+        (bool, trc.TestResult.MISMATCH),
+    ],
+)
+def test_results_mismatch(data_type, expected):
+    """Check that different results do not pass."""
+    # Generate and save data as reference and result files to compare.
+    npy_data = np.zeros(shape=(2, 3), dtype=data_type)
+    reference_file = _create_data_file("reference.npy", npy_data)
+    npy_data = np.ones(shape=(2, 3), dtype=data_type)
+    result_file = _create_data_file("result.npy", npy_data)
+
+    args = [str(reference_file), str(result_file)]
+    """Compares reference and result npy files, returns zero if it passes."""
+    assert trc.main(args) == expected
+
+    # Remove files created
+    _delete_data_file(reference_file)
+    _delete_data_file(result_file)
+
+
+@pytest.mark.parametrize(
+    "data_type1,data_type2,expected",
+    [  # Pairwise testing of all supported types
+        (np.int32, np.int64, trc.TestResult.MISMATCH),
+        (bool, np.float32, trc.TestResult.MISMATCH),
+    ],
+)
+def test_types_mismatch(data_type1, data_type2, expected):
+    """Check that different types in results do not pass."""
+    # Generate and save data as reference and result files to compare.
+    npy_data = np.ones(shape=(3, 2), dtype=data_type1)
+    reference_file = _create_data_file("reference.npy", npy_data)
+    npy_data = np.ones(shape=(3, 2), dtype=data_type2)
+    result_file = _create_data_file("result.npy", npy_data)
+
+    args = [str(reference_file), str(result_file)]
+    """Compares reference and result npy files, returns zero if it passes."""
+    assert trc.main(args) == expected
+
+    # Remove files created
+    _delete_data_file(reference_file)
+    _delete_data_file(result_file)
+
+
+@pytest.mark.parametrize(
+    "reference_exists,result_exists,expected",
+    [
+        (True, False, trc.TestResult.MISSING_FILE),
+        (False, True, trc.TestResult.MISSING_FILE),
+    ],
+)
+def test_missing_files(reference_exists, result_exists, expected):
+    """Check that missing files are caught."""
+    # Generate and save data
+    npy_data = np.ndarray(shape=(2, 3), dtype=bool)
+    reference_file = _create_data_file("reference.npy", npy_data)
+    result_file = _create_data_file("result.npy", npy_data)
+    if not reference_exists:
+        _delete_data_file(reference_file)
+    if not result_exists:
+        _delete_data_file(result_file)
+
+    args = [str(reference_file), str(result_file)]
+    assert trc.main(args) == expected
+
+    if reference_exists:
+        _delete_data_file(reference_file)
+    if result_exists:
+        _delete_data_file(result_file)
+
+
+@pytest.mark.parametrize(
+    "reference_numpy,result_numpy,expected",
+    [
+        (True, False, trc.TestResult.INCORRECT_FORMAT),
+        (False, True, trc.TestResult.INCORRECT_FORMAT),
+    ],
+)
+def test_incorrect_format_files(reference_numpy, result_numpy, expected):
+    """Check that incorrect format files are caught."""
+    # Generate and save data
+    npy_data = np.ndarray(shape=(2, 3), dtype=bool)
+    reference_file = (
+        _create_data_file("reference.npy", npy_data)
+        if reference_numpy
+        else _create_empty_file("empty.npy")
+    )
+    result_file = (
+        _create_data_file("result.npy", npy_data)
+        if result_numpy
+        else _create_empty_file("empty.npy")
+    )
+
+    args = [str(reference_file), str(result_file)]
+    assert trc.main(args) == expected
+
+    _delete_data_file(reference_file)
+    _delete_data_file(result_file)
diff --git a/verif/tests/test_tosa_run_tests_args.py b/verif/tests/test_tosa_run_tests_args.py
new file mode 100644
index 0000000..a0c3ed5
--- /dev/null
+++ b/verif/tests/test_tosa_run_tests_args.py
@@ -0,0 +1,68 @@
+"""Tests for tosa_verif_run_tests.py."""
+# Copyright (c) 2021-2022, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+from runner.tosa_verif_run_tests import parseArgs
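+# parseArgs() takes an argv list and returns an argparse Namespace; every case
+# below supplies "-t test" alongside the option under test.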
+
+
+def test_args_test():
+    """Test arguments - test."""
+    args = ["-t", "test"]
+    parsed_args = parseArgs(args)
+    assert parsed_args.test == ["test"]
+
+
+def test_args_ref_model_path():
+    """Test arguments - ref_model_path."""
+    args = ["--ref-model-path", "ref_model_path", "-t", "test"]
+    parsed_args = parseArgs(args)
+    assert parsed_args.ref_model_path == "ref_model_path"
+
+
+def test_args_ref_debug():
+    """Test arguments - ref_debug."""
+    args = ["--ref-debug", "ref_debug", "-t", "test"]
+    parsed_args = parseArgs(args)
+    assert parsed_args.ref_debug == "ref_debug"
+
+
+def test_args_ref_intermediates():
+    """Test arguments - ref_intermediates."""
+    args = ["--ref-intermediates", "2", "-t", "test"]
+    parsed_args = parseArgs(args)
+    assert parsed_args.ref_intermediates == 2
+
+
+def test_args_verbose():
+    """Test arguments - ref_verbose."""
+    args = ["-v", "-t", "test"]
+    parsed_args = parseArgs(args)
+    print(parsed_args.verbose)
+    assert parsed_args.verbose == 1
+
+
+def test_args_jobs():
+    """Test arguments - jobs."""
+    args = ["-j", "42", "-t", "test"]
+    parsed_args = parseArgs(args)
+    assert parsed_args.jobs == 42
+
+
+def test_args_sut_module():
+    """Test arguments - sut_module."""
+    args = ["--sut-module", "sut_module", "-t", "test"]
+    parsed_args = parseArgs(args)
+    assert parsed_args.sut_module == ["sut_module"]
+
+
+def test_args_sut_module_args():
+    """Test arguments - sut_module_args."""
+    args = ["--sut-module-args", "sut_module_args", "-t", "test"]
+    parsed_args = parseArgs(args)
+    assert parsed_args.sut_module_args == ["sut_module_args"]
+
+
+def test_args_xunit_file():
+    """Test arguments - xunit-file."""
+    args = ["--xunit-file", "xunit_file", "-t", "test"]
+    parsed_args = parseArgs(args)
+    assert parsed_args.xunit_file == "xunit_file"
diff --git a/verif/tests/test_tosa_run_tests_mocksut.py b/verif/tests/test_tosa_run_tests_mocksut.py
new file mode 100644
index 0000000..98044e0
--- /dev/null
+++ b/verif/tests/test_tosa_run_tests_mocksut.py
@@ -0,0 +1,241 @@
+"""Tests for tosa_verif_run_tests.py."""
+# Copyright (c) 2021-2022, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+import json
+from copy import deepcopy
+from pathlib import Path
+from xml.dom import minidom
+
+import pytest
+
+from runner.tosa_verif_run_tests import main
+
+
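+# Template for the test's desc.json; the fixtures below write it to disk and
+# the mock SUT reads it back as self.testDesc.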
+TEST_DESC = {
+    "tosa_file": "pytest.json",
+    "ifm_name": ["test-0", "test-1"],
+    "ifm_file": ["test-0.npy", "test-1.npy"],
+    "ofm_name": ["test-result-0"],
+    "ofm_file": ["test-result-0.npy"],
+    "expected_failure": False,
+}
+GRAPH_RESULT_VALID = "valid"
+GRAPH_RESULT_ERROR = "error"
+
+
+def _create_desc_json(json_object) -> Path:
+    """Create test desc.json."""
+    file = Path(__file__).parent / "desc.json"
+    with open(file, "w") as fd:
+        json.dump(json_object, fd, indent=2)
+    return file
+
+
+def _delete_desc_json(file: Path):
+    """Clean up desc.json."""
+    binary_file = file.parent / "desc_binary.json"
+    if binary_file.exists():
+        print(binary_file.read_text())
+        binary_file.unlink()
+    else:
+        print(file.read_text())
+    file.unlink()
+
+
+@pytest.fixture
+def testDir() -> Path:
+    """Set up a mock expected pass test."""
+    print("SET UP - testDir")
+    file = _create_desc_json(TEST_DESC)
+    yield file.parent
+    print("TEAR DOWN - testDir")
+    _delete_desc_json(file)
+
+
+@pytest.fixture
+def testDirExpectedFail() -> Path:
+    """Set up a mock expected fail test."""
+    print("SET UP - testDirExpectedFail")
+    fail = deepcopy(TEST_DESC)
+    fail["expected_failure"] = True
+    file = _create_desc_json(fail)
+    yield file.parent
+    print("TEAR DOWN - testDirExpectedFail")
+    _delete_desc_json(file)
+
+
+@pytest.fixture
+def testDirMultiOutputs() -> Path:
+    """Set up a mock multiple results output test."""
+    print("SET UP - testDirMultiOutputs")
+    out = deepcopy(TEST_DESC)
+    out["ofm_name"].append("tr1")
+    out["ofm_file"].append("test-result-1.npy")
+    file = _create_desc_json(out)
+    yield file.parent
+    print("TEAR DOWN - testDirMultiOutputs")
+    _delete_desc_json(file)
+
+
+def _get_default_argv(testDir: Path, graphResult: str) -> list:
+    """Create default args based on test directory and graph result."""
+    return [
+        "--sut-module",
+        "tests.tosa_mock_sut_run",
+        "--test",
+        str(testDir),
+        "--xunit-file",
+        str(testDir / "result.xml"),
+        # Must be last argument to allow easy extension with extra args
+        "--sut-module-args",
+        f"tests.tosa_mock_sut_run:graph={graphResult}",
+    ]
+
+
+def _get_xml_results(argv: list):
+    """Get XML results and remove file."""
+    resultsFile = Path(argv[argv.index("--xunit-file") + 1])
+    results = minidom.parse(str(resultsFile))
+    resultsFile.unlink()
+    return results
+
+
+def _get_xml_testsuites_from_results(results, expectedTestSuites: int):
+    """Get XML testcases from results."""
+    testSuites = results.getElementsByTagName("testsuite")
+    assert len(testSuites) == expectedTestSuites
+    return testSuites
+
+
+def _get_xml_testcases_from_results(results, expectedTestCases: int):
+    """Get XML testcases from results."""
+    testCases = results.getElementsByTagName("testcase")
+    assert len(testCases) == expectedTestCases
+    return testCases
+
+
+def _get_xml_failure(argv: list):
+    """Get the results and single testcase with the failure result entry if there is one."""
+    results = _get_xml_results(argv)
+    testCases = _get_xml_testcases_from_results(results, 1)
+    fail = testCases[0].getElementsByTagName("failure")
+    if fail:
+        return fail[0].firstChild.data
+    return None
+
+
+def test_mock_sut_expected_pass(testDir: Path):
+    """Run expected pass SUT test."""
+    try:
+        argv = _get_default_argv(testDir, GRAPH_RESULT_VALID)
+        main(argv)
+        fail = _get_xml_failure(argv)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+    assert not fail
+
+
+UNEXPECTED_PASS_PREFIX_STR = "UNEXPECTED_PASS"
+UNEXPECTED_FAIL_PREFIX_STR = "UNEXPECTED_FAIL"
+
+
+def test_mock_sut_unexpected_pass(testDirExpectedFail: Path):
+    """Run unexpected pass SUT test."""
+    try:
+        argv = _get_default_argv(testDirExpectedFail, GRAPH_RESULT_VALID)
+        main(argv)
+        fail = _get_xml_failure(argv)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+    assert fail.startswith(UNEXPECTED_PASS_PREFIX_STR)
+
+
+def test_mock_sut_expected_failure(testDirExpectedFail: Path):
+    """Run expected failure SUT test."""
+    try:
+        argv = _get_default_argv(testDirExpectedFail, GRAPH_RESULT_ERROR)
+        main(argv)
+        fail = _get_xml_failure(argv)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+    assert not fail
+
+
+def test_mock_sut_unexpected_failure(testDir: Path):
+    """Run unexpected failure SUT test."""
+    try:
+        argv = _get_default_argv(testDir, GRAPH_RESULT_ERROR)
+        main(argv)
+        fail = _get_xml_failure(argv)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+    assert fail.startswith(UNEXPECTED_FAIL_PREFIX_STR)
+
+
+def test_mock_sut_binary_conversion(testDir: Path):
+    """Run unexpected failure SUT test."""
+    try:
+        argv = _get_default_argv(testDir, GRAPH_RESULT_VALID)
+        argv.extend(["--binary", "--flatc-path", str(testDir / "mock_flatc.py")])
+        main(argv)
+        binary_desc = testDir / "desc_binary.json"
+        assert binary_desc.exists()
+        fail = _get_xml_failure(argv)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+    assert not fail
+
+
+def test_mock_and_dummy_sut_results(testDir: Path):
+    """Run two SUTs and check they both return results."""
+    try:
+        argv = _get_default_argv(testDir, GRAPH_RESULT_VALID)
+        # Override sut-module setting with both SUTs
+        argv.extend(
+            ["--sut-module", "tests.tosa_dummy_sut_run", "tests.tosa_mock_sut_run"]
+        )
+        main(argv)
+        results = _get_xml_results(argv)
+        _get_xml_testsuites_from_results(results, 2)
+        _get_xml_testcases_from_results(results, 2)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+
+
+def test_two_mock_suts(testDir: Path):
+    """Test that a duplicated SUT is ignored."""
+    try:
+        argv = _get_default_argv(testDir, GRAPH_RESULT_VALID)
+        # Override sut-module setting with duplicated SUT
+        argv.extend(
+            ["--sut-module", "tests.tosa_mock_sut_run", "tests.tosa_mock_sut_run"]
+        )
+        main(argv)
+        results = _get_xml_results(argv)
+        _get_xml_testsuites_from_results(results, 1)
+        _get_xml_testcases_from_results(results, 1)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+
+
+def test_mock_sut_multi_outputs_expected_pass(testDirMultiOutputs: Path):
+    """Run expected pass SUT test with multiple outputs."""
+    try:
+        argv = _get_default_argv(testDirMultiOutputs, GRAPH_RESULT_VALID)
+        main(argv)
+        fail = _get_xml_failure(argv)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+    assert not fail
+
+
+def test_mock_sut_multi_outputs_unexpected_failure(testDirMultiOutputs: Path):
+    """Run SUT test which expects multiple outputs, but last one is missing."""
+    try:
+        argv = _get_default_argv(testDirMultiOutputs, GRAPH_RESULT_VALID)
+        argv.append("tests.tosa_mock_sut_run:num_results=1")
+        main(argv)
+        fail = _get_xml_failure(argv)
+    except Exception as e:
+        assert False, f"Unexpected exception {e}"
+    assert fail.startswith(UNEXPECTED_FAIL_PREFIX_STR)
diff --git a/verif/tests/test_tosa_run_tests_runshcmd.py b/verif/tests/test_tosa_run_tests_runshcmd.py
new file mode 100644
index 0000000..a765413
--- /dev/null
+++ b/verif/tests/test_tosa_run_tests_runshcmd.py
@@ -0,0 +1,54 @@
+"""Tests for tosa_verif_run_tests.py."""
+# Copyright (c) 2021-2022, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+from runner.run_command import run_sh_command
+from runner.run_command import RunShCommandError
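+# As exercised below: run_sh_command() returns (stdout, stderr) when called
+# with capture_output=True and raises RunShCommandError (carrying return_code
+# and stderr) when the command fails.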
+
+
+def test_run_command_success():
+    """Run successful command."""
+    cmd = ["echo", "Hello Space Cadets"]
+    try:
+        run_sh_command(cmd)
+        ok = True
+    except RunShCommandError:
+        ok = False
+    assert ok
+
+
+def test_run_command_fail():
+    """Run unsuccessful command."""
+    cmd = ["cat", "non-existant-file-432342.txt"]
+    try:
+        run_sh_command(cmd)
+        ok = True
+    except RunShCommandError as e:
+        assert e.return_code == 1
+        ok = False
+    assert not ok
+
+
+def test_run_command_fail_with_stderr():
+    """Run unsuccessful command capturing output."""
+    cmd = ["ls", "--unknown-option"]
+    try:
+        stdout, stderr = run_sh_command(cmd, capture_output=True)
+        ok = True
+    except RunShCommandError as e:
+        assert e.return_code == 2
+        assert e.stderr
+        ok = False
+    assert not ok
+
+
+def test_run_command_success_verbose_with_stdout():
+    """Run successful command capturing output."""
+    output = "There is no Planet B"
+    cmd = ["echo", output]
+    try:
+        stdout, stderr = run_sh_command(cmd, verbose=True, capture_output=True)
+        assert stdout == f"{output}\n"
+        ok = True
+    except RunShCommandError:
+        ok = False
+    assert ok
diff --git a/verif/tests/tosa_dummy_sut_run.py b/verif/tests/tosa_dummy_sut_run.py
new file mode 100644
index 0000000..fffcfa1
--- /dev/null
+++ b/verif/tests/tosa_dummy_sut_run.py
@@ -0,0 +1,20 @@
+"""TOSA test runner module for a dummy System Under Test (SUT)."""
+# Copyright (c) 2021, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+from runner.tosa_test_runner import TosaTestRunner
+
+
+class TosaSUTRunner(TosaTestRunner):
+    """TOSA dummy SUT runner."""
+
+    def __init__(self, args, runnerArgs, testDir):
+        """Initialize using the given test details."""
+        super().__init__(args, runnerArgs, testDir)
+
+    def runTestGraph(self):
+        """Nothing run as this is a dummy SUT that does nothing."""
+        graphResult = TosaTestRunner.TosaGraphResult.TOSA_VALID
+        graphMessage = "Dummy system under test - nothing run"
+
+        # Return graph result and message
+        return graphResult, graphMessage
diff --git a/verif/tests/tosa_mock_sut_run.py b/verif/tests/tosa_mock_sut_run.py
new file mode 100644
index 0000000..9572618
--- /dev/null
+++ b/verif/tests/tosa_mock_sut_run.py
@@ -0,0 +1,118 @@
+"""TOSA test runner module for a mock System Under Test (SUT)."""
+# Copyright (c) 2021, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+import os
+
+from runner.run_command import run_sh_command
+from runner.run_command import RunShCommandError
+from runner.tosa_test_runner import TosaTestRunner
+
+
+class TosaSUTRunner(TosaTestRunner):
+    """TOSA mock SUT runner."""
+
+    def __init__(self, args, runnerArgs, testDir):
+        """Initialize using the given test details."""
+        super().__init__(args, runnerArgs, testDir)
+
+    def runTestGraph(self):
+        """Run the test on a mock SUT."""
+        # Read the command line sut-module-args in form arg=value
+        # and put them in a dictionary
+        # Note: On the command line (for this module) they look like:
+        #       tests.tosa_mock_sut_run:arg=value
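+        #       e.g. "graph=valid" is stored as {"graph": "valid"}, and a bare
+        #       argument with no "=" is stored as a True flag.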
+        sutArgs = {}
+        for runArg in self.runnerArgs:
+            try:
+                arg, value = runArg.split("=", 1)
+            except ValueError:
+                # Argument without a value - treat it as a flag
+                arg = runArg
+                value = True
+            sutArgs[arg] = value
+        print(f"MOCK SUT: Runner argument dictionary: {sutArgs}")
+
+        # Useful metadata and arguments
+        tosaFlatbufferSchema = self.args.operator_fbs
+        tosaSubgraphFile = self.testDesc["tosa_file"]
+        tosaTestDirectory = self.testDir
+        tosaTestDescFile = self.descFile
+
+        # Expected file names for the graph results on a valid graph
+        graphResultFiles = []
+        for idx, name in enumerate(self.testDesc["ofm_name"]):
+            graphResultFiles.append(
+                "{}:{}".format(name, self.testDesc["ofm_file"][idx])
+            )
+
+        # Build up input "tensor_name":"filename" list
+        tosaInputTensors = []
+        for idx, name in enumerate(self.testDesc["ifm_name"]):
+            tosaInputTensors.append(
+                "{}:{}".format(name, self.testDesc["ifm_file"][idx])
+            )
+
+        # Build up command line
+        cmd = [
+            "echo",
+            f"FBS={tosaFlatbufferSchema}",
+            f"Path={tosaTestDirectory}",
+            f"Desc={tosaTestDescFile}",
+            f"Graph={tosaSubgraphFile}",
+            "Results={}".format(",".join(graphResultFiles)),
+            "Inputs={}".format(",".join(tosaInputTensors)),
+        ]
+
+        # Run test on implementation
+        graphResult = None
+        graphMessage = None
+        try:
+            stdout, stderr = run_sh_command(cmd, verbose=True, capture_output=True)
+        except RunShCommandError as e:
+            # Return codes can be used to indicate graphResult status (see tosa_ref_run.py)
+            # But in this mock version we just set the result based on sutArgs below
+            print(f"MOCK SUT: Unexpected error {e.return_code} from command: {e}")
+            graphResult = TosaTestRunner.TosaGraphResult.OTHER_ERROR
+            graphMessage = e.stderr
+
+        # Other mock system testing
+        if self.args.binary:
+            # Check that the mock binary conversion has happened
+            _, ext = os.path.splitext(tosaSubgraphFile)
+            if (
+                os.path.basename(tosaTestDescFile) != "desc_binary.json"
+                and ext != ".tosa"
+            ):
+                graphResult = TosaTestRunner.TosaGraphResult.OTHER_ERROR
+
+        # Mock up graph result based on passed arguments
+        if not graphResult:
+            try:
+                if sutArgs["graph"] == "valid":
+                    graphResult = TosaTestRunner.TosaGraphResult.TOSA_VALID
+                    # Create dummy output file(s) for passing result checker
+                    for idx, fname in enumerate(self.testDesc["ofm_file"]):
+                        if "num_results" in sutArgs and idx == int(
+                            sutArgs["num_results"]
+                        ):
+                            # Skip writing any more to test results checker
+                            break
+                        print("Created " + fname)
+                        with open(os.path.join(tosaTestDirectory, fname), "w"):
+                            pass
+                elif sutArgs["graph"] == "error":
+                    graphResult = TosaTestRunner.TosaGraphResult.TOSA_ERROR
+                    graphMessage = "MOCK SUT: ERROR_IF"
+                elif sutArgs["graph"] == "unpredictable":
+                    graphResult = TosaTestRunner.TosaGraphResult.TOSA_UNPREDICTABLE
+                    graphMessage = "MOCK SUT: UNPREDICTABLE"
+                else:
+                    graphResult = TosaTestRunner.TosaGraphResult.OTHER_ERROR
+                    graphMessage = "MOCK SUT: error from system under test"
+            except KeyError:
+                graphMessage = "MOCK SUT: No graph result specified!"
+                print(graphMessage)
+                graphResult = TosaTestRunner.TosaGraphResult.OTHER_ERROR
+
+        # Return graph result and message
+        return graphResult, graphMessage