Add support for compliance checking to testing

Updated the conformance generator so that it does not generate results
for compliance tests.
Updated the test runner to run the compliance mode version (precise &
abs mode) of the reference model to create test results to check the
SUT results against.
Updated the reference model to enable abs_mode when the relevant
desc.json flags are set.
Updated the test checker to support compliance checking using the
verifier library.
Separated color printing out of the test checker.
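
For reference, the new checking path keys off a "meta"/"compliance"
section in desc.json and the extended test_check() signature. A rough
sketch of how a compliance check is driven (illustrative only; the file
names, tensor name and compliance fields below are placeholders, not
the schema):

    from pathlib import Path

    from checker.tosa_result_checker import test_check

    # Hypothetical desc.json contents for a DOT_PRODUCT compliance test
    test_desc = {
        "ofm_name": ["result-0"],
        "ofm_file": ["test-result-0.npy"],
        "meta": {
            "compliance": {
                "tensors": {"result-0": {"mode": "DOT_PRODUCT"}}
            }
        },
    }

    # The checker passes the reference, implementation and (for
    # DOT_PRODUCT tests) bounds results to the verifier library
    result, tolerance, msg = test_check(
        Path("ref-result-0.npy"),  # reference model result
        Path("sut-result-0.npy"),  # implementation (SUT) result
        test_name="example_test",
        test_desc=test_desc,
        bnd_result_path=Path("bounds-result-0.npy"),
        ofm_name="result-0",
        verify_lib_path=Path("tosa_verify_lib.so"),  # placeholder library name
    )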

Change-Id: I7e2fbfc6883916caa5d94d4ece122c48bf45f530
Signed-off-by: Jeremy Johnson <jeremy.johnson@arm.com>
diff --git a/reference_model/src/main.cpp b/reference_model/src/main.cpp
index 62b8f6f..6d50f9e 100644
--- a/reference_model/src/main.cpp
+++ b/reference_model/src/main.cpp
@@ -90,6 +90,12 @@
 
     GraphStatus status = GraphStatus::TOSA_VALID;
 
+    if (isComplianceModeDotProduct(test_desc) && !g_func_config.precise_mode)
+    {
+        // Warn about precise mode for dot product compliance
+        DEBUG_INFO(CONFIG, "DOT_PRODUCT compliance: NOTE - enable precise mode for compliance results")
+    }
+
     // max of 2 runs, second run only happens when precise_mode is set, to do an abs_mode run
     for (int run = 0; run < 2; run++)
     {
@@ -213,10 +219,11 @@
                 fprintf(stderr, "Unknown graph status code=%d.\n", (int)main_gt.getGraphStatus());
         }
 
-        if (status == GraphStatus::TOSA_VALID && g_func_config.eval && g_func_config.precise_mode &&
+        if (run == 0 && status == GraphStatus::TOSA_VALID && g_func_config.precise_mode && g_func_config.eval &&
             isComplianceModeDotProduct(test_desc))
         {
-            // first run result is valid, in precise mode and eval is true: turn on abs_mode for second run
+            // first run result is valid, precise mode and eval are true: turn on abs_mode for second run
+            DEBUG_INFO(CONFIG, "DOT_PRODUCT compliance: Evaluating the graph again to produce bounds results")
             g_func_config.abs_mode = true;
             continue;
         }
@@ -354,14 +361,21 @@
     return g_func_config.abs_mode ? "bounds_" : "";
 }
 
-// returns true iff test_desc contains a dictionay, "compliance",
-// which contains entry "mode" whose value is "dot product"
+// returns true iff test_desc contains a "meta" object containing a "compliance"
+// object which contains "tensors" and one of those has a "mode" whose value is
+// "DOT_PRODUCT"
 bool isComplianceModeDotProduct(json& test_desc)
 {
-    if (test_desc.contains("compliance") && test_desc["compliance"].contains("mode") &&
-        test_desc["compliance"]["mode"] == "dot product")
+    if (test_desc.contains("meta") && test_desc["meta"].contains("compliance") &&
+        test_desc["meta"]["compliance"].contains("tensors"))
     {
-        return true;
+        for (auto t : test_desc["meta"]["compliance"]["tensors"])
+        {
+            if (t.contains("mode") && t["mode"] == "DOT_PRODUCT")
+            {
+                return true;
+            }
+        }
     }
     return false;
 }
diff --git a/scripts/convert2conformance/convert2conformance.py b/scripts/convert2conformance/convert2conformance.py
index 3654f9a..171ec3e 100755
--- a/scripts/convert2conformance/convert2conformance.py
+++ b/scripts/convert2conformance/convert2conformance.py
@@ -162,11 +162,11 @@
     test_dir: Path,
     test_desc,
     output_dir: Optional[Path] = None,
-    create_result=True,
+    record_result=True,
     profiles=None,
     tags=None,
 ):
-    """Update the desc.json format for conformance and optionally create result."""
+    """Update the desc.json format for conformance and optionally record result."""
     ofm_files = []
     cfm_files = []
     if not output_dir:
@@ -175,7 +175,7 @@
         ofm_path = test_dir / ofm
         if not test_desc["expected_failure"]:
             cfm = NAME_CONFORMANCE_RESULT_PREFIX + test_desc["ofm_name"][index]
-            if create_result:
+            if record_result:
                 if ofm_path.is_file():
                     # Use the desc.json name
                     ofm_refmodel = ofm_path
@@ -338,12 +338,18 @@
     for cpp in cpp_files:
         shutil.copy(str(cpp), str(args.output_dir))
 
+    # Work out if we have a result to record
+    record_result = not args.lazy_data_generation
+    if "meta" in test_desc and "compliance" in test_desc["meta"]:
+        # We don't have pre-generated results for compliance tests
+        record_result = False
+
     # Update desc.json and convert result files to JSON
     test_desc = update_desc_json(
         desc_filename.parent,
         test_desc,
         output_dir=args.output_dir,
-        create_result=(not args.lazy_data_generation),
+        record_result=record_result,
         profiles=args.profile,
         tags=args.tags,
     )
diff --git a/verif/checker/color_print.py b/verif/checker/color_print.py
new file mode 100644
index 0000000..1563b92
--- /dev/null
+++ b/verif/checker/color_print.py
@@ -0,0 +1,33 @@
+"""Color printing module."""
+# Copyright (c) 2020-2023, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+from enum import Enum
+from enum import unique
+
+color_printing = True
+
+
+@unique
+class LogColors(Enum):
+    """Shell escape sequence colors for logging."""
+
+    NONE = "\u001b[0m"
+    GREEN = "\u001b[32;1m"
+    RED = "\u001b[31;1m"
+    YELLOW = "\u001b[33;1m"
+    BOLD_WHITE = "\u001b[1m"
+
+
+def set_print_in_color(enabled):
+    """Set color printing to enabled or disabled."""
+    global color_printing
+    color_printing = enabled
+
+
+def print_color(color, msg):
+    """Print color status messages if enabled."""
+    global color_printing
+    if not color_printing:
+        print(msg)
+    else:
+        print("{}{}{}".format(color.value, msg, LogColors.NONE.value))
diff --git a/verif/checker/tosa_result_checker.py b/verif/checker/tosa_result_checker.py
index 1169a95..38ed510 100644
--- a/verif/checker/tosa_result_checker.py
+++ b/verif/checker/tosa_result_checker.py
@@ -1,43 +1,19 @@
 """TOSA result checker script."""
-# Copyright (c) 2020-2022, ARM Limited.
+# Copyright (c) 2020-2023, ARM Limited.
 # SPDX-License-Identifier: Apache-2.0
 import argparse
-from enum import Enum
+import json
 from enum import IntEnum
 from enum import unique
 from pathlib import Path
 
 import numpy as np
+from checker.color_print import LogColors
+from checker.color_print import print_color
+from checker.verifier import VerifierError
+from checker.verifier import VerifierLibrary
 from generator.tosa_utils import float32_is_valid_bfloat16
-
-##################################
-color_printing = True
-
-
-@unique
-class LogColors(Enum):
-    """Shell escape sequence colors for logging."""
-
-    NONE = "\u001b[0m"
-    GREEN = "\u001b[32;1m"
-    RED = "\u001b[31;1m"
-    YELLOW = "\u001b[33;1m"
-    BOLD_WHITE = "\u001b[1m"
-
-
-def set_print_in_color(enabled):
-    """Set color printing to enabled or disabled."""
-    global color_printing
-    color_printing = enabled
-
-
-def print_color(color, msg):
-    """Print color status messages if enabled."""
-    global color_printing
-    if not color_printing:
-        print(msg)
-    else:
-        print("{}{}{}".format(color.value, msg, LogColors.NONE.value))
+from schemavalidation.schemavalidation import TestDescSchemaValidator
 
 
 @unique
@@ -62,46 +38,120 @@
 ##################################
 
 DEFAULT_FP_TOLERANCE = 1e-3
+result_printing = True
+
+
+def set_print_result(enabled):
+    """Set whether to print out or not."""
+    global result_printing
+    result_printing = enabled
+
+
+def _print_result(color, msg):
+    """Print out result."""
+    global result_printing
+    if result_printing:
+        print_color(color, msg)
+
+
+def compliance_check(
+    imp_result_path,
+    ref_result_path,
+    bnd_result_path,
+    test_name,
+    compliance_config,
+    ofm_name,
+    verify_lib_path,
+):
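+    """Check compliance of the results using the verifier library."""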
+    try:
+        vlib = VerifierLibrary(verify_lib_path)
+    except VerifierError as e:
+        _print_result(LogColors.RED, f"INTERNAL ERROR {test_name}")
+        msg = f"Could not load verfier library: {str(e)}"
+        return (TestResult.INTERNAL_ERROR, 0.0, msg)
+
+    success = vlib.verify_data(
+        ofm_name, compliance_config, imp_result_path, ref_result_path, bnd_result_path
+    )
+    if success:
+        _print_result(LogColors.GREEN, f"Results PASS {test_name}")
+        return (TestResult.PASS, 0.0, "")
+    else:
+        _print_result(LogColors.RED, f"Results NON-COMPLIANT {test_name}")
+        return (TestResult.MISMATCH, 0.0, "Non-compliant implementation results found")
 
 
 def test_check(
-    reference_path,
-    result_path,
-    test_name="test",
+    ref_result_path,
+    imp_result_path,
+    test_name=None,
     quantize_tolerance=0,
     float_tolerance=DEFAULT_FP_TOLERANCE,
     misc_checks=[],
+    test_desc=None,
+    bnd_result_path=None,
+    ofm_name=None,
+    verify_lib_path=None,
 ):
     """Check if the result is the same as the expected reference."""
-    if not reference_path.is_file():
-        print_color(LogColors.RED, "Reference MISSING FILE {}".format(test_name))
-        msg = "Missing reference file: {}".format(reference_path)
-        return (TestResult.MISSING_FILE, 0.0, msg)
-    if not result_path.is_file():
-        print_color(LogColors.RED, "Results MISSING FILE {}".format(test_name))
-        msg = "Missing result file: {}".format(result_path)
-        return (TestResult.MISSING_FILE, 0.0, msg)
+    if test_desc:
+        # New compliance method - first get test details
+        try:
+            TestDescSchemaValidator().validate_config(test_desc)
+        except Exception as e:
+            _print_result(LogColors.RED, f"Test INCORRECT FORMAT {test_name}")
+            msg = f"Incorrect test format: {e}"
+            return (TestResult.INCORRECT_FORMAT, 0.0, msg)
 
-    try:
-        test_result = np.load(result_path)
-    except Exception as e:
-        print_color(LogColors.RED, "Results INCORRECT FORMAT {}".format(test_name))
-        msg = "Incorrect numpy format of {}\nnumpy.load exception: {}".format(
-            result_path, e
+    if test_name is None:
+        test_name = "test"
+
+    paths = [imp_result_path, ref_result_path, bnd_result_path]
+    names = ["Implementation", "Reference", "Bounds"]
+    arrays = [None, None, None]
+
+    # Check the files exist and are in the right format
+    for idx, path in enumerate(paths):
+        name = names[idx]
+        if path is None and name == "Bounds":
+            # Bounds can be None - skip it
+            continue
+        if not path.is_file():
+            _print_result(LogColors.RED, f"{name} MISSING FILE {test_name}")
+            msg = f"Missing {name} file: {str(path)}"
+            return (TestResult.MISSING_FILE, 0.0, msg)
+        try:
+            arrays[idx] = np.load(path)
+        except Exception as e:
+            _print_result(LogColors.RED, f"{name} INCORRECT FORMAT {test_name}")
+            msg = f"Incorrect numpy format of {str(path)}\nnumpy.load exception: {e}"
+            return (TestResult.INCORRECT_FORMAT, 0.0, msg)
+
+    if test_desc and "meta" in test_desc and "compliance" in test_desc["meta"]:
+        # Switch to using the verifier library for full compliance
+        if ofm_name is None:
+            ofm_name = test_desc["ofm_name"][0]
+            if len(test_desc["ofm_name"]) > 1:
+                _print_result(LogColors.RED, f"Output Name MISSING FILE {test_name}")
+                msg = "Must specify output name (ofm_name) to check as multiple found in desc.json"
+                return (TestResult.MISSING_FILE, 0.0, msg)
+
+        compliance_json = test_desc["meta"]["compliance"]
+
+        return compliance_check(
+            *arrays,
+            test_name,
+            compliance_json,
+            ofm_name,
+            verify_lib_path,
         )
-        return (TestResult.INCORRECT_FORMAT, 0.0, msg)
-    try:
-        reference_result = np.load(reference_path)
-    except Exception as e:
-        print_color(LogColors.RED, "Reference INCORRECT FORMAT {}".format(test_name))
-        msg = "Incorrect numpy format of {}\nnumpy.load exception: {}".format(
-            reference_path, e
-        )
-        return (TestResult.INCORRECT_FORMAT, 0.0, msg)
+
+    # Else continue with original checking method
+    test_result, reference_result, _ = arrays
 
     # Type comparison
     if test_result.dtype != reference_result.dtype:
-        print_color(LogColors.RED, "Results TYPE MISMATCH {}".format(test_name))
+        _print_result(LogColors.RED, "Results TYPE MISMATCH {}".format(test_name))
         msg = "Mismatch results type: Expected {}, got {}".format(
             reference_result.dtype, test_result.dtype
         )
@@ -115,7 +165,7 @@
     difference = None
 
     if np.shape(test_result) != np.shape(reference_result):
-        print_color(LogColors.RED, "Results MISCOMPARE {}".format(test_name))
+        _print_result(LogColors.RED, "Results MISCOMPARE {}".format(test_name))
         msg = "Shapes mismatch: Reference {} vs {}".format(
             np.shape(test_result), np.shape(reference_result)
         )
@@ -139,7 +189,7 @@
     if reference_result.dtype == np.int32 or reference_result.dtype == np.int64:
 
         if np.all(np.absolute(reference_result - test_result) <= quantize_tolerance):
-            print_color(LogColors.GREEN, "Results PASS {}".format(test_name))
+            _print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
             return (TestResult.PASS, 0.0, "")
         else:
             tolerance = quantize_tolerance + 1
@@ -166,7 +216,7 @@
         # All boolean values must match, xor will show up differences
         test = np.array_equal(reference_result, test_result)
         if np.all(test):
-            print_color(LogColors.GREEN, "Results PASS {}".format(test_name))
+            _print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
             return (TestResult.PASS, 0.0, "")
         msg = "Boolean result does not match"
         tolerance = 0.0
@@ -177,18 +227,18 @@
     elif reference_result.dtype == np.float32 or reference_result.dtype == np.float16:
         tolerance = float_tolerance
         if np.allclose(reference_result, test_result, atol=tolerance, equal_nan=True):
-            print_color(LogColors.GREEN, "Results PASS {}".format(test_name))
+            _print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
             return (TestResult.PASS, tolerance, "")
         msg = "Float result does not match within tolerance of {}".format(tolerance)
         difference = reference_result - test_result
         # Fall-through to below to add failure values
     else:
-        print_color(LogColors.RED, "Results UNSUPPORTED TYPE {}".format(test_name))
+        _print_result(LogColors.RED, "Results UNSUPPORTED TYPE {}".format(test_name))
         msg = "Unsupported results type: {}".format(reference_result.dtype)
         return (TestResult.MISMATCH, 0.0, msg)
 
     # Fall-through for mismatch failure to add values to msg
-    print_color(LogColors.RED, "Results MISCOMPARE {}".format(test_name))
+    _print_result(LogColors.RED, "Results MISCOMPARE {}".format(test_name))
     np.set_printoptions(threshold=128, edgeitems=2)
 
     if difference is not None:
@@ -209,18 +259,65 @@
     """Check that the supplied reference and result files have the same contents."""
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "reference_path", type=Path, help="the path to the reference file to test"
+        "ref_result_path",
+        type=Path,
+        help="path to the reference model result file to check",
     )
     parser.add_argument(
-        "result_path", type=Path, help="the path to the result file to test"
+        "imp_result_path",
+        type=Path,
+        help="path to the implementation result file to check",
     )
     parser.add_argument(
         "--fp-tolerance", type=float, default=DEFAULT_FP_TOLERANCE, help="FP tolerance"
     )
+    parser.add_argument(
+        "--test_path", type=Path, help="path to the test that produced the results"
+    )
+    parser.add_argument(
+        "--bnd-result-path",
+        type=Path,
+        help="path to the reference model bounds result file for the dot product compliance check",
+    )
+    parser.add_argument(
+        "--ofm-name",
+        type=str,
+        help="name of the output tensor to check, defaults to the first ofm_name listed in the test",
+    )
+    parser.add_argument(
+        "--verify-lib-path",
+        type=Path,
+        help="path to TOSA verify library",
+    )
     args = parser.parse_args(argv)
 
+    if args.test_path:
+        # Get details from the test path
+        test_desc_path = args.test_path / "desc.json"
+        if not args.test_path.is_dir() or not test_desc_path.is_file():
+            print(f"Invalid test directory {str(args.test_path)}")
+            return TestResult.MISSING_FILE
+
+        try:
+            with test_desc_path.open("r") as fd:
+                test_desc = json.load(fd)
+        except Exception as e:
+            print(f"Invalid test description file {str(test_desc_path)}: {e}")
+            return TestResult.INCORRECT_FORMAT
+        test_name = args.test_path.name
+    else:
+        test_desc = None
+        test_name = None
+
     result, tolerance, msg = test_check(
-        args.reference_path, args.result_path, float_tolerance=args.fp_tolerance
+        args.ref_result_path,
+        args.imp_result_path,
+        float_tolerance=args.fp_tolerance,
+        test_name=test_name,
+        test_desc=test_desc,
+        bnd_result_path=args.bnd_result_path,
+        ofm_name=args.ofm_name,
+        verify_lib_path=args.verify_lib_path,
     )
     if result != TestResult.PASS:
         print(msg)
diff --git a/verif/conformance/tosa_verif_conformance_generator.py b/verif/conformance/tosa_verif_conformance_generator.py
index 564617b..c9a0b3a 100644
--- a/verif/conformance/tosa_verif_conformance_generator.py
+++ b/verif/conformance/tosa_verif_conformance_generator.py
@@ -234,6 +234,14 @@
         )
 
     for test in tests:
+        desc = test / "desc.json"
+        with desc.open("r") as fd:
+            test_desc = json.load(fd)
+        if "meta" in test_desc and "compliance" in test_desc["meta"]:
+            logger.info(
+                f"Skipping generating results for new compliance test - {str(test)}"
+            )
+            continue
         ref_cmd = ref_cmd_base.copy()
         ref_cmd.append(str(test))
         ref_cmds.append(ref_cmd)
diff --git a/verif/frameworks/tosa_verif_framework_compiler_runner.py b/verif/frameworks/tosa_verif_framework_compiler_runner.py
index 972ea0a..bf035cc 100755
--- a/verif/frameworks/tosa_verif_framework_compiler_runner.py
+++ b/verif/frameworks/tosa_verif_framework_compiler_runner.py
@@ -16,9 +16,9 @@
 from pathlib import Path
 
 import numpy as np
-from checker.tosa_result_checker import LogColors
-from checker.tosa_result_checker import print_color
-from checker.tosa_result_checker import set_print_in_color
+from checker.color_print import LogColors
+from checker.color_print import print_color
+from checker.color_print import set_print_in_color
 from runner.run_command import run_sh_command
 from xunit.xunit import xunit_results
 from xunit.xunit import xunit_test
diff --git a/verif/runner/tosa_refmodel_compliance_sut_run.py b/verif/runner/tosa_refmodel_compliance_sut_run.py
new file mode 100644
index 0000000..36e53b6
--- /dev/null
+++ b/verif/runner/tosa_refmodel_compliance_sut_run.py
@@ -0,0 +1,17 @@
+"""TOSA ref model compliance runner module."""
+# Copyright (c) 2023, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+from runner.tosa_refmodel_sut_run import TosaSUTRunner as TosaRefRunner
+
+
+class TosaSUTRunner(TosaRefRunner):
+    """Compliance mode enabled ref model runner."""
+
+    def __init__(self, args, runnerArgs, testDirPath):
+        """Initialize the TosaTestRunner base class"""
+        super().__init__(args, runnerArgs, testDirPath)
+
+        # Override - Set compliance mode precise FP64 calculations
+        self.compliance = True
+
+    # All other functions inherited from refmodel_sut_run
diff --git a/verif/runner/tosa_refmodel_sut_run.py b/verif/runner/tosa_refmodel_sut_run.py
index 419f87b..d9eb108 100644
--- a/verif/runner/tosa_refmodel_sut_run.py
+++ b/verif/runner/tosa_refmodel_sut_run.py
@@ -25,6 +25,9 @@
         """Initialize using the given test details."""
         super().__init__(args, runnerArgs, testDirPath)
 
+        # Don't do any compliance runs
+        self.compliance = False
+
     def runTestGraph(self):
         """Run the test on the reference model."""
         # Build up the TOSA reference command line
@@ -46,7 +49,7 @@
         if args.ref_intermediates:
             cmd.extend(["--dump_intermediates", str(args.ref_intermediates)])
 
-        if args.precise_mode:
+        if args.precise_mode or self.compliance:
             cmd.extend(["--precise_mode=1"])
 
         # Run command and interpret tosa graph result via process return codes
diff --git a/verif/runner/tosa_test_presets.py b/verif/runner/tosa_test_presets.py
new file mode 100644
index 0000000..c45550d
--- /dev/null
+++ b/verif/runner/tosa_test_presets.py
@@ -0,0 +1,7 @@
+"""Presets file for test running."""
+# Copyright (c) 2023, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+
+TOSA_REFCOMPLIANCE_RUNNER = "runner.tosa_refmodel_compliance_sut_run"
+TOSA_REFMODEL_RUNNER = "runner.tosa_refmodel_sut_run"
+MAX_XUNIT_TEST_MESSAGE = 1000
diff --git a/verif/runner/tosa_test_runner.py b/verif/runner/tosa_test_runner.py
index 579dd60..30a7168 100644
--- a/verif/runner/tosa_test_runner.py
+++ b/verif/runner/tosa_test_runner.py
@@ -4,11 +4,43 @@
 import json
 from enum import IntEnum
 
-from checker.tosa_result_checker import LogColors
-from checker.tosa_result_checker import print_color
-from checker.tosa_result_checker import set_print_in_color
+import conformance.model_files as cmf
+import schemavalidation.schemavalidation as sch
+from checker.color_print import LogColors
+from checker.color_print import print_color
+from checker.color_print import set_print_in_color
+from checker.tosa_result_checker import set_print_result
 from checker.tosa_result_checker import test_check
 from json2fbbin import json2fbbin
+from runner.tosa_test_presets import TOSA_REFCOMPLIANCE_RUNNER
+
+
+def isComplianceModeDotProduct(testDesc):
+    """Checks the test descriptor for DOT_PRODUCT compliance mode."""
+    if (
+        "meta" in testDesc
+        and "compliance" in testDesc["meta"]
+        and "tensors" in testDesc["meta"]["compliance"]
+    ):
+        for _, t in testDesc["meta"]["compliance"]["tensors"].items():
+            if "mode" in t and t["mode"] == "DOT_PRODUCT":
+                return True
+    return False
+
+
+def getRunnerResultFilePath(resultFilePath, sutModule):
+    """Return the result file path with the runner specific naming."""
+    return resultFilePath.with_suffix(f".{sutModule}{resultFilePath.suffix}")
+
+
+def getBoundsResultFilePath(resultFilePath, sutModule=None):
+    """Return the bounds result file with/without runner specific naming."""
+    boundsFilePath = resultFilePath.parent / f"bounds_{resultFilePath.name}"
+    if sutModule is not None:
+        boundsFilePath = boundsFilePath.with_suffix(
+            f".{sutModule}{boundsFilePath.suffix}"
+        )
+    return boundsFilePath
 
 
 class TosaTestInvalid(Exception):
@@ -39,8 +71,13 @@
         self.testDir = str(testDirPath)
         self.testDirPath = testDirPath
         self.testName = self.testDirPath.name
+        self.verify_lib_path = cmf.find_tosa_file(
+            cmf.TosaFileType.VERIFY_LIBRARY, args.ref_model_path
+        )
 
         set_print_in_color(not args.no_color)
+        # Stop the result checker printing anything - we will print the result ourselves
+        set_print_result(False)
 
         # Check if we want to run binary and if its already converted
         descFilePath = testDirPath / "desc.json"
@@ -53,6 +90,8 @@
             # Load the json test file
             with descFilePath.open("r") as fd:
                 self.testDesc = json.load(fd)
+            # Validate the json with the schema
+            sch.TestDescSchemaValidator().validate_config(self.testDesc)
         except Exception as e:
             raise TosaTestInvalid(str(descFilePath), e)
 
@@ -76,6 +115,16 @@
         self.descFile = str(descFilePath)
         self.descFilePath = descFilePath
 
+        # Check for compliance mode - need to run refmodel to get results
+        if "meta" in self.testDesc and "compliance" in self.testDesc["meta"]:
+            self.complianceMode = True
+            if "expected_result" in self.testDesc:
+                if self.args.verbose:
+                    print("Warning: fixing conflicting compliance mode in test.desc")
+                self.testDesc.pop("expected_result")
+        else:
+            self.complianceMode = False
+
     def skipTest(self):
         """Check if the test is skipped due to test type or profile selection."""
         expectedFailure = self.testDesc["expected_failure"]
@@ -96,7 +145,9 @@
     def testResult(self, tosaGraphResult, graphMessage=None):
         """Work out test result based on graph result and output files."""
         expectedFailure = self.testDesc["expected_failure"]
-        print_result_line = True
+        print_check_result = False
+
+        sutModule = self.__module__
 
         if tosaGraphResult == TosaTestRunner.TosaGraphResult.TOSA_VALID:
             if expectedFailure:
@@ -107,8 +158,25 @@
                 # but overriding this with any failures found
                 result = TosaTestRunner.Result.EXPECTED_PASS
                 messages = []
+
+                # Go through each output result checking it
                 for resultNum, resultFileName in enumerate(self.testDesc["ofm_file"]):
-                    if "expected_result_file" in self.testDesc:
+                    resultFilePath = self.testDirPath / resultFileName
+
+                    # Work out the file to check against (if any)
+                    if self.complianceMode and sutModule != TOSA_REFCOMPLIANCE_RUNNER:
+                        conformanceFilePath = getRunnerResultFilePath(
+                            resultFilePath, TOSA_REFCOMPLIANCE_RUNNER
+                        )
+                        if isComplianceModeDotProduct(self.testDesc):
+                            conformanceBoundsPath = getBoundsResultFilePath(
+                                resultFilePath, TOSA_REFCOMPLIANCE_RUNNER
+                            )
+                        else:
+                            # Not expecting a bounds file for this test
+                            conformanceBoundsPath = None
+                    elif "expected_result_file" in self.testDesc:
+                        conformanceBoundsPath = None
                         try:
                             conformanceFilePath = (
                                 self.testDirPath
@@ -123,15 +191,20 @@
                             print(msg)
                             break
                     else:
+                        # Nothing to check against
                         conformanceFilePath = None
-                    resultFilePath = self.testDirPath / resultFileName
+                        conformanceBoundsPath = None
 
                     if conformanceFilePath:
-                        print_result_line = False  # Checker will print one for us
+                        print_check_result = True  # Result from checker
                         chkResult, tolerance, msg = test_check(
                             conformanceFilePath,
                             resultFilePath,
                             test_name=self.testName,
+                            test_desc=self.testDesc,
+                            bnd_result_path=conformanceBoundsPath,
+                            ofm_name=self.testDesc["ofm_name"][resultNum],
+                            verify_lib_path=self.verify_lib_path,
                         )
                         # Change EXPECTED_PASS assumption if we have any failures
                         if chkResult != 0:
@@ -143,18 +216,31 @@
                         # No conformance file to verify, just check results file exists
                         if not resultFilePath.is_file():
                             result = TosaTestRunner.Result.UNEXPECTED_FAILURE
-                            msg = "Results file is missing: {}".format(resultFilePath)
+                            msg = f"Results file is missing: {resultFilePath}"
                             messages.append(msg)
                             print(msg)
 
                     if resultFilePath.is_file():
                         # Move the resultFilePath to allow subsequent system under
                         # tests to create them and to test they have been created
-                        resultFilePath = resultFilePath.rename(
-                            resultFilePath.with_suffix(
-                                ".{}{}".format(self.__module__, resultFilePath.suffix)
-                            )
+                        # and to enable compliance testing against refmodel results
+                        resultFilePath.rename(
+                            getRunnerResultFilePath(resultFilePath, sutModule)
                         )
+                        if (
+                            isComplianceModeDotProduct(self.testDesc)
+                            and sutModule == TOSA_REFCOMPLIANCE_RUNNER
+                        ):
+                            boundsFilePath = getBoundsResultFilePath(resultFilePath)
+                            if boundsFilePath.is_file():
+                                boundsFilePath = boundsFilePath.rename(
+                                    getBoundsResultFilePath(resultFilePath, sutModule)
+                                )
+                            else:
+                                result = TosaTestRunner.Result.INTERNAL_ERROR
+                                msg = f"Internal error: Missing expected dot product compliance bounds file {boundsFilePath}"
+                                messages.append(msg)
+                                print(msg)
 
                 resultMessage = "\n".join(messages) if len(messages) > 0 else None
         else:
@@ -168,16 +254,14 @@
                 result = TosaTestRunner.Result.UNEXPECTED_FAILURE
                 resultMessage = graphMessage
 
-        if print_result_line:
-            if (
-                result == TosaTestRunner.Result.EXPECTED_FAILURE
-                or result == TosaTestRunner.Result.EXPECTED_PASS
-            ):
-                print_color(
-                    LogColors.GREEN, "Result code PASS {}".format(self.testName)
-                )
-            else:
-                print_color(LogColors.RED, "Result code FAIL {}".format(self.testName))
+        status = "Result" if print_check_result else "Result code"
+        if (
+            result == TosaTestRunner.Result.EXPECTED_FAILURE
+            or result == TosaTestRunner.Result.EXPECTED_PASS
+        ):
+            print_color(LogColors.GREEN, f"{sutModule}: {status} PASS {self.testName}")
+        else:
+            print_color(LogColors.RED, f"{sutModule}: {status} FAIL {self.testName}")
 
         return result, resultMessage
 
diff --git a/verif/runner/tosa_verif_run_tests.py b/verif/runner/tosa_verif_run_tests.py
index 722c0e7..d1755e6 100644
--- a/verif/runner/tosa_verif_run_tests.py
+++ b/verif/runner/tosa_verif_run_tests.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import importlib
+import json
 import os
 import queue
 import threading
@@ -11,14 +12,12 @@
 from pathlib import Path
 
 import conformance.model_files as cmf
+import runner.tosa_test_presets as ttp
 from json2numpy import json2numpy
 from runner.tosa_test_runner import TosaTestInvalid
 from runner.tosa_test_runner import TosaTestRunner
 from xunit import xunit
 
-TOSA_REFMODEL_RUNNER = "runner.tosa_refmodel_sut_run"
-MAX_XUNIT_TEST_MESSAGE = 1000
-
 
 def parseArgs(argv):
     """Parse the arguments and return the settings."""
@@ -104,7 +103,7 @@
         dest="sut_module",
         type=str,
         nargs="+",
-        default=[TOSA_REFMODEL_RUNNER],
+        default=[ttp.TOSA_REFMODEL_RUNNER],
         help="System under test module to load (derives from TosaTestRunner).  May be repeated",
     )
     parser.add_argument(
@@ -175,18 +174,20 @@
 def convert2Numpy(test_path):
     """Convert all the JSON numpy files back into binary numpy."""
     jsons = test_path.glob("*.json")
-    for json in jsons:
+    for j in jsons:
         for exclude in EXCLUSION_PREFIX:
-            if json.name.startswith(exclude):
-                json = None
+            if j.name.startswith(exclude):
+                j = None
                 break
-        if json:
+        if j:
             # debug print(f"Converting {json}")
-            json2numpy.json_to_npy(json)
+            json2numpy.json_to_npy(j)
 
 
-def workerThread(task_queue, runnerList, args, result_queue):
+def workerThread(task_queue, runnerList, complianceRunner, args, result_queue):
     """Worker thread that runs the next test from the queue."""
+    complianceRunnerList = runnerList.copy()
+    complianceRunnerList.insert(0, (complianceRunner, []))
     while True:
         try:
             test_path = task_queue.get(block=False)
@@ -196,9 +197,24 @@
         if test_path is None:
             break
 
+        try:
+            # Check for compliance test
+            desc = test_path / "desc.json"
+            with desc.open("r") as fd:
+                j = json.load(fd)
+                compliance = "compliance" in j["meta"]
+        except Exception:
+            compliance = False
+
+        if compliance:
+            # Run compliance first to create output files!
+            currentRunners = complianceRunnerList
+        else:
+            currentRunners = runnerList
+
         msg = ""
         converted = False
-        for runnerModule, runnerArgs in runnerList:
+        for runnerModule, runnerArgs in currentRunners:
             try:
                 start_time = datetime.now()
                 # Set up system under test runner
@@ -358,8 +374,11 @@
             cmf.TosaFileType.SCHEMA, args.ref_model_path
         )
 
-    if TOSA_REFMODEL_RUNNER in args.sut_module and not args.ref_model_path.is_file():
-        print(f"Argument error: Reference Model not found - {str(args.ref_model_path)}")
+    # Always check as it will be needed for compliance
+    if not args.ref_model_path.is_file():
+        print(
+            f"Argument error: Reference Model not found - ({str(args.ref_model_path)})"
+        )
         exit(2)
 
     if args.test_list_file:
@@ -374,7 +393,12 @@
             )
             exit(2)
 
+    # Load in the runner modules and the ref model compliance module
     runnerList = loadSUTRunnerModules(args)
+    complianceRunner = importlib.import_module(ttp.TOSA_REFCOMPLIANCE_RUNNER)
+    # Create a separate reporting runner list as the compliance runner may
+    # not always be run - it depends on compliance testing
+    fullRunnerList = runnerList + [(complianceRunner, [])]
 
     threads = []
     taskQueue = queue.Queue()
@@ -404,7 +428,8 @@
 
     for i in range(args.jobs):
         t = threading.Thread(
-            target=workerThread, args=(taskQueue, runnerList, args, resultQueue)
+            target=workerThread,
+            args=(taskQueue, runnerList, complianceRunner, args, resultQueue),
         )
         t.setDaemon(True)
         t.start()
@@ -415,7 +440,7 @@
     # Set up results lists for each system under test
     resultLists = {}
     results = {}
-    for runnerModule, _ in runnerList:
+    for runnerModule, _ in fullRunnerList:
         runner = runnerModule.__name__
         resultLists[runner] = []
         results[runner] = [0] * len(TosaTestRunner.Result)
@@ -428,19 +453,19 @@
             break
 
         # Limit error messages to make results easier to digest
-        if msg and len(msg) > MAX_XUNIT_TEST_MESSAGE:
-            half = int(MAX_XUNIT_TEST_MESSAGE / 2)
-            trimmed = len(msg) - MAX_XUNIT_TEST_MESSAGE
+        if msg and len(msg) > ttp.MAX_XUNIT_TEST_MESSAGE:
+            half = int(ttp.MAX_XUNIT_TEST_MESSAGE / 2)
+            trimmed = len(msg) - ttp.MAX_XUNIT_TEST_MESSAGE
             msg = "{} ...\nskipped {} bytes\n... {}".format(
                 msg[:half], trimmed, msg[-half:]
             )
         resultLists[runner].append((test_path, rc, msg, time_delta))
         results[runner][rc] += 1
 
-    createXUnitResults(args.xunit_file, runnerList, resultLists, args.verbose)
+    createXUnitResults(args.xunit_file, fullRunnerList, resultLists, args.verbose)
 
     # Print out results for each system under test
-    for runnerModule, _ in runnerList:
+    for runnerModule, _ in fullRunnerList:
         runner = runnerModule.__name__
         resultSummary = []
         for result in TosaTestRunner.Result:
diff --git a/verif/tests/test_tosa_run_tests_mocksut.py b/verif/tests/test_tosa_run_tests_mocksut.py
index 234f156..fb4a811 100644
--- a/verif/tests/test_tosa_run_tests_mocksut.py
+++ b/verif/tests/test_tosa_run_tests_mocksut.py
@@ -1,5 +1,5 @@
 """Tests for tosa_verif_run_tests.py."""
-# Copyright (c) 2021-2022, ARM Limited.
+# Copyright (c) 2021-2023, ARM Limited.
 # SPDX-License-Identifier: Apache-2.0
 import json
 from copy import deepcopy
@@ -7,6 +7,7 @@
 from xml.dom import minidom
 
 import pytest
+from runner.tosa_test_presets import TOSA_REFCOMPLIANCE_RUNNER
 from runner.tosa_verif_run_tests import main
 
 
@@ -21,11 +22,24 @@
 GRAPH_RESULT_VALID = "valid"
 GRAPH_RESULT_ERROR = "error"
 
+FAKE_REF_MODEL_PATH = Path(__file__).parent / "__fake_ref_model__"
+
+
+def _create_fake_ref_model():
+    """Create a fake ref model to fool the runner."""
+    with FAKE_REF_MODEL_PATH.open("w") as fd:
+        print("Fake ref model for mock testing", file=fd)
+
+
+def _delete_fake_ref_model():
+    """Clean up fake ref model."""
+    FAKE_REF_MODEL_PATH.unlink()
+
 
 def _create_desc_json(json_object) -> Path:
     """Create test desc.json."""
     file = Path(__file__).parent / "desc.json"
-    with open(file, "w") as fd:
+    with file.open("w") as fd:
         json.dump(json_object, fd, indent=2)
     return file
 
@@ -45,28 +59,33 @@
 def testDir() -> str:
     """Set up a mock expected pass test."""
     print("SET UP - testDir")
+    _create_fake_ref_model()
     file = _create_desc_json(TEST_DESC)
     yield file.parent
     print("TEAR DOWN - testDir")
     _delete_desc_json(file)
+    _delete_fake_ref_model()
 
 
 @pytest.fixture
 def testDirExpectedFail() -> str:
     """Set up a mock expected fail test."""
     print("SET UP - testDirExpectedFail")
+    _create_fake_ref_model()
     fail = deepcopy(TEST_DESC)
     fail["expected_failure"] = True
     file = _create_desc_json(fail)
     yield file.parent
     print("TEAR DOWN - testDirExpectedFail")
     _delete_desc_json(file)
+    _delete_fake_ref_model()
 
 
 @pytest.fixture
 def testDirMultiOutputs() -> str:
     """Set up a mock multiple results output test."""
     print("SET UP - testDirMultiOutputs")
+    _create_fake_ref_model()
     out = deepcopy(TEST_DESC)
     out["ofm_name"].append("tr1")
     out["ofm_file"].append("test-result-1.npy")
@@ -74,11 +93,14 @@
     yield file.parent
     print("TEAR DOWN - testDirMultiOutputs")
     _delete_desc_json(file)
+    _delete_fake_ref_model()
 
 
 def _get_default_argv(testDir: Path, graphResult: str) -> list:
     """Create default args based on test directory and graph result."""
     return [
+        "--ref-model-path",
+        f"{str(FAKE_REF_MODEL_PATH)}",
         "--sut-module",
         "tests.tosa_mock_sut_run",
         "--test",
@@ -99,13 +121,22 @@
     return results
 
 
-def _get_xml_testsuites_from_results(results, expectedTestSuites: int):
-    """Get XML testcases from results."""
+def _get_xml_testsuites_from_results(results, numExpectedTestSuites: int):
+    """Get XML testsuites from results."""
     testSuites = results.getElementsByTagName("testsuite")
-    assert len(testSuites) == expectedTestSuites
+    assert len(testSuites) == numExpectedTestSuites
     return testSuites
 
 
+def _check_xml_testsuites_in_results(results, expectedTestSuites: list):
+    """Check XML testsuites in results."""
+    # Add compliance to expected list
+    expectedTestSuites.append(TOSA_REFCOMPLIANCE_RUNNER)
+    testSuites = _get_xml_testsuites_from_results(results, len(expectedTestSuites))
+    for suite in testSuites:
+        assert suite.getAttribute("name") in expectedTestSuites
+
+
 def _get_xml_testcases_from_results(results, expectedTestCases: int):
     """Get XML testcases from results."""
     testCases = results.getElementsByTagName("testcase")
@@ -188,14 +219,13 @@
 def test_mock_and_dummy_sut_results(testDir: Path):
     """Run two SUTs and check they both return results."""
     try:
+        suts = ["tests.tosa_dummy_sut_run", "tests.tosa_mock_sut_run"]
         argv = _get_default_argv(testDir, GRAPH_RESULT_VALID)
         # Override sut-module setting with both SUTs
-        argv.extend(
-            ["--sut-module", "tests.tosa_dummy_sut_run", "tests.tosa_mock_sut_run"]
-        )
+        argv.extend(["--sut-module"] + suts)
         main(argv)
         results = _get_xml_results(argv)
-        _get_xml_testsuites_from_results(results, 2)
+        _check_xml_testsuites_in_results(results, suts)
         _get_xml_testcases_from_results(results, 2)
     except Exception as e:
         assert False, f"Unexpected exception {e}"
@@ -204,14 +234,13 @@
 def test_two_mock_suts(testDir: Path):
     """Test that a duplicated SUT is ignored."""
     try:
+        sut = ["tests.tosa_mock_sut_run"]
         argv = _get_default_argv(testDir, GRAPH_RESULT_VALID)
         # Override sut-module setting with duplicated SUT
-        argv.extend(
-            ["--sut-module", "tests.tosa_mock_sut_run", "tests.tosa_mock_sut_run"]
-        )
+        argv.extend(["--sut-module"] + sut * 2)
         main(argv)
         results = _get_xml_results(argv)
-        _get_xml_testsuites_from_results(results, 1)
+        _check_xml_testsuites_in_results(results, sut)
         _get_xml_testcases_from_results(results, 1)
     except Exception as e:
         assert False, f"Unexpected exception {e}"