Data generator library python interface added

Added support for using the generate library in the tosa_verif_build_tests
and tosa_verif_run_tests tosa tool scripts.
Reduced the scope of compliance test creation and verification to
the currently supported FP32 type.
Fixed a missing virtual destructor warning in generate_dot_product.h and
added a config file for the generate library.
Simple pytests included to check the python interface.

Signed-off-by: Jeremy Johnson <jeremy.johnson@arm.com>
Change-Id: I6cdad9b00660d6ddc8bd07fdea813937fb48626a
diff --git a/verif/conformance/model_files.py b/verif/conformance/model_files.py
index a9e1db3..5447f08 100644
--- a/verif/conformance/model_files.py
+++ b/verif/conformance/model_files.py
@@ -19,6 +19,7 @@
     SCHEMA = 1
     FLATC = 2
     VERIFY_LIBRARY = 3
+    GENERATE_LIBRARY = 4
 
 
 TOSA_FILE_TYPE_TO_DETAILS = {
@@ -42,6 +43,11 @@
         "location": DEFAULT_REF_MODEL_BUILD_EXE_PATH,
         "build": True,
     },
+    TosaFileType.GENERATE_LIBRARY: {
+        "name": "libtosa_reference_generate_lib.so",
+        "location": DEFAULT_REF_MODEL_BUILD_EXE_PATH,
+        "build": True,
+    },
 }
 
 
diff --git a/verif/conformance/tosa_verif_conformance_generator.py b/verif/conformance/tosa_verif_conformance_generator.py
index 4281fc2..692e79a 100644
--- a/verif/conformance/tosa_verif_conformance_generator.py
+++ b/verif/conformance/tosa_verif_conformance_generator.py
@@ -109,6 +109,8 @@
 
     build_cmd_base = [
         build_tests_cmd,
+        "--generate-lib-path",
+        str(args.generate_lib_path),
         "--filter",
         operator,
         "-o",
@@ -160,7 +162,7 @@
     error = False
     for i, cmd in enumerate(build_cmds_list):
         try:
-            _run_sh_command(args, args.ref_model_path.parent.absolute(), cmd)
+            _run_sh_command(args, args.ref_model_path.parent, cmd)
             logger.info(
                 f"{operator} test batch {(i+1)}/{len(build_cmds_list)} created successfully"
             )
@@ -225,9 +227,9 @@
     ref_cmd_base = [
         "tosa_verif_run_tests",
         "--ref-model-path",
-        str(args.ref_model_path.absolute()),
+        str(args.ref_model_path),
         "--schema-path",
-        str(args.schema_path.absolute()),
+        str(args.schema_path),
         "-j",
         str(num_cores),
         "-v",
@@ -258,7 +260,7 @@
     failed_counter = 0
 
     job_pool = mp.Pool(args.num_cores)
-    sh_partial = partial(_run_sh_command, args, args.ref_model_path.parent.absolute())
+    sh_partial = partial(_run_sh_command, args, args.ref_model_path.parent)
     pool_results = job_pool.map(sh_partial, ref_cmds)
     job_pool.close()
     job_pool.join()
@@ -525,6 +527,15 @@
         help="Path to TOSA reference model executable",
     )
     parser.add_argument(
+        "--generate-lib-path",
+        dest="generate_lib_path",
+        type=Path,
+        help=(
+            "Path to TOSA generate library. Defaults to "
+            "the library in the directory of `ref-model-path`"
+        ),
+    )
+    parser.add_argument(
         "--schema-path",
         "--operator-fbs",
         dest="schema_path",
@@ -646,6 +657,18 @@
             f"Missing reference model binary (--ref-model-path): {args.ref_model_path}"
         )
         return 2
+    args.ref_model_path = args.ref_model_path.absolute()
+
+    if args.generate_lib_path is None:
+        args.generate_lib_path = cmf.find_tosa_file(
+            cmf.TosaFileType.GENERATE_LIBRARY, args.ref_model_path
+        )
+    if not args.generate_lib_path.is_file():
+        logger.error(
+            f"Missing TOSA generate data library (--generate-lib-path): {args.generate_lib_path}"
+        )
+        return 2
+    args.generate_lib_path = args.generate_lib_path.absolute()
 
     if args.schema_path is None:
         args.schema_path = cmf.find_tosa_file(
@@ -656,6 +679,7 @@
             f"Missing reference model schema (--schema-path): {args.schema_path}"
         )
         return 2
+    args.schema_path = args.schema_path.absolute()
 
     if args.flatc_path is None:
         args.flatc_path = cmf.find_tosa_file(
@@ -664,6 +688,7 @@
     if not args.flatc_path.is_file():
         logger.error(f"Missing flatc binary (--flatc-path): {args.flatc_path}")
         return 2
+    args.flatc_path = args.flatc_path.absolute()
 
     if args.unit_tests in ["framework", "both"]:
         logger.warning(
diff --git a/verif/generator/datagenerator.py b/verif/generator/datagenerator.py
new file mode 100644
index 0000000..408c83e
--- /dev/null
+++ b/verif/generator/datagenerator.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2023, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+"""Calls the data generation library to create the test data."""
+import ctypes as ct
+import json
+from pathlib import Path
+
+import numpy as np
+from schemavalidation import schemavalidation
+
+
+class GenerateError(Exception):
+    """Exception raised for errors performing data generation."""
+
+
+class GenerateLibrary:
+    """Python interface to the C generate library."""
+
+    def __init__(self, generate_lib_path):
+        """Find the library and set up the interface."""
+        self.lib_path = generate_lib_path
+        if not self.lib_path.is_file():
+            raise GenerateError(f"Could not find generate library - {self.lib_path}")
+
+        self.test_desc = None
+        self.json_config = None
+        self.lib = ct.cdll.LoadLibrary(self.lib_path)
+
+        self.tgd_generate_data = self.lib.tgd_generate_data
+        self.tgd_generate_data.argtypes = [
+            ct.c_char_p,
+            ct.c_char_p,
+            ct.c_void_p,
+            ct.c_size_t,
+        ]
+        self.tgd_generate_data.restype = ct.c_bool
+
+    def check_config(self, test_desc: dict):
+        """Quick check that the config supports data generation."""
+        return ("meta" in test_desc) and ("data_gen" in test_desc["meta"])
+
+    def set_config(self, test_desc: dict):
+        """Set the test config in the library.
+
+        test_desc - the test desc.json file
+        """
+        self.test_desc = None
+        self.json_config = None
+
+        if not self.check_config(test_desc):
+            raise GenerateError("No meta/data_gen section found in desc.json")
+
+        # Validate the config versus the schema
+        tdsv = schemavalidation.TestDescSchemaValidator()
+        tdsv.validate_config(test_desc)
+
+        self.test_desc = test_desc
+        self.json_config = test_desc["meta"]["data_gen"]
+
+    def _create_buffer(self, dtype: str, shape: tuple):
+        """Helper to create a buffer of the required type."""
+        size = 1
+        for dim in shape:
+            size *= dim
+
+        if dtype == "FP32":
+            # Create buffer and initialize to zero
+            buffer = (ct.c_float * size)(0)
+            size_bytes = size * 4
+        else:
+            raise GenerateError(f"Unsupported data type {dtype}")
+
+        return buffer, size_bytes
+
+    def _data_gen_write(
+        self, test_path: Path, json_bytes: bytes, ifm_name: str, ifm_file: str
+    ):
+        """Generate the named tensor data and save it in numpy format."""
+        try:
+            tensor = self.json_config["tensors"][ifm_name]
+            dtype = tensor["data_type"]
+            shape = tuple(tensor["shape"])
+        except KeyError as e:
+            raise GenerateError(
+                f"Missing data in desc.json for input {ifm_name} - {repr(e)}"
+            )
+
+        buffer, size_bytes = self._create_buffer(dtype, shape)
+        buffer_ptr = ct.cast(buffer, ct.c_void_p)
+
+        result = self.tgd_generate_data(
+            ct.c_char_p(json_bytes),
+            ct.c_char_p(bytes(ifm_name, "utf8")),
+            buffer_ptr,
+            ct.c_size_t(size_bytes),
+        )
+        if not result:
+            raise GenerateError("Data generate failed")
+
+        arr = np.ctypeslib.as_array(buffer)
+        arr = np.reshape(arr, shape)
+
+        file_name = test_path / ifm_file
+        np.save(file_name, arr)
+
+    def write_numpy_files(self, test_path: Path):
+        """Write out all the specified tensors to numpy data files."""
+        if self.test_desc is None or self.json_config is None:
+            raise GenerateError("Cannot write numpy files as no config set up")
+
+        try:
+            ifm_names = self.test_desc["ifm_name"]
+            ifm_files = self.test_desc["ifm_file"]
+        except KeyError as e:
+            raise GenerateError(f"Missing data in desc.json - {repr(e)}")
+
+        json_bytes = bytes(json.dumps(self.json_config), "utf8")
+
+        failures = []
+        for iname, ifile in zip(ifm_names, ifm_files):
+            try:
+                self._data_gen_write(test_path, json_bytes, iname, ifile)
+            except GenerateError as e:
+                failures.append(
+                    f"ERROR: Failed to create data for tensor {iname} - {repr(e)}"
+                )
+
+        if len(failures) > 0:
+            raise GenerateError("\n".join(failures))
+
+
+def main(argv=None):
+    """Simple command line interface for the data generator."""
+    import argparse
+    import conformance.model_files as cmf
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--generate-lib-path",
+        type=Path,
+        help="Path to TOSA generate lib",
+    )
+    parser.add_argument(
+        "path", type=Path, help="the path to the test directory to generate data for"
+    )
+    args = parser.parse_args(argv)
+    test_path = args.path
+
+    if args.generate_lib_path is None:
+        # Try to work out ref model directory and find the generate library
+        # but this default only works for the python developer environment
+        # i.e. when using the scripts/py-dev-env.* scripts
+        # otherwise use the command line option --generate-lib-path to specify path
+        ref_model_dir = Path(__file__).absolute().parents[2]
+        args.generate_lib_path = cmf.find_tosa_file(
+            cmf.TosaFileType.GENERATE_LIBRARY, ref_model_dir, False
+        )
+
+    if not test_path.is_dir():
+        print(f"ERROR: Invalid directory - {test_path}")
+        return 2
+
+    test_desc_path = test_path / "desc.json"
+
+    if not test_desc_path.is_file():
+        print(f"ERROR: No test description found: {test_desc_path}")
+        return 2
+
+    # Load the JSON desc.json
+    try:
+        with test_desc_path.open("r") as fd:
+            test_desc = json.load(fd)
+    except Exception as e:
+        print(f"ERROR: Loading {test_desc_path} - {repr(e)}")
+        return 2
+
+    try:
+        dgl = GenerateLibrary(args.generate_lib_path)
+        if not dgl.check_config(test_desc):
+            print(f"WARNING: No data generation supported for {test_path}")
+            return 2
+
+        dgl.set_config(test_desc)
+    except GenerateError as e:
+        print(f"ERROR: Initializing generate library - {repr(e)}")
+        return 1
+
+    try:
+        dgl.write_numpy_files(test_path)
+    except GenerateError as e:
+        print(f"ERROR: Writing out data files to {test_path}\n{repr(e)}")
+        return 1
+
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/verif/generator/tosa_arg_gen.py b/verif/generator/tosa_arg_gen.py
index de882ca..3b5d458 100644
--- a/verif/generator/tosa_arg_gen.py
+++ b/verif/generator/tosa_arg_gen.py
@@ -635,15 +635,13 @@
         # Variable inputs versus constants
         pCount, cCount = testGen.TOSA_OP_LIST[opName]["operands"]
 
-        overrideLazy = False
-        if not gtu.dtypeIsFloat(dtypeList[0]) and testGen.args.lazy_data_gen:
-            # TEMPORARY OVERRIDE for integer types
-            overrideLazy = True
+        if error_name is not None or not gtu.dtypeIsSupportedByCompliance(dtypeList[0]):
+            # Fall back to original path when dealing with unsupported types
+
+            # First turn off lazy data gen so we always produce data
+            lazy_data_gen = testGen.args.lazy_data_gen
             testGen.args.lazy_data_gen = False
 
-        # TODO - Change to generation of data using library!
-        # For now - we fall back to original path (or when dealing with non-floats)
-        if not testGen.args.lazy_data_gen:
             tens_ser_list = TosaTensorValuesGen.tvgDefault(
                 testGen,
                 testGen.TOSA_OP_LIST[opName],
@@ -652,9 +650,8 @@
                 [],
                 error_name,
             )
-            if overrideLazy:
-                # Return to lazy mode
-                testGen.args.lazy_data_gen = True
+            # Restore lazy data gen setting
+            testGen.args.lazy_data_gen = lazy_data_gen
             return TosaTensorValuesGen.TVGInfo(tens_ser_list, None)
 
         # Create data generator meta-data
@@ -1112,7 +1109,11 @@
     @staticmethod
     def _add_data_generators(testGen, opName, dtype, arg_list, error_name, **kwargs):
         """Add extra tests for each type of data generator for this op."""
-        if error_name is None and "data_gen" in testGen.TOSA_OP_LIST[opName]:
+        if (
+            error_name is None
+            and "data_gen" in testGen.TOSA_OP_LIST[opName]
+            and gtu.dtypeIsSupportedByCompliance(dtype)
+        ):
             if dtype in [DType.FP16, DType.FP32, DType.BF16]:
                 dataGenTypesList = testGen.TOSA_OP_LIST[opName]["data_gen"]["fp"]
             else:
diff --git a/verif/generator/tosa_test_gen.py b/verif/generator/tosa_test_gen.py
index 8beb2ae..8fcea29 100644
--- a/verif/generator/tosa_test_gen.py
+++ b/verif/generator/tosa_test_gen.py
@@ -9,6 +9,7 @@
 import generator.tosa_utils as gtu
 import numpy as np
 import serializer.tosa_serializer as ts
+from generator.datagenerator import GenerateLibrary
 from generator.tosa_arg_gen import TosaArgGen
 from generator.tosa_arg_gen import TosaQuantGen
 from generator.tosa_arg_gen import TosaTensorGen
@@ -55,6 +56,11 @@
         self.random_fp_high = max(args.tensor_fp_value_range)
         # JSON schema validation
         self.descSchemaValidator = TestDescSchemaValidator()
+        # Data generator library when not generating the data later
+        if not args.lazy_data_gen:
+            self.dgl = GenerateLibrary(args.generate_lib_path)
+        else:
+            self.dgl = None
 
     def createSerializer(self, opName, testPath):
         self.testPath = os.path.join(opName, testPath)
@@ -92,15 +98,21 @@
         self.descSchemaValidator.validate_config(desc)
 
         if metaData:
-            if self.args.lazy_data_gen and "data_gen" in metaData:
-                # Output datagen meta data as CPP data
-                path_md = path / f"{testName}_meta_data_gen.cpp"
-                with path_md.open("w") as fd:
-                    fd.write(TOSA_AUTOGENERATED_HEADER)
-                    fd.write("// Test meta data for data generation setup\n\n")
-                    fd.write(f'const char* json_tdg_config_{path.stem} = R"(')
-                    json.dump(metaData["data_gen"], fd)
-                    fd.write(')";\n\n')
+            if "data_gen" in metaData:
+                if self.args.lazy_data_gen:
+                    # Output datagen meta data as CPP data
+                    path_md = path / f"{testName}_meta_data_gen.cpp"
+                    with path_md.open("w") as fd:
+                        fd.write(TOSA_AUTOGENERATED_HEADER)
+                        fd.write("// Test meta data for data generation setup\n\n")
+                        fd.write(f'const char* json_tdg_config_{path.stem} = R"(')
+                        json.dump(metaData["data_gen"], fd)
+                        fd.write(')";\n\n')
+                else:
+                    # Generate the data
+                    self.dgl.set_config(desc)
+                    self.dgl.write_numpy_files(path)
+
             if "compliance" in metaData:
                 # Output datagen meta data as CPP data
                 path_md = path / f"{testName}_meta_compliance.cpp"
@@ -282,8 +294,8 @@
         )
 
     def tensorComplianceMetaData(self, op, argsDict, outputTensor, errorName):
-        if errorName or not gtu.dtypeIsFloat(outputTensor.dtype):
-            # No compliance for error tests or integer tests currently
+        if errorName or not gtu.dtypeIsSupportedByCompliance(outputTensor.dtype):
+            # No compliance for error tests or other data types currently
             return None
 
         # Create compliance meta data for expected output tensor
@@ -1099,9 +1111,12 @@
 
         self.ser.addOperator(op["op"], input_list, output_list, attr)
 
-        compliance = self.tensorComplianceMetaData(
-            op, args_dict, result_tensor, error_name
-        )
+        if gtu.dtypeIsSupportedByCompliance(a.dtype):
+            compliance = self.tensorComplianceMetaData(
+                op, args_dict, result_tensor, error_name
+            )
+        else:
+            compliance = None
 
         return TosaTestGen.BuildInfo(result_tensor, compliance)
 
diff --git a/verif/generator/tosa_utils.py b/verif/generator/tosa_utils.py
index dddc320..14afaa7 100644
--- a/verif/generator/tosa_utils.py
+++ b/verif/generator/tosa_utils.py
@@ -55,8 +55,9 @@
 DG_DOT_PRODUCT_OPTIONAL_INFO = ("acc_type", "kernel", "axis")
 
 
-def dtypeIsFloat(dtype):
-    return dtype in (DType.FP16, DType.BF16, DType.FP32)
+def dtypeIsSupportedByCompliance(dtype):
+    """Types supported by the new data generation and compliance flow."""
+    return dtype in (DType.FP32,)
 
 
 def valueToName(item, value):
diff --git a/verif/generator/tosa_verif_build_tests.py b/verif/generator/tosa_verif_build_tests.py
index 0d532c0..954c6e9 100644
--- a/verif/generator/tosa_verif_build_tests.py
+++ b/verif/generator/tosa_verif_build_tests.py
@@ -3,7 +3,9 @@
 import argparse
 import re
 import sys
+from pathlib import Path
 
+import conformance.model_files as cmf
 from generator.tosa_test_gen import TosaTestGen
 from serializer.tosa_serializer import dtype_str_to_val
 from serializer.tosa_serializer import DTypeNames
@@ -86,6 +88,13 @@
         help="Tensor data generation is delayed til test running",
     )
 
+    parser.add_argument(
+        "--generate-lib-path",
+        dest="generate_lib_path",
+        type=Path,
+        help="Path to TOSA generate library.",
+    )
+
     # Constraints on tests
     tens_group.add_argument(
         "--tensor-dim-range",
@@ -268,6 +277,17 @@
 
     args = parseArgs(argv)
 
+    if not args.lazy_data_gen:
+        if args.generate_lib_path is None:
+            args.generate_lib_path = cmf.find_tosa_file(
+                cmf.TosaFileType.GENERATE_LIBRARY, Path("reference_model"), False
+            )
+        if not args.generate_lib_path.is_file():
+            print(
+                f"Argument error: Generate library (--generate-lib-path) not found - {str(args.generate_lib_path)}"
+            )
+            exit(2)
+
     ttg = TosaTestGen(args)
 
     if args.test_type == "both":
diff --git a/verif/runner/tosa_test_runner.py b/verif/runner/tosa_test_runner.py
index b348f50..984b2d9 100644
--- a/verif/runner/tosa_test_runner.py
+++ b/verif/runner/tosa_test_runner.py
@@ -10,7 +10,9 @@
 from checker.color_print import set_print_in_color
 from checker.tosa_result_checker import set_print_result
 from checker.tosa_result_checker import test_check
+from generator.datagenerator import GenerateLibrary
 from json2fbbin import json2fbbin
+from json2numpy import json2numpy
 from runner.tosa_test_presets import TOSA_REFCOMPLIANCE_RUNNER
 
 
@@ -71,6 +73,7 @@
         self.testDirPath = testDirPath
         self.testName = self.testDirPath.name
         self.verify_lib_path = args.verify_lib_path
+        self.generate_lib_path = args.generate_lib_path
 
         set_print_in_color(not args.no_color)
         # Stop the result checker printing anything - we will do it
@@ -135,6 +138,33 @@
                 return True, "non-{} profile".format(self.args.profile)
         return False, ""
 
+    def _ready_file(self, dataFile, jsonOnly=False):
+        """Convert/create any data file that is missing."""
+        dataPath = self.testDirPath / dataFile
+        if not dataPath.is_file():
+            jsonPath = dataPath.with_suffix(".json")
+            if jsonPath.is_file():
+                # Data files stored as JSON
+                if self.args.verbose:
+                    print(f"Readying data file: {dataPath}")
+                json2numpy.json_to_npy(jsonPath)
+            elif not jsonOnly:
+                # Use data generator for all data files
+                if self.args.verbose:
+                    print("Readying all data input files")
+                dgl = GenerateLibrary(self.generate_lib_path)
+                dgl.set_config(self.testDesc)
+                dgl.write_numpy_files(self.testDirPath)
+
+    def readyDataFiles(self):
+        """Check that the data files have been created/converted."""
+        for dataFile in self.testDesc["ifm_file"]:
+            self._ready_file(dataFile)
+        # Convert expected result if any
+        if "expected_result_file" in self.testDesc:
+            for dataFile in self.testDesc["expected_result_file"]:
+                self._ready_file(dataFile, jsonOnly=True)
+
     def runTestGraph(self):
         """Override with function that calls system under test."""
         pass
diff --git a/verif/runner/tosa_verif_run_tests.py b/verif/runner/tosa_verif_run_tests.py
index 54cb7b2..d2aae22 100644
--- a/verif/runner/tosa_verif_run_tests.py
+++ b/verif/runner/tosa_verif_run_tests.py
@@ -13,7 +13,7 @@
 
 import conformance.model_files as cmf
 import runner.tosa_test_presets as ttp
-from json2numpy import json2numpy
+from generator.datagenerator import GenerateError
 from runner.tosa_test_runner import TosaTestInvalid
 from runner.tosa_test_runner import TosaTestRunner
 from xunit import xunit
@@ -52,6 +52,15 @@
         help="Path to TOSA reference model executable",
     )
     parser.add_argument(
+        "--generate-lib-path",
+        dest="generate_lib_path",
+        type=Path,
+        help=(
+            "Path to TOSA generate library. Defaults to "
+            "the library in the directory of `ref-model-path`"
+        ),
+    )
+    parser.add_argument(
         "--verify-lib-path",
         dest="verify_lib_path",
         type=Path,
@@ -177,22 +186,6 @@
     return args
 
 
-EXCLUSION_PREFIX = ["test", "model", "desc"]
-
-
-def convert2Numpy(test_path):
-    """Convert all the JSON numpy files back into binary numpy."""
-    jsons = test_path.glob("*.json")
-    for j in jsons:
-        for exclude in EXCLUSION_PREFIX:
-            if j.name.startswith(exclude):
-                j = None
-                break
-        if j:
-            # debug print(f"Converting {json}")
-            json2numpy.json_to_npy(j)
-
-
 def workerThread(task_queue, runnerList, complianceRunner, args, result_queue):
     """Worker thread that runs the next test from the queue."""
     complianceRunnerList = runnerList.copy()
@@ -222,7 +215,6 @@
             currentRunners = runnerList
 
         msg = ""
-        converted = False
         for runnerModule, runnerArgs in currentRunners:
             try:
                 start_time = datetime.now()
@@ -232,39 +224,30 @@
 
                 skip, reason = runner.skipTest()
                 if skip:
-                    msg = "Skipping {} test".format(reason)
-                    print("{} {}".format(msg, test_path))
+                    msg = f"Skipping {reason} test"
+                    print(f"{msg} {test_path}")
                     rc = TosaTestRunner.Result.SKIPPED
                 else:
-                    # Convert JSON data files into numpy format on first pass
-                    if not converted:
-                        convert2Numpy(test_path)
-                        converted = True
-
                     if args.verbose:
-                        print(
-                            "Running runner {} with test {}".format(
-                                runnerName, test_path
-                            )
-                        )
+                        print(f"Running runner {runnerName} with test {test_path}")
+                    try:
+                        # Convert or generate the required data files
+                        runner.readyDataFiles()
+                    except Exception as e:
+                        msg = f"Failed to ready test files error: {e}"
+                        raise e
+
                     try:
                         grc, gmsg = runner.runTestGraph()
                         rc, msg = runner.testResult(grc, gmsg)
                     except Exception as e:
-                        msg = "System Under Test error: {}".format(e)
-                        print(msg)
-                        print(
-                            "".join(
-                                traceback.format_exception(
-                                    etype=type(e), value=e, tb=e.__traceback__
-                                )
-                            )
-                        )
-                        rc = TosaTestRunner.Result.INTERNAL_ERROR
+                        msg = f"System Under Test error: {e}"
+                        raise e
             except Exception as e:
-                msg = "Internal error: {}".format(e)
+                if not msg:
+                    msg = f"Internal error: {e}"
                 print(msg)
-                if not isinstance(e, TosaTestInvalid):
+                if not isinstance(e, (TosaTestInvalid, GenerateError)):
                     # Show stack trace on unexpected exceptions
                     print(
                         "".join(
@@ -374,6 +357,10 @@
         args.ref_model_path = cmf.find_tosa_file(
             cmf.TosaFileType.REF_MODEL, Path("reference_model"), False
         )
+    if args.generate_lib_path is None:
+        args.generate_lib_path = cmf.find_tosa_file(
+            cmf.TosaFileType.GENERATE_LIBRARY, args.ref_model_path
+        )
     if args.verify_lib_path is None:
         args.verify_lib_path = cmf.find_tosa_file(
             cmf.TosaFileType.VERIFY_LIBRARY, args.ref_model_path
diff --git a/verif/tests/test_tosa_datagenerator.py b/verif/tests/test_tosa_datagenerator.py
new file mode 100644
index 0000000..ba0235c
--- /dev/null
+++ b/verif/tests/test_tosa_datagenerator.py
@@ -0,0 +1,116 @@
+"""Tests for the python interface to the data generator library."""
+# Copyright (c) 2023, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+from pathlib import Path
+
+import numpy as np
+import pytest
+from generator.datagenerator import GenerateError
+from generator.datagenerator import GenerateLibrary
+
+# NOTE: These tests are marked as POST COMMIT
+# To run them, please build the reference_model in a local "build" directory
+# (as per the README) and run them using: pytest -m "postcommit"
+
+# Location of reference model binaries
+REF_MODEL_BUILD_PATH = Path(__file__).resolve().parents[2] / "build" / "reference_model"
+GENERATE_LIB = "libtosa_reference_generate_lib.so"
+GENERATE_LIB_PATH = REF_MODEL_BUILD_PATH / GENERATE_LIB
+
+TEST_DIR = Path(__file__).parent
+
+
+@pytest.mark.postcommit
+def test_generate_lib_built():
+    """First test to check the library has been built."""
+    assert GENERATE_LIB_PATH.is_file()
+
+
+@pytest.mark.postcommit
+def test_checker_generate_load_fail():
+    with pytest.raises(GenerateError) as excinfo:
+        GenerateLibrary(Path("/place-that-does-not-exist"))
+    assert str(excinfo.value).startswith("Could not find generate library")
+
+
+@pytest.mark.postcommit
+def test_checker_generate_load():
+    glib = GenerateLibrary(GENERATE_LIB_PATH)
+    assert glib
+
+
+JSON_DATAGEN_DOT_PRODUCT = {
+    "tosa_file": "test.json",
+    "ifm_name": ["input-0", "input-1"],
+    "ifm_file": ["input-0.npy", "input-1.npy"],
+    "ofm_name": ["result-0"],
+    "ofm_file": ["result-0.npy"],
+    "meta": {
+        "data_gen": {
+            "version": "0.1",
+            "tensors": {
+                "input-0": {
+                    "generator": "DOT_PRODUCT",
+                    "data_type": "FP32",
+                    "input_type": "VARIABLE",
+                    "shape": [3, 5, 4],
+                    "input_pos": 0,
+                    "op": "MATMUL",
+                    "dot_product_info": {"s": 0, "ks": 4, "acc_type": "FP32"},
+                },
+                "input-1": {
+                    "generator": "DOT_PRODUCT",
+                    "data_type": "FP32",
+                    "input_type": "VARIABLE",
+                    "shape": [3, 4, 6],
+                    "input_pos": 1,
+                    "op": "MATMUL",
+                    "dot_product_info": {"s": 0, "ks": 4, "acc_type": "FP32"},
+                },
+            },
+        }
+    },
+}
+
+
+@pytest.mark.postcommit
+def test_generate_dot_product_check():
+    glib = GenerateLibrary(GENERATE_LIB_PATH)
+    assert glib
+
+    json_config = JSON_DATAGEN_DOT_PRODUCT
+    glib.set_config(json_config)
+
+    glib.write_numpy_files(TEST_DIR)
+
+    # Test the files exist and are the expected numpy files
+    for f, n in zip(json_config["ifm_file"], json_config["ifm_name"]):
+        file = TEST_DIR / f
+        assert file.is_file()
+        arr = np.load(file)
+        assert arr.shape == tuple(
+            json_config["meta"]["data_gen"]["tensors"][n]["shape"]
+        )
+        assert arr.dtype == np.float32
+        file.unlink()
+
+
+@pytest.mark.postcommit
+def test_generate_dot_product_check_fail_names():
+    glib = GenerateLibrary(GENERATE_LIB_PATH)
+    assert glib
+
+    # Fix up the JSON to have the wrong names
+    json_config = JSON_DATAGEN_DOT_PRODUCT.copy()
+    json_config["ifm_name"] = ["not-input0", "not-input1"]
+    glib.set_config(json_config)
+
+    with pytest.raises(GenerateError) as excinfo:
+        glib.write_numpy_files(TEST_DIR)
+    info = str(excinfo.value).split("\n")
+    for i, n in enumerate(json_config["ifm_name"]):
+        assert info[i].startswith(f"ERROR: Failed to create data for tensor {n}")
+
+    for f in json_config["ifm_file"]:
+        file = TEST_DIR / f
+        assert not file.is_file()
diff --git a/verif/tests/test_tosa_refmodel.py b/verif/tests/test_tosa_refmodel.py
index 675a534..24ee9e2 100644
--- a/verif/tests/test_tosa_refmodel.py
+++ b/verif/tests/test_tosa_refmodel.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 from shutil import rmtree
 
+import conformance.model_files as cmf
 import numpy as np
 import pytest
 from checker.tosa_result_checker import test_check as tosa_check
@@ -17,9 +18,13 @@
 # Note: Must rename imports (like test_check) so that pytest doesn't assume its a test function/class
 
 # Location of reference model binaries
-REF_MODEL_BUILD_PATH = Path(__file__).resolve().parents[2] / "build" / "reference_model"
-REF_MODEL_EXE = "tosa_reference_model"
-REF_MODEL_EXE_PATH = REF_MODEL_BUILD_PATH / REF_MODEL_EXE
+REF_MODEL_DIR = Path(__file__).resolve().parents[2]
+REF_MODEL_EXE_PATH = cmf.find_tosa_file(
+    cmf.TosaFileType.REF_MODEL, REF_MODEL_DIR, False
+)
+GENERATE_LIB_PATH = cmf.find_tosa_file(
+    cmf.TosaFileType.GENERATE_LIBRARY, REF_MODEL_EXE_PATH
+)
 
 # Set this to False if you want ot preserve the test directories after running
 CLEAN_UP_TESTS = True
@@ -51,7 +56,9 @@
     "bf16": "bf16",
 }
 
-# NOTE: These tests are set to POST COMMIT - so will only run on the CI
+# NOTE: These tests are marked as POST COMMIT
+# To run them, please build the reference_model in a local "build" directory
+# (as per the README) and run them using: pytest -m "postcommit"
 
 
 @pytest.mark.postcommit
@@ -83,6 +90,8 @@
 
         # Generate tests without any zero-point
         build_args = [
+            "--generate-lib-path",
+            str(GENERATE_LIB_PATH),
             "--filter",
             self.op_name,
             "--target-shape",
diff --git a/verif/tests/test_tosa_run_tests_mocksut.py b/verif/tests/test_tosa_run_tests_mocksut.py
index fb4a811..f4437b2 100644
--- a/verif/tests/test_tosa_run_tests_mocksut.py
+++ b/verif/tests/test_tosa_run_tests_mocksut.py
@@ -1,4 +1,4 @@
-"""Tests for tosa_verif_run_tests.py."""
+"""Mock SUT tests for tosa_verif_run_tests.py."""
 # Copyright (c) 2021-2023, ARM Limited.
 # SPDX-License-Identifier: Apache-2.0
 import json
@@ -55,16 +55,33 @@
     file.unlink()
 
 
+def _create_ifm_files(files):
+    """Create empty input files."""
+    for name in files:
+        file = Path(__file__).parent / name
+        with open(file, "w") as fd:
+            fd.write("empty")
+
+
+def _delete_ifm_files(files):
+    """Delete empty input files."""
+    for name in files:
+        file = Path(__file__).parent / name
+        file.unlink()
+
+
 @pytest.fixture
 def testDir() -> str:
     """Set up a mock expected pass test."""
     print("SET UP - testDir")
     _create_fake_ref_model()
+    _create_ifm_files(TEST_DESC["ifm_file"])
     file = _create_desc_json(TEST_DESC)
     yield file.parent
     print("TEAR DOWN - testDir")
     _delete_desc_json(file)
     _delete_fake_ref_model()
+    _delete_ifm_files(TEST_DESC["ifm_file"])
 
 
 @pytest.fixture
@@ -74,11 +91,13 @@
     _create_fake_ref_model()
     fail = deepcopy(TEST_DESC)
     fail["expected_failure"] = True
+    _create_ifm_files(TEST_DESC["ifm_file"])
     file = _create_desc_json(fail)
     yield file.parent
     print("TEAR DOWN - testDirExpectedFail")
     _delete_desc_json(file)
     _delete_fake_ref_model()
+    _delete_ifm_files(TEST_DESC["ifm_file"])
 
 
 @pytest.fixture
@@ -89,11 +108,13 @@
     out = deepcopy(TEST_DESC)
     out["ofm_name"].append("tr1")
     out["ofm_file"].append("test-result-1.npy")
+    _create_ifm_files(TEST_DESC["ifm_file"])
     file = _create_desc_json(out)
     yield file.parent
     print("TEAR DOWN - testDirMultiOutputs")
     _delete_desc_json(file)
     _delete_fake_ref_model()
+    _delete_ifm_files(TEST_DESC["ifm_file"])
 
 
 def _get_default_argv(testDir: Path, graphResult: str) -> list:
diff --git a/verif/tests/test_tosa_verifier.py b/verif/tests/test_tosa_verifier.py
index 864fa9c..a29f983 100644
--- a/verif/tests/test_tosa_verifier.py
+++ b/verif/tests/test_tosa_verifier.py
@@ -8,7 +8,9 @@
 from checker.verifier import VerifierError
 from checker.verifier import VerifierLibrary
 
-# NOTE: These tests are set to POST COMMIT - so will only run on the CI
+# NOTE: These tests are marked as POST COMMIT
+# To run them, please build the reference_model in a local "build" directory
+# (as per the README) and run them using: pytest -m "postcommit"
 
 # Location of reference model binaries
 REF_MODEL_BUILD_PATH = Path(__file__).resolve().parents[2] / "build" / "reference_model"