Initial lazy data-gen and compliance test build support

Add initial support for compliance and lazy data-gen meta data in
desc.json for MATMUL.
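
For illustration, the new meta data is stored under a "meta" key in
the test desc.json. A sketch (tensor names, shapes and values here are
made up; MATMUL with FP32 dot-product data generation):

    "meta": {
        "data_gen": {
            "version": "0.1",
            "tensors": {
                "input-0": {
                    "generator": "DOT_PRODUCT",
                    "data_type": "FP32",
                    "shape": [1, 4, 8],
                    "input_pos": 0,
                    "op": "matmul",
                    "input_type": "variable",
                    "dot_product_info": {"s": 0, "ks": 8, "acc_type": "FP32"}
                }
            }
        },
        "compliance": {
            "version": "0.1",
            "tensors": {
                "result-0": {
                    "mode": "DOT_PRODUCT",
                    "dot_product_info": {"s": 0, "ks": 8, "data_type": "FP32"}
                }
            }
        }
    }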

Signed-off-by: Jeremy Johnson <jeremy.johnson@arm.com>
Change-Id: I00c047814134a96d7c98d890e93b5884e25b8e64
diff --git a/scripts/convert2conformance/convert2conformance.py b/scripts/convert2conformance/convert2conformance.py
index b1f8d0e..f915070 100755
--- a/scripts/convert2conformance/convert2conformance.py
+++ b/scripts/convert2conformance/convert2conformance.py
@@ -11,11 +11,13 @@
 import json
 import logging
 import os
+import shutil
 from pathlib import Path
 from typing import Optional
 
 from json2fbbin.json2fbbin import fbbin_to_json
 from json2numpy.json2numpy import npy_to_json
+from schemavalidation.schemavalidation import TestDescSchemaValidator
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("convert2conformance")
@@ -35,7 +37,8 @@
 
 def parse_args(argv):
     """Parse the arguments."""
-    parser = argparse.ArgumentParser()
+    # Set prog for when we are called via tosa_verif_conformance_generator
+    parser = argparse.ArgumentParser(prog="convert2conformance")
     parser.add_argument(
         "test_dir",
         default=Path.cwd(),
@@ -92,6 +95,11 @@
         help="Output directory must not contain the same test directory",
     )
     parser.add_argument(
+        "--lazy-data-generation",
+        action="store_true",
+        help="Enable lazy data generation (only for tosa-mi)",
+    )
+    parser.add_argument(
         "-v", "--verbose", dest="verbose", action="store_true", help="Verbose operation"
     )
     args = parser.parse_args(argv)
@@ -191,14 +199,15 @@
                 else:
                     logger.error(f"Missing result file {ofm_path}")
                     return None
-            cfm_files.append(cfm + ".npy")
+                cfm_files.append(cfm + ".npy")
         # Remove path and "ref-"/"ref_model_" from output filenames
         ofm_files.append(strip_ref_output_name(ofm_path.name))
 
     # Rewrite output file names as they can be relative, but keep them npys
     test_desc["ofm_file"] = ofm_files
-    if not test_desc["expected_failure"]:
-        # Output expected result file for conformance if expected pass
+    if not test_desc["expected_failure"] and cfm_files:
+        # Output expected result files for conformance when the test is
+        # expected to pass and result files were produced
         test_desc["expected_result_file"] = cfm_files
 
     # Add supported profiles
@@ -319,23 +328,32 @@
     # Convert input files to JSON
     ifm_files = []
     for file in test_desc["ifm_file"]:
-        if file is None:
-            ifm_files.append(None)
-        else:
+        if file:
             path = desc_filename.parent / file
-            convert_numpy_file(path, args.output_dir)
             ifm_files.append(path.name)
+            if path.is_file():
+                convert_numpy_file(path, args.output_dir)
+            else:
+                if not args.lazy_data_generation:
+                    logger.error(f"Missing input file {path.name}")
+                    return 1
+
     # Rewrite input file names to make sure the paths are correct,
     # but keep them numpys as the test runner will convert them back
     # before giving them to the SUT
     test_desc["ifm_file"] = ifm_files
 
+    # Copy across any cpp meta data files for the data-generator/verifier
+    cpp_files = args.test_dir.glob("*.cpp")
+    for cpp in cpp_files:
+        shutil.copy(str(cpp), str(args.output_dir))
+
     # Update desc.json and convert result files to JSON
     test_desc = update_desc_json(
         desc_filename.parent,
         test_desc,
         output_dir=args.output_dir,
-        create_result=True,
+        create_result=(not args.lazy_data_generation),
         profiles=args.profile,
         tags=args.tag,
     )
@@ -343,6 +361,9 @@
         # Error from conversion/update
         return 1
 
+    # Validate the desc.json schema
+    TestDescSchemaValidator().validate_config(test_desc)
+
     # Output new desc.json
     new_desc_filename = args.output_dir / NAME_DESC_FILENAME
     with open(new_desc_filename, "w") as fd:
diff --git a/verif/conformance/README.md b/verif/conformance/README.md
index a7ad789..8fbf4fd 100644
--- a/verif/conformance/README.md
+++ b/verif/conformance/README.md
@@ -17,6 +17,7 @@
 
 * "group" - name of the group this operator is in, in the spec
 * "profile" - list of profiles that this operator covers
+* "support_for" - optional list of supported creation modes out of: lazy_data_gen (data generation just before test run)
 * "generation" - dictionary of test generation details - see below
 * "selection" - dictionary of test selection details - see below
 
@@ -49,6 +50,8 @@
 
 ### Framework ops
 
+DEPRECATED - not supported for conformance testing.
+
 NOTE: Currently assumed all framework ops will be TFLite.
 
 Naming: `tosa_PPP_profile_framework_ops_info.json`
diff --git a/verif/conformance/tosa_main_profile_ops_info.json b/verif/conformance/tosa_main_profile_ops_info.json
index 7388835..0d3df72 100644
--- a/verif/conformance/tosa_main_profile_ops_info.json
+++ b/verif/conformance/tosa_main_profile_ops_info.json
@@ -1417,6 +1417,7 @@
         "profile": [
             "tosa-mi"
         ],
+        "support_for": [ "lazy_data_gen" ],
         "generation": {
             "standard": {
                 "negative_dim_range": "1,10",
@@ -1460,11 +1461,7 @@
         },
         "selection": {
             "default": {
-                "params": {},
-                "permutes": [
-                    "shape",
-                    "type"
-                ]
+                "all": "true"
             }
         }
     },
diff --git a/verif/conformance/tosa_verif_conformance_generator.py b/verif/conformance/tosa_verif_conformance_generator.py
index 0fb5500..c2ea4ec 100644
--- a/verif/conformance/tosa_verif_conformance_generator.py
+++ b/verif/conformance/tosa_verif_conformance_generator.py
@@ -84,7 +84,14 @@
 
 
 def build_op_tests(
-    args, test_type, profile, operator, group, gen_args_list, gen_neg_dim_range
+    args,
+    test_type,
+    profile,
+    operator,
+    group,
+    gen_args_list,
+    gen_neg_dim_range,
+    supports=[],
 ):
     """Build tests for a given operator.
 
@@ -105,6 +112,9 @@
         str(args.random_seed),
     ]
 
+    if "lazy_data_gen" in supports and args.lazy_data_generation:
+        build_cmd_base.append("--lazy-data-generation")
+
     build_cmds_list = []
 
     if test_type in ["positive", "both"]:
@@ -198,8 +208,12 @@
     return tests
 
 
-def generate_results(args, profile, operator, op_build_dir, tests=None):
+def generate_results(args, profile, operator, op_build_dir, supports=[], tests=None):
     """Run tests on reference model and save result to the test directory."""
+    if "lazy_data_gen" in supports and args.lazy_data_generation:
+        logger.info("Skipping running tests due to lazy data gen")
+        return
+
     num_cores = args.num_cores
     run_tests_cmd = "tosa_verif_run_tests"
 
@@ -254,6 +268,7 @@
     op_build_dir,
     output_dir,
     op_profiles_list,
+    supports=[],
     tests=None,
     group=None,
     trim_op_subdir=False,
@@ -275,6 +290,8 @@
             c2c_args_base.extend(["--tag", tag])
     if args.framework_schema:
         c2c_args_base.extend(["--framework-schema", str(args.framework_schema)])
+    if "lazy_data_gen" in supports and args.lazy_data_generation:
+        c2c_args_base.append("--lazy-data-generation")
     c2c_args_base.append("--output-directory")
 
     c2c_args_list = []
@@ -474,6 +491,11 @@
         help="Type of tests produced (default is both)",
     )
     parser.add_argument(
+        "--lazy-data-generation",
+        action="store_true",
+        help="Enable lazy data generation (only for tosa-mi)",
+    )
+    parser.add_argument(
         "--ref-model-directory",
         dest="ref_model_dir",
         type=Path,
@@ -718,6 +740,11 @@
 
                     operator_group = test_params[op]["group"]
                     root_output_dir = args.output_dir / "operators"
+                    supports = (
+                        test_params[op]["support_for"]
+                        if "support_for" in test_params[op]
+                        else []
+                    )
 
                     # Iterate through the generation groups selecting tests from each
                     for gen_name, gen_dict in test_params[op]["generation"].items():
@@ -756,6 +783,7 @@
                             gen_name,
                             gen_dict["generator_args"],
                             gen_neg_dim_range,
+                            supports=supports,
                         )
 
                         # Work out which selection criteria we are using
@@ -782,7 +810,9 @@
                             and selection_config["all"] == "true"
                         ):
                             logger.debug(f"Running and converting all {op} tests")
-                            generate_results(args, profile, op, op_build_dir)
+                            generate_results(
+                                args, profile, op, op_build_dir, supports=supports
+                            )
                             operator_test_list = None
                         else:
                             logger.debug(
@@ -800,7 +830,12 @@
                                     )
                                 )
                                 generate_results(
-                                    args, profile, op, op_build_dir, tests_gen
+                                    args,
+                                    profile,
+                                    op,
+                                    op_build_dir,
+                                    supports=supports,
+                                    tests=tests_gen,
                                 )
                                 operator_test_list = list(tests_gen2)
                             else:
@@ -823,6 +858,7 @@
                             op_build_dir,
                             root_output_dir,
                             op_profiles_list,
+                            supports=supports,
                             tests=operator_test_list,
                             group=operator_group,
                             tags=tags,
diff --git a/verif/generator/tosa_arg_gen.py b/verif/generator/tosa_arg_gen.py
index 97ff237..8d96090 100644
--- a/verif/generator/tosa_arg_gen.py
+++ b/verif/generator/tosa_arg_gen.py
@@ -4,12 +4,10 @@
 import math
 import warnings
 
+import generator.tosa_utils as gtu
 import numpy as np
 from generator.tosa_error_if import ErrorIf
 from generator.tosa_error_if import TosaErrorIfArgGen
-from generator.tosa_utils import get_accum_dtype_from_tgTypes
-from generator.tosa_utils import get_wrong_output_type
-from generator.tosa_utils import MAX_RESIZE_DIMENSION
 from serializer.tosa_serializer import DTypeNames
 from tosa.DType import DType
 from tosa.Op import Op
@@ -606,11 +604,18 @@
 
 
 class TosaTensorValuesGen:
-    """Tensor Value generators create the random data for each test."""
+    """Tensor Value generators create the random data for each tensor in each test."""
 
     def __init__(self):
         pass
 
+    class TVGInfo:
+        """Enhanced tensor values information including data gen dict."""
+
+        def __init__(self, tensorList, dataGenDict):
+            self.tensorList = tensorList
+            self.dataGenDict = dataGenDict
+
     @staticmethod
     def tvgDefault(testGen, op, dtypeList, shapeList, testArgs, error_name=None):
         pCount, cCount = op["operands"]
@@ -624,6 +629,87 @@
         return tens
 
     @staticmethod
+    def tvgLazyGenDefault(
+        testGen, opName, dtypeList, shapeList, argsDict, error_name=None
+    ):
+        # Variable inputs versus constants
+        pCount, cCount = testGen.TOSA_OP_LIST[opName]["operands"]
+
+        overrideLazy = False
+        if not gtu.dtypeIsFloat(dtypeList[0]) and testGen.args.lazy_data_gen:
+            # TEMPORARY OVERRIDE for integer types
+            overrideLazy = True
+            testGen.args.lazy_data_gen = False
+
+        # TODO - Change to generating the data using the data-gen library!
+        # For now we fall back to the original path (and always for non-floats)
+        if not testGen.args.lazy_data_gen:
+            tens_ser_list = TosaTensorValuesGen.tvgDefault(
+                testGen,
+                testGen.TOSA_OP_LIST[opName],
+                dtypeList,
+                shapeList,
+                [],
+                error_name,
+            )
+            if overrideLazy:
+                # Return to lazy mode
+                testGen.args.lazy_data_gen = True
+            return TosaTensorValuesGen.TVGInfo(tens_ser_list, None)
+
+        # Create data generator meta-data
+        dg_type = argsDict["dg_type"]
+        dg_tens_meta = {}
+        tens_ser_list = []
+        for idx, shape in enumerate(shapeList):
+
+            tens_meta = {}
+            tens_meta["generator"] = gtu.DataGenType(dg_type).name
+            tens_meta["data_type"] = gtu.DTYPE_ATTRIBUTES[dtypeList[idx]]["json"]
+            tens_meta["shape"] = [int(i) for i in shape]
+            tens_meta["input_pos"] = idx
+            tens_meta["op"] = opName
+
+            if idx < pCount:
+                tens_meta["input_type"] = "variable"
+                tens = testGen.ser.addPlaceholder(shape, dtypeList[idx], None)
+            else:
+                tens_meta["input_type"] = "constant"
+                tens = testGen.ser.addConst(shape, dtypeList[idx], None)
+            tens_ser_list.append(tens)
+
+            if dg_type == gtu.DataGenType.PSEUDO_RANDOM:
+                info = {}
+                # TODO - generate seed for this generator based on test
+                info["rng_seed"] = -1
+                info["range"] = [
+                    str(v)
+                    for v in testGen.getDTypeRange(dtypeList[idx], high_inclusive=True)
+                ]
+                tens_meta["pseudo_random_info"] = info
+            elif dg_type == gtu.DataGenType.DOT_PRODUCT:
+                info = {}
+                info["s"] = argsDict["s"]
+                info["ks"] = argsDict["ks"]
+                for key in gtu.DG_DOT_PRODUCT_OPTIONAL_INFO:
+                    if key in argsDict:
+                        if key.endswith("_type"):
+                            info[key] = gtu.DTYPE_ATTRIBUTES[argsDict[key]]["json"]
+                        else:
+                            info[key] = argsDict[key]
+                tens_meta["dot_product_info"] = info
+            else:
+                # TODO - other data gen type
+                assert False, "TODO: support other data gen types"
+            dg_tens_meta[tens.name] = tens_meta
+
+        tens_data = {
+            "version": "0.1",
+            "tensors": dg_tens_meta,
+        }
+        return TosaTensorValuesGen.TVGInfo(tens_ser_list, tens_data)
+
+    @staticmethod
     def tvgNegate(testGen, op, dtypeList, shapeList, testArgs, error_name=None):
         if dtypeList[0] == DType.INT32 and error_name is None:
             pCount, cCount = op["operands"]
@@ -1024,6 +1110,50 @@
         pass
 
     @staticmethod
+    def _add_data_generators(testGen, opName, dtype, arg_list, error_name, **kwargs):
+        """Add extra tests for each type of data generator for this op."""
+        if error_name is None and "data_gen" in testGen.TOSA_OP_LIST[opName]:
+            if dtype in [DType.FP16, DType.FP32, DType.BF16]:
+                dataGenTypesList = testGen.TOSA_OP_LIST[opName]["data_gen"]["fp"]
+            else:
+                dataGenTypesList = testGen.TOSA_OP_LIST[opName]["data_gen"]["int"]
+        else:
+            # Error test or no data generator types listed - assume pseudo random
+            dataGenTypesList = (gtu.DataGenType.PSEUDO_RANDOM,)
+
+        # Expand arg list with other data generator types
+        new_arg_list = []
+        for dg_type in dataGenTypesList:
+            for arg_str, arg_attrs in arg_list:
+                arg_dict = arg_attrs[0]
+                arg_dict["dg_type"] = dg_type
+
+                if dg_type == gtu.DataGenType.PSEUDO_RANDOM:
+                    # Default test
+                    new_arg_list.append((arg_str, [arg_dict]))
+
+                elif dg_type == gtu.DataGenType.DOT_PRODUCT:
+                    # Extra tests for each dot product test set
+                    dot_products = kwargs["dot_products"]
+                    if dot_products < testGen.TOSA_MI_DOT_PRODUCT_MIN:
+                        print(
+                            f"Skipping dot product test as too few calculations {dot_products} < {testGen.TOSA_MI_DOT_PRODUCT_MIN}"
+                        )
+                        continue
+                    arg_dict["ks"] = kwargs["ks"]
+                    for key in gtu.DG_DOT_PRODUCT_OPTIONAL_INFO:
+                        if key in kwargs:
+                            arg_dict[key] = kwargs[key]
+
+                    for s in testGen.TOSA_MI_DOT_PRODUCT_TEST_SETS:
+                        new_arg_str = f"{arg_str}_s{s}"
+                        new_arg_dict = arg_dict.copy()
+                        new_arg_dict["s"] = s
+                        new_arg_list.append((new_arg_str, [new_arg_dict]))
+
+        return new_arg_list
+
+    @staticmethod
     def agNone(testGen, opName, shapeList, dtype, error_name=None):
         """A trivial argument generator for operators that don't take any
         non-tensor arguments"""
@@ -1073,7 +1203,7 @@
         # Shape: (OFM channels), (KD), KH, KW, IFM channels
         filter_shape = shapeList[1]
 
-        accum_dtype = get_accum_dtype_from_tgTypes(dtypes)
+        accum_dtype = gtu.get_accum_dtype_from_tgTypes(dtypes)
 
         # Check the rank
         conv3d = opName.startswith("conv3d")
@@ -1258,12 +1388,12 @@
         input_dtype = dtypes[0]
 
         if error_name == ErrorIf.WrongOutputType:
-            accum_dtype = get_wrong_output_type(opName, testGen.rng, input_dtype)
+            accum_dtype = gtu.get_wrong_output_type(opName, testGen.rng, input_dtype)
         elif error_name == ErrorIf.WrongInputType:
             # Pick some potentially correct output dtype if input type is incorrect
             accum_dtype = DType.INT32
         else:
-            accum_dtype = get_accum_dtype_from_tgTypes(dtypes)
+            accum_dtype = gtu.get_accum_dtype_from_tgTypes(dtypes)
 
         return [(f"acc{testGen.typeStr(accum_dtype)}", [accum_dtype])]
 
@@ -1285,12 +1415,28 @@
 
         if error_name == ErrorIf.WrongOutputType:
             # Get incorrect output dtype for ErrorIf case
-            accum_dtypes = [get_wrong_output_type(opName, testGen.rng, dtype)]
+            accum_dtypes = [gtu.get_wrong_output_type(opName, testGen.rng, dtype)]
         elif error_name == ErrorIf.WrongInputType:
             # Pick some potentially correct output dtype if input type is incorrect
             accum_dtypes = [DType.INT32]
 
-        return [(f"acc{testGen.typeStr(a)}", [a]) for a in accum_dtypes]
+        arg_list = [
+            (f"acc{testGen.typeStr(a)}", [{"acc_type": a}]) for a in accum_dtypes
+        ]
+
+        arg_list = TosaArgGen._add_data_generators(
+            testGen,
+            opName,
+            dtype,
+            arg_list,
+            error_name,
+            ks=int(shapeList[0][2]),  # Set KS = C, from input A (N,H,C)
+            # Set dot_products = N*H*W
+            dot_products=gtu.product(
+                (shapeList[0][0], shapeList[0][1], shapeList[1][2])
+            ),
+        )
+        return arg_list
 
     @staticmethod
     def agTransposeConv2D(testGen, opName, shapeList, dtypes, error_name=None):
@@ -1303,7 +1449,7 @@
         ifm_shape = shapeList[0]
         filter_shape = shapeList[1]
 
-        accum_dtype = get_accum_dtype_from_tgTypes(dtypes)
+        accum_dtype = gtu.get_accum_dtype_from_tgTypes(dtypes)
 
         # Must be rank 4
         if error_name != ErrorIf.WrongRank:
@@ -2288,9 +2434,9 @@
 
                     if (
                         output_y <= 0
-                        or output_y >= MAX_RESIZE_DIMENSION
+                        or output_y >= gtu.MAX_RESIZE_DIMENSION
                         or output_x <= 0
-                        or output_x >= MAX_RESIZE_DIMENSION
+                        or output_x >= gtu.MAX_RESIZE_DIMENSION
                     ):
                         # Output dimensions out of scope
                         if error_name is not None and perm > 0:
@@ -2301,11 +2447,11 @@
 
                     if error_name == ErrorIf.ResizeOutputShapeMismatch and (
                         (
-                            output_y + scale_y_d >= MAX_RESIZE_DIMENSION
+                            output_y + scale_y_d >= gtu.MAX_RESIZE_DIMENSION
                             and output_y - scale_y_d < 1
                         )
                         or (
-                            output_x + scale_x_d >= MAX_RESIZE_DIMENSION
+                            output_x + scale_x_d >= gtu.MAX_RESIZE_DIMENSION
                             and output_x - scale_x_d < 1
                         )
                     ):
diff --git a/verif/generator/tosa_test_gen.py b/verif/generator/tosa_test_gen.py
index 3014c81..d15f785 100644
--- a/verif/generator/tosa_test_gen.py
+++ b/verif/generator/tosa_test_gen.py
@@ -1,8 +1,12 @@
 # Copyright (c) 2020-2023, ARM Limited.
 # SPDX-License-Identifier: Apache-2.0
+import json
 import os
 from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
 
+import generator.tosa_utils as gtu
 import numpy as np
 import serializer.tosa_serializer as ts
 from generator.tosa_arg_gen import TosaArgGen
@@ -13,15 +17,15 @@
 from generator.tosa_error_if import TosaErrorIfArgGen
 from generator.tosa_error_if import TosaErrorValidator
 from generator.tosa_error_if import TosaInvalidValidator
-from generator.tosa_utils import DTYPE_ATTRIBUTES
-from generator.tosa_utils import get_rank_mismatch_shape
-from generator.tosa_utils import get_wrong_output_type
-from generator.tosa_utils import MAX_RESIZE_DIMENSION
-from generator.tosa_utils import usableDTypes
-from generator.tosa_utils import vect_f32_to_bf16
+from schemavalidation.schemavalidation import TestDescSchemaValidator
 from tosa.DType import DType
 from tosa.Op import Op
 
+TOSA_AUTOGENERATED_HEADER = f"""// Copyright (c) {datetime.today().year}, ARM Limited
+// SPDX-License-Identifier: Apache-2.0
+// AUTO-GENERATED FILE CREATED BY tosa_verif_build_tests
+"""
+
 
 class TosaTestGen:
     # Maximum rank of tensor supported by test generator.
@@ -31,6 +35,10 @@
     TOSA_8K_LEVEL_MAX_KERNEL = 8192
     TOSA_8K_LEVEL_MAX_STRIDE = 8192
 
+    # Main profile compliance - dot product statistical test sets and minimum calculations
+    TOSA_MI_DOT_PRODUCT_TEST_SETS = range(0, 6)
+    TOSA_MI_DOT_PRODUCT_MIN = 1000
+
     def __init__(self, args):
         self.args = args
         self.basePath = args.output_dir
@@ -45,6 +53,8 @@
         # Work out floating point range
         self.random_fp_low = min(args.tensor_fp_value_range)
         self.random_fp_high = max(args.tensor_fp_value_range)
+        # JSON schema validation
+        self.descSchemaValidator = TestDescSchemaValidator()
 
     def createSerializer(self, opName, testPath):
         self.testPath = os.path.join(opName, testPath)
@@ -53,81 +63,131 @@
         os.makedirs(fullPath, exist_ok=True)
         # Embed const data in the flatbuffer
         constMode = ts.ConstMode.EMBED
-        if self.args.dump_consts:
+        if self.args.lazy_data_gen:
+            # Lazy data generation - so output the constants as files
+            constMode = ts.ConstMode.INPUTS
+        elif self.args.dump_consts:
             constMode = ts.ConstMode.EMBED_DUMP
         self.ser = ts.TosaSerializer(fullPath, constMode)
 
     def getSerializer(self):
         return self.ser
 
-    def serialize(self, testName):
-        with open(
-            os.path.join(self.basePath, self.testPath, "{}.tosa".format(testName)), "wb"
-        ) as fd:
+    def serialize(self, testName, metaData=None):
+        path = Path(self.basePath) / self.testPath
+
+        # Write out TOSA flatbuffer binary
+        path_fb = path / f"{testName}.tosa"
+        with path_fb.open("wb") as fd:
             fd.write(self.ser.serialize())
 
-        with open(os.path.join(self.basePath, self.testPath, "desc.json"), "w") as fd:
-            fd.write(self.ser.writeJson("{}.tosa".format(testName)))
+        # Get JSON descriptor from serializer
+        desc = json.loads(self.ser.writeJson(f"{testName}.tosa"))
+
+        if metaData:
+            # Add extra meta data to desc.json
+            desc["meta"] = metaData
+
+        # Validate desc.json before we output it
+        self.descSchemaValidator.validate_config(desc)
+
+        if metaData:
+            if self.args.lazy_data_gen and "data_gen" in metaData:
+                # Output datagen meta data as CPP data
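+                # A generated file looks roughly like this sketch (year and
+                # test name are illustrative):
+                #   // Copyright (c) 2023, ARM Limited
+                #   // SPDX-License-Identifier: Apache-2.0
+                #   // AUTO-GENERATED FILE CREATED BY tosa_verif_build_tests
+                #   // Test meta data for data generation setup
+                #
+                #   const char* json_tdg_config_<test_name> = R"({...})";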
+                path_md = path / f"{testName}_meta_data_gen.cpp"
+                with path_md.open("w") as fd:
+                    fd.write(TOSA_AUTOGENERATED_HEADER)
+                    fd.write("// Test meta data for data generation setup\n\n")
+                    fd.write(f'const char* json_tdg_config_{path.stem} = R"(')
+                    json.dump(metaData["data_gen"], fd)
+                    fd.write(')";\n\n')
+            if "compliance" in metaData:
+                # Output compliance meta data as CPP data
+                path_md = path / f"{testName}_meta_compliance.cpp"
+                with path_md.open("w") as fd:
+                    fd.write(TOSA_AUTOGENERATED_HEADER)
+                    fd.write("// Test meta data for compliance validation\n\n")
+                    fd.write(f'const char* json_tvf_config_{path.stem} = R"(')
+                    json.dump(metaData["compliance"], fd)
+                    fd.write(')";\n\n')
+
+        # Write desc.json
+        path_desc = path / "desc.json"
+        with path_desc.open("w") as fd:
+            json.dump(desc, fd, indent=1)
 
     def resetRNG(self, seed=None):
         if seed is None:
             seed = self.random_seed + 1
         self.rng = np.random.default_rng(seed)
 
+    def getDTypeRange(self, dtype, high_inclusive=False):
+        # Returns dtype value range boundaries (low, high)
+        # The high boundary is excluded in the range
+        # unless high_inclusive is True
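+        # e.g. getDTypeRange(DType.INT8) returns (-128, 128), while
+        # getDTypeRange(DType.INT8, high_inclusive=True) returns (-128, 127)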
+
+        if dtype in (DType.FP32, DType.FP16, DType.BF16):
+            return (self.random_fp_low, self.random_fp_high)
+        elif dtype == DType.BOOL:
+            rng = (0, 2)
+        elif dtype == DType.UINT8:
+            rng = (0, 256)
+        elif dtype == DType.UINT16:
+            rng = (0, 65536)
+        elif dtype == DType.INT4:
+            # TOSA specific INT4 weight range from -7 to 7
+            rng = (-7, 8)
+        elif dtype == DType.INT8:
+            rng = (-128, 128)
+        elif dtype == DType.INT16:
+            rng = (-32768, 32768)
+        elif dtype in (DType.INT32, DType.SHAPE):
+            # SHAPE values restricted to the INT32 range to avoid overly large values
+            rng = (-(1 << 31), (1 << 31))
+        elif dtype == DType.INT48:
+            rng = (-(1 << 47), (1 << 47))
+        else:
+            raise Exception("Unknown dtype: {}".format(dtype))
+
+        if not high_inclusive:
+            # Exclusive high: low <= range < high
+            return rng
+        else:
+            # Inclusive high: low <= range <= high
+            return (rng[0], rng[1] - 1)
+
     def getRandTensor(self, shape, dtype):
+        low, high = self.getDTypeRange(dtype)
+
         if dtype == DType.BOOL:
             return np.bool_(self.rng.choice(a=[False, True], size=shape))
-        # TOSA specific INT4 weight range from -7 to 7
-        elif dtype == DType.INT4:
-            return np.int32(self.rng.integers(low=-7, high=8, size=shape))
-        elif dtype == DType.INT8:
-            return np.int32(self.rng.integers(low=-128, high=128, size=shape))
-        elif dtype == DType.UINT8:
-            return np.int32(self.rng.integers(low=0, high=256, size=shape))
-        elif dtype == DType.INT16:
-            return np.int32(self.rng.integers(low=-32768, high=32768, size=shape))
-        elif dtype == DType.UINT16:
-            return np.int32(self.rng.integers(low=0, high=65536, size=shape))
-        elif (
-            dtype == DType.INT32 or dtype == DType.SHAPE
-        ):  # restricting too large value for SHAPE
-            return np.int32(
-                self.rng.integers(low=-(1 << 31), high=(1 << 31), size=shape)
-            )
         elif dtype == DType.INT48:
-            return np.int64(
-                self.rng.integers(low=-(1 << 47), high=(1 << 47), size=shape)
-            )
-        elif dtype == DType.FP16:
-            return np.float16(
-                self.rng.uniform(
-                    low=self.random_fp_low, high=self.random_fp_high, size=shape
-                )
-            )
-        elif dtype == DType.BF16:
-            f32_tensor = np.float32(
-                self.rng.uniform(
-                    low=self.random_fp_low, high=self.random_fp_high, size=shape
-                )
-            )
-            # Floor the last 16 bits of each f32 value
-            return np.float32(vect_f32_to_bf16(f32_tensor))
-        elif dtype == DType.FP32:
-            return np.float32(
-                self.rng.uniform(
-                    low=self.random_fp_low, high=self.random_fp_high, size=shape
-                )
-            )
+            return np.int64(self.rng.integers(low=low, high=high, size=shape))
+        elif dtype in (DType.FP16, DType.BF16, DType.FP32):
+            f_tensor = self.rng.uniform(low=low, high=high, size=shape)
+
+            if dtype == DType.FP16:
+                return np.float16(f_tensor)
+            else:
+                f32_tensor = np.float32(f_tensor)
+                if dtype == DType.BF16:
+                    # Floor the last 16 bits of each f32 value
+                    return np.float32(gtu.vect_f32_to_bf16(f32_tensor))
+                else:
+                    return f32_tensor
         else:
-            raise Exception("Unrecognized Dtype: {}".format(dtype))
+            # All other integer types
+            return np.int32(self.rng.integers(low=low, high=high, size=shape))
 
     def buildPlaceholderTensors(self, shape_list, dtype_list):
         placeholders = []
 
         assert len(shape_list) == len(dtype_list)
 
+        arr = None
         for idx, shape in enumerate(shape_list):
-            arr = self.getRandTensor(shape, dtype_list[idx])
+            if not self.args.lazy_data_gen:
+                arr = self.getRandTensor(shape, dtype_list[idx])
             placeholders.append(self.ser.addPlaceholder(shape, dtype_list[idx], arr))
 
         return placeholders
@@ -137,8 +197,10 @@
 
         assert len(shape_list) == len(dtype_list)
 
+        arr = None
         for idx, shape in enumerate(shape_list):
-            arr = self.getRandTensor(shape, dtype_list[idx])
+            if not self.args.lazy_data_gen:
+                arr = self.getRandTensor(shape, dtype_list[idx])
             consts.append(self.ser.addConst(shape, dtype_list[idx], arr))
 
         return consts
@@ -161,38 +223,20 @@
         return np.int32(self.rng.integers(low=low, high=high, size=1))[0]
 
     def getRandNumberDType(self, dtype):
+        low, high = self.getDTypeRange(dtype)
+
         if dtype == DType.FP32:
-            return np.float32(
-                self.rng.uniform(low=self.random_fp_low, high=self.random_fp_high)
-            )
+            return np.float32(self.rng.uniform(low=low, high=high))
         elif dtype == DType.FP16:
-            return np.float16(
-                self.rng.uniform(low=self.random_fp_low, high=self.random_fp_high)
-            )
+            return np.float16(self.rng.uniform(low=low, high=high))
         elif dtype == DType.BF16:
-            rand_f32 = np.float32(
-                self.rng.uniform(low=self.random_fp_low, high=self.random_fp_high)
-            )
-            return vect_f32_to_bf16(rand_f32)
+            rand_f32 = np.float32(self.rng.uniform(low=low, high=high))
+            return gtu.vect_f32_to_bf16(rand_f32)
         elif dtype == DType.BOOL:
             return self.rng.choice([False, True])
-        # TOSA specific INT4 weight range from -7 to 7
-        elif dtype == DType.INT4:
-            low, high = (-7, 8)
-        elif dtype == DType.INT8:
-            low, high = (-128, 128)
-        elif dtype == DType.INT16:
-            low, high = (-32768, 32768)
-        elif (
-            dtype == DType.INT32 or dtype == DType.SHAPE
-        ):  # restricting too large value for SHAPE
-            low, high = (-(1 << 31), (1 << 31))
         elif dtype == DType.INT48:
-            low, high = (-(1 << 47), (1 << 47))
             # Special size
             return np.int64(self.rng.integers(low, high, size=1))[0]
-        else:
-            raise Exception("Unknown dtype: {}".format(dtype))
 
         return np.int32(self.rng.integers(low, high, size=1))[0]
 
@@ -212,8 +256,8 @@
             # Limit types to the first 2 as the 3rd is the accumulator
             return "x".join(strs[:2])
         else:
-            if dtype in DTYPE_ATTRIBUTES:
-                return DTYPE_ATTRIBUTES[dtype]["str"]
+            if dtype in gtu.DTYPE_ATTRIBUTES:
+                return gtu.DTYPE_ATTRIBUTES[dtype]["str"]
             else:
                 raise Exception(
                     "Unknown dtype, cannot convert to string: {}".format(dtype)
@@ -221,8 +265,8 @@
 
     def typeWidth(self, dtype):
         """Get the datatype width for data types"""
-        if dtype in DTYPE_ATTRIBUTES:
-            return DTYPE_ATTRIBUTES[dtype]["width"]
+        if dtype in gtu.DTYPE_ATTRIBUTES:
+            return gtu.DTYPE_ATTRIBUTES[dtype]["width"]
         else:
             raise Exception(f"Unknown dtype, cannot determine width: {dtype}")
 
@@ -237,11 +281,44 @@
             low=self.args.tensor_shape_range[0], high=self.args.tensor_shape_range[1]
         )
 
-    # Argument generators
-    # Returns a list of tuples (stringDescriptor, [build_fcn_arg_list])
-    # Where the string descriptor is used to generate the test name and
-    # The build_fcn_arg_list is expanded and passed to the operator test
-    # build function
+    def tensorComplianceMetaData(self, op, argsDict, outputTensor, errorName):
+        if errorName:
+            # No compliance for error tests
+            return None
+        # Create compliance meta data for expected output tensor
+        compliance_tens = {"mode": None}
+        if argsDict["dg_type"] == gtu.DataGenType.DOT_PRODUCT:
+            mode = gtu.ComplianceMode.DOT_PRODUCT
+            compliance_tens["dot_product_info"] = {
+                "s": argsDict["s"],
+                "ks": argsDict["ks"],
+                "data_type": gtu.DTYPE_ATTRIBUTES[outputTensor.dtype]["json"],
+            }
+        elif argsDict["dg_type"] == gtu.DataGenType.OP_SPECIAL:
+            mode = gtu.ComplianceMode.FP_SPECIAL
+        elif "compliance" in op and "ulp" in op["compliance"]:
+            mode = gtu.ComplianceMode.ULP
+            compliance_tens["ulp_info"] = {"ulp": op["compliance"]["ulp"]}
+        elif op["op"] == Op.REDUCE_PRODUCT:
+            mode = gtu.ComplianceMode.REDUCE_PRODUCT
+        else:
+            mode = gtu.ComplianceMode.EXACT
+        compliance_tens["mode"] = gtu.ComplianceMode(mode).name
+
+        return compliance_tens
+
+    # Build Op functions:
+    # - Create the output tensor (calling OutputShaper as needed)
+    # - Do final tweaks to the attributes (if necessary for errorIf)
+    # - Add the op into the graph
+    # - Return the resulting tensor information or BuildInfo
+
+    class BuildInfo:
+        """Enhanced build information containing result tensor and associated compliance dict."""
+
+        def __init__(self, resultTensor, complianceDict):
+            self.resultTensor = resultTensor
+            self.complianceDict = complianceDict
 
     def build_unary(self, op, a, validator_fcns=None, error_name=None, qinfo=None):
         result_tens = OutputShaper.unaryOp(self.ser, self.rng, a, error_name)
@@ -975,15 +1052,16 @@
         return result_tens
 
     def build_matmul(
-        self, op, a, b, accum_dtype, validator_fcns=None, error_name=None, qinfo=None
+        self, op, a, b, args_dict, validator_fcns=None, error_name=None, qinfo=None
     ):
-        result_tens = OutputShaper.matmulOp(
+        accum_dtype = args_dict["acc_type"]
+        result_tensor = OutputShaper.matmulOp(
             self.ser, self.rng, a, b, accum_dtype, error_name
         )
 
         # Invalidate Input/Output list for error if checks.
         input_list = [a.name, b.name]
-        output_list = [result_tens.name]
+        output_list = [result_tensor.name]
         pCount, cCount = op["operands"]
         num_operands = pCount + cCount
         input_list, output_list = TosaErrorIfArgGen.eiInvalidateInputOutputList(
@@ -999,10 +1077,10 @@
             input_dtype=a.dtype,
             input2_shape=b.shape,
             input2_dtype=b.dtype,
-            output_shape=result_tens.shape,
-            output_dtype=result_tens.dtype,
+            output_shape=result_tensor.shape,
+            output_dtype=result_tensor.dtype,
             qinfo=qinfo,
-            result_tensors=[result_tens],
+            result_tensors=[result_tensor],
             input_list=input_list,
             output_list=output_list,
             num_operands=num_operands,
@@ -1014,7 +1092,12 @@
         attr.MatMulAttribute(qinfo[0], qinfo[1])
 
         self.ser.addOperator(op["op"], input_list, output_list, attr)
-        return result_tens
+
+        compliance = self.tensorComplianceMetaData(
+            op, args_dict, result_tensor, error_name
+        )
+
+        return TosaTestGen.BuildInfo(result_tensor, compliance)
 
     def build_reduce(self, op, a, axis, validator_fcns, error_name=None):
         result_tens = OutputShaper.reduceOp(self.ser, self.rng, a, axis, error_name)
@@ -1895,7 +1978,7 @@
 
     def _get_condition_tensor(self, op, cond, error_name):
         if error_name == ErrorIf.CondIfCondNotMatchingBool:
-            cond_type = get_wrong_output_type(op, self.rng, DType.BOOL)
+            cond_type = gtu.get_wrong_output_type(op, self.rng, DType.BOOL)
         else:
             cond_type = DType.BOOL
         if error_name == ErrorIf.CondIfCondShapeNotSizeOne:
@@ -2357,7 +2440,7 @@
         # Initialize a new random number generator
         self.rng = np.random.default_rng(self.random_seed)
 
-        build_fcn, tgen_fcn, tvgen_fcn, agen_fcn = op["build_fcn"]
+        _, tgen_fcn, _, agen_fcn = op["build_fcn"]
 
         # Test list consists of a tuple of:
         # (opName, testNameStr, dtype, shapeList, argumentsList)
@@ -2461,7 +2544,7 @@
         # Create a serializer
         self.createSerializer(opName, testStr)
 
-        build_fcn, tgen_fcn, tvgen_fcn, agen_fcn = op["build_fcn"]
+        build_fcn, _, tvgen_fcn, _ = op["build_fcn"]
         if "error_if_validators" in op:
             error_if_validators = op["error_if_validators"]
         else:
@@ -2495,24 +2578,37 @@
             qgen = None
 
         # Build the random tensor operands and the test
-        tens = []
 
         if qgen is not None:
             qinfo = qgen(self, op, dtype_or_dtypeList, error_name)
         else:
             qinfo = None
 
-        tens = tvgen_fcn(self, op, dtypeList, shapeList, testArgs, error_name)
+        # Extra meta data for the desc.json
+        tensMeta = {}
+
+        # Check we are using the new testArgs interface with an argsDict dictionary
+        if len(testArgs) == 1 and isinstance(testArgs[0], dict):
+            argsDict = testArgs[0]
+            assert "dg_type" in argsDict
+            tvgInfo = tvgen_fcn(
+                self, opName, dtypeList, shapeList, argsDict, error_name
+            )
+            if tvgInfo.dataGenDict:
+                tensMeta["data_gen"] = tvgInfo.dataGenDict
+            tens = tvgInfo.tensorList
+        else:
+            tens = tvgen_fcn(self, op, dtypeList, shapeList, testArgs, error_name)
 
         try:
             if error_if_validators is None:
                 if qinfo is not None:
-                    resultName = build_fcn(self, op, *tens, *testArgs, qinfo)
+                    result = build_fcn(self, op, *tens, *testArgs, qinfo)
                 else:
-                    resultName = build_fcn(self, op, *tens, *testArgs)
+                    result = build_fcn(self, op, *tens, *testArgs)
             else:
                 if qinfo is not None:
-                    resultName = build_fcn(
+                    result = build_fcn(
                         self,
                         op,
                         *tens,
@@ -2522,7 +2618,7 @@
                         qinfo=qinfo,
                     )
                 else:
-                    resultName = build_fcn(
+                    result = build_fcn(
                         self,
                         op,
                         *tens,
@@ -2534,9 +2630,16 @@
             print(f"build_fcn: {build_fcn}\nTensors: {tens}\nArgs: {testArgs}\n")
             raise e
 
-        if resultName:
+        if result:
             # The test is valid, serialize it
-            self.serialize("test")
+            if isinstance(result, TosaTestGen.BuildInfo) and result.complianceDict:
+                # Add the compliance meta data
+                # NOTE: This currently expects only one result output
+                tensMeta["compliance"] = {
+                    "version": "0.1",
+                    "tensors": {result.resultTensor.name: result.complianceDict},
+                }
+            self.serialize("test", tensMeta)
         else:
             # The test is not valid
             print(f"Invalid ERROR_IF test created: {opName} {testStr}")
@@ -2865,7 +2968,7 @@
             "build_fcn": (
                 build_matmul,
                 TosaTensorGen.tgMatmul,
-                TosaTensorValuesGen.tvgDefault,
+                TosaTensorValuesGen.tvgLazyGenDefault,
                 TosaArgGen.agMatMul,
             ),
             "qgen": TosaQuantGen.qgMatmul,
@@ -2878,6 +2981,10 @@
                 TosaErrorValidator.evWrongInputList,
                 TosaErrorValidator.evWrongOutputList,
             ),
+            "data_gen": {
+                "fp": (gtu.DataGenType.DOT_PRODUCT,),
+                "int": (gtu.DataGenType.PSEUDO_RANDOM,),
+            },
         },
         "max_pool2d": {
             "op": Op.MAX_POOL2D,
@@ -4446,7 +4553,7 @@
                 excludes = [DType.FP16, DType.FP32]
             else:
                 excludes = [out_dtype]
-            wrong_dtypes = list(usableDTypes(excludes=excludes))
+            wrong_dtypes = list(gtu.usableDTypes(excludes=excludes))
             out_dtype = rng.choice(wrong_dtypes)
 
         return ser.addOutput(ofm_shape, out_dtype)
@@ -4508,7 +4615,7 @@
                 excludes = [DType.FP16, DType.FP32]
             else:
                 excludes = [out_dtype]
-            wrong_dtypes = list(usableDTypes(excludes=excludes))
+            wrong_dtypes = list(gtu.usableDTypes(excludes=excludes))
             out_dtype = rng.choice(wrong_dtypes)
 
         return ser.addOutput(ofm_shape, out_dtype)
@@ -4559,7 +4666,7 @@
                 excludes = [DType.FP16, DType.FP32]
             else:
                 excludes = [out_dtype]
-            wrong_dtypes = list(usableDTypes(excludes=excludes))
+            wrong_dtypes = list(gtu.usableDTypes(excludes=excludes))
             out_dtype = rng.choice(wrong_dtypes)
 
         return ser.addOutput(ofm_shape, out_dtype)
@@ -4711,7 +4818,7 @@
             bad_dim = rng.choice(range(len(output_shape)))
             output_shape[bad_dim] -= rng.choice([1, 2])
         elif error_name == ErrorIf.RankMismatch:
-            output_shape = get_rank_mismatch_shape(rng, output_shape)
+            output_shape = gtu.get_rank_mismatch_shape(rng, output_shape)
 
         if error_name == ErrorIf.WrongOutputType:
             all_dtypes = [
@@ -4806,7 +4913,7 @@
         elif error_name == ErrorIf.InputSizeStartLengthMismatch:
             output_shape = input.shape.copy()
         elif error_name == ErrorIf.RankMismatch:
-            output_shape = get_rank_mismatch_shape(rng, output_shape)
+            output_shape = gtu.get_rank_mismatch_shape(rng, output_shape)
 
         return ser.addOutput(output_shape, outputDType)
 
@@ -4820,7 +4927,7 @@
             output_shape[i] = a.shape[i] * multiples[i]
 
         if error_name == ErrorIf.RankMismatch:
-            output_shape = get_rank_mismatch_shape(rng, output_shape)
+            output_shape = gtu.get_rank_mismatch_shape(rng, output_shape)
 
         if error_name == ErrorIf.WrongOutputType:
             all_dtypes = [
@@ -4853,7 +4960,7 @@
             for i in range(len(output_shape)):
                 output_shape[i] += rng.integers(1, 10)
         elif error_name == ErrorIf.RankMismatch:
-            output_shape = get_rank_mismatch_shape(rng, output_shape)
+            output_shape = gtu.get_rank_mismatch_shape(rng, output_shape)
 
         if error_name == ErrorIf.WrongOutputType:
             all_dtypes = [
@@ -4980,21 +5087,21 @@
             oh = max(oh, 1)
             ow = max(ow, 1)
             if error_name != ErrorIf.MaxDimExceeded:
-                oh = min(oh, MAX_RESIZE_DIMENSION - 1)
-                ow = min(ow, MAX_RESIZE_DIMENSION - 1)
+                oh = min(oh, gtu.MAX_RESIZE_DIMENSION - 1)
+                ow = min(ow, gtu.MAX_RESIZE_DIMENSION - 1)
 
         if error_name == ErrorIf.ResizeOutputShapeMismatch:
             choices = [1, 2, 3]
             change = rng.choice(choices)
             # increment in multiples of scale_y/x_d so we don't hit non-integer error case
             if change in [1, 3]:
-                if oh + scale_y_d >= MAX_RESIZE_DIMENSION:
+                if oh + scale_y_d >= gtu.MAX_RESIZE_DIMENSION:
                     oh -= scale_y_d
                     assert oh > 0  # Should have been caught in agResize
                 else:
                     oh += scale_y_d
             if change in [2, 3]:
-                if ow + scale_x_d >= MAX_RESIZE_DIMENSION:
+                if ow + scale_x_d >= gtu.MAX_RESIZE_DIMENSION:
                     ow -= scale_x_d
                     assert ow > 0  # Should have been caught in agResize
                 else:
@@ -5051,7 +5158,7 @@
                 excludes = [DType.FP16, DType.FP32]
             else:
                 excludes = [out_dtype]
-            wrong_dtypes = list(usableDTypes(excludes=excludes))
+            wrong_dtypes = list(gtu.usableDTypes(excludes=excludes))
             out_dtype = rng.choice(wrong_dtypes)
 
         return ser.addOutput(output_shape, out_dtype)
@@ -5075,7 +5182,7 @@
 
         if error_name == ErrorIf.WrongOutputType:
             excludes = [DType.FP32]
-            wrong_dtypes = list(usableDTypes(excludes=excludes))
+            wrong_dtypes = list(gtu.usableDTypes(excludes=excludes))
             output_dtype = rng.choice(wrong_dtypes)
         elif error_name == ErrorIf.BatchMismatch:
             output_shape[0] += rng.integers(1, 10)
@@ -5100,7 +5207,7 @@
         output_dtype = value.dtype
         if error_name == ErrorIf.WrongOutputType:
             excludes = [DType.FP32]
-            wrong_dtypes = list(usableDTypes(excludes=excludes))
+            wrong_dtypes = list(gtu.usableDTypes(excludes=excludes))
             output_dtype = rng.choice(wrong_dtypes)
         elif error_name == ErrorIf.BatchMismatch:
             output_shape[0] += rng.integers(1, 10)
diff --git a/verif/generator/tosa_utils.py b/verif/generator/tosa_utils.py
index 3cd0370..75a0df5 100644
--- a/verif/generator/tosa_utils.py
+++ b/verif/generator/tosa_utils.py
@@ -1,7 +1,8 @@
-# Copyright (c) 2021-2022, ARM Limited.
+# Copyright (c) 2021-2023, ARM Limited.
 # SPDX-License-Identifier: Apache-2.0
 import struct
 import sys
+from enum import IntEnum
 
 import numpy as np
 from tosa.DType import DType
@@ -9,22 +10,54 @@
 # Maximum dimension size for output and inputs for RESIZE
 MAX_RESIZE_DIMENSION = 16384
 
+# Data type information dictionary
+# - str: filename abbreviation
+# - width: number of bits needed for the type
+# - json: JSON type string
 DTYPE_ATTRIBUTES = {
-    DType.BOOL: {"str": "b", "width": 1},
-    DType.INT4: {"str": "i4", "width": 4},
-    DType.INT8: {"str": "i8", "width": 8},
-    DType.UINT8: {"str": "u8", "width": 8},
-    DType.INT16: {"str": "i16", "width": 16},
-    DType.UINT16: {"str": "u16", "width": 16},
-    DType.INT32: {"str": "i32", "width": 32},
-    DType.INT48: {"str": "i48", "width": 48},
-    DType.SHAPE: {"str": "i64", "width": 64},
-    DType.FP16: {"str": "f16", "width": 16},
-    DType.BF16: {"str": "bf16", "width": 16},
-    DType.FP32: {"str": "f32", "width": 32},
+    DType.BOOL: {"str": "b", "width": 1, "json": "BOOL"},
+    DType.INT4: {"str": "i4", "width": 4, "json": "INT4"},
+    DType.INT8: {"str": "i8", "width": 8, "json": "INT8"},
+    DType.UINT8: {"str": "u8", "width": 8, "json": "UINT8"},
+    DType.INT16: {"str": "i16", "width": 16, "json": "INT16"},
+    DType.UINT16: {"str": "u16", "width": 16, "json": "UINT16"},
+    DType.INT32: {"str": "i32", "width": 32, "json": "INT32"},
+    DType.INT48: {"str": "i48", "width": 48, "json": "INT48"},
+    DType.SHAPE: {"str": "s", "width": 64, "json": "SHAPE"},
+    DType.FP16: {"str": "f16", "width": 16, "json": "FP16"},
+    DType.BF16: {"str": "bf16", "width": 16, "json": "BF16"},
+    DType.FP32: {"str": "f32", "width": 32, "json": "FP32"},
 }
 
 
+class ComplianceMode(IntEnum):
+    """Compliance mode types."""
+
+    EXACT = 0
+    DOT_PRODUCT = 1
+    ULP = 2
+    FP_SPECIAL = 3
+    REDUCE_PRODUCT = 4
+
+
+class DataGenType(IntEnum):
+    """Data generator types."""
+
+    PSEUDO_RANDOM = 0
+    DOT_PRODUCT = 1
+    OP_BOUNDARY = 2
+    OP_FULLSET = 3
+    OP_SPECIAL = 4
+
+
+# Additional (optional) data for dot product data generator
+DG_DOT_PRODUCT_OPTIONAL_INFO = ("acc_type", "kernel", "axis")
+
+
+def dtypeIsFloat(dtype):
+    return dtype in (DType.FP16, DType.BF16, DType.FP32)
+
+
 def valueToName(item, value):
     """Get the name of an attribute with the given value.
 
diff --git a/verif/generator/tosa_verif_build_tests.py b/verif/generator/tosa_verif_build_tests.py
index 4d12bb0..0d532c0 100644
--- a/verif/generator/tosa_verif_build_tests.py
+++ b/verif/generator/tosa_verif_build_tests.py
@@ -51,6 +51,10 @@
         argv = new_argv
 
     parser = argparse.ArgumentParser()
+
+    ops_group = parser.add_argument_group("operator options")
+    tens_group = parser.add_argument_group("tensor options")
+
     parser.add_argument(
         "-o", dest="output_dir", type=str, default="vtest", help="Test output directory"
     )
@@ -75,8 +79,15 @@
         "-v", "--verbose", dest="verbose", action="count", help="Verbose operation"
     )
 
-    # Constraints on tests
     parser.add_argument(
+        "--lazy-data-generation",
+        dest="lazy_data_gen",
+        action="store_true",
+        help="Tensor data generation is delayed til test running",
+    )
+
+    # Constraints on tests
+    tens_group.add_argument(
         "--tensor-dim-range",
         dest="tensor_shape_range",
         default="1,64",
@@ -84,7 +95,7 @@
         help="Min,Max range of tensor shapes",
     )
 
-    parser.add_argument(
+    tens_group.add_argument(
         OPTION_FP_VALUES_RANGE,
         dest="tensor_fp_value_range",
         default="0.0,1.0",
@@ -92,7 +103,7 @@
         help="Min,Max range of floating point tensor values",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--max-batch-size",
         dest="max_batch_size",
         default=1,
@@ -100,7 +111,7 @@
         help="Maximum batch size for NHWC tests",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--max-conv-padding",
         dest="max_conv_padding",
         default=1,
@@ -108,7 +119,7 @@
         help="Maximum padding for Conv tests",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--max-conv-dilation",
         dest="max_conv_dilation",
         default=2,
@@ -116,7 +127,7 @@
         help="Maximum dilation for Conv tests",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--max-conv-stride",
         dest="max_conv_stride",
         default=2,
@@ -124,7 +135,7 @@
         help="Maximum stride for Conv tests",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--max-pooling-padding",
         dest="max_pooling_padding",
         default=1,
@@ -132,7 +143,7 @@
         help="Maximum padding for pooling tests",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--max-pooling-stride",
         dest="max_pooling_stride",
         default=2,
@@ -140,7 +151,7 @@
         help="Maximum stride for pooling tests",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--max-pooling-kernel",
         dest="max_pooling_kernel",
         default=3,
@@ -148,7 +159,7 @@
         help="Maximum kernel for pooling tests",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--num-rand-permutations",
         dest="num_rand_permutations",
         default=6,
@@ -156,7 +167,7 @@
         help="Number of random permutations for a given shape/rank for randomly-sampled parameter spaces",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--max-resize-output-dim",
         dest="max_resize_output_dim",
         default=1000,
@@ -165,7 +176,7 @@
     )
 
     # Targeting a specific shape/rank/dtype
-    parser.add_argument(
+    tens_group.add_argument(
         "--target-shape",
         dest="target_shapes",
         action="append",
@@ -174,7 +185,7 @@
         help="Create tests with a particular input tensor shape, e.g., 1,4,4,8 (may be repeated for tests that require multiple input shapes)",
     )
 
-    parser.add_argument(
+    tens_group.add_argument(
         "--target-rank",
         dest="target_ranks",
         action="append",
@@ -184,7 +195,7 @@
     )
 
     # Used for parsing a comma-separated list of integers in a string
-    parser.add_argument(
+    tens_group.add_argument(
         "--target-dtype",
         dest="target_dtypes",
         action="append",
@@ -193,7 +204,7 @@
         help=f"Create test with a particular DType: [{', '.join([d.lower() for d in DTypeNames[1:]])}] (may be repeated)",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--num-const-inputs-concat",
         dest="num_const_inputs_concat",
         default=0,
@@ -211,14 +222,14 @@
         help="type of tests produced, positive, negative, or both",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--allow-pooling-and-conv-oversizes",
         dest="oversize",
         action="store_true",
         help="allow oversize padding, stride and kernel tests",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--zero-point",
         dest="zeropoint",
         default=None,
@@ -233,11 +244,11 @@
         help="output const tensors as numpy files for inspection",
     )
 
-    parser.add_argument(
+    ops_group.add_argument(
         "--level-8k-sizes",
         dest="level8k",
         action="store_true",
-        help="create level 8k size tests (RESIZE)",
+        help="create level 8k size tests",
     )
 
     args = parser.parse_args(argv)