Add desc.json schema validation with compliance/data_gen extensions

Rename script utilities to tosa_*, such as tosa_json2numpy

Signed-off-by: Jeremy Johnson <jeremy.johnson@arm.com>
Change-Id: Ie8d584e2afb189fb74cf96b39590c7c27444ba14
diff --git a/scripts/schemavalidation/__init__.py b/scripts/schemavalidation/__init__.py
new file mode 100644
index 0000000..5f57249
--- /dev/null
+++ b/scripts/schemavalidation/__init__.py
@@ -0,0 +1,3 @@
+"""Namespace."""
+# Copyright (c) 2023 Arm Limited.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/scripts/schemavalidation/compliance-config.schema.json b/scripts/schemavalidation/compliance-config.schema.json
new file mode 100644
index 0000000..677a2a8
--- /dev/null
+++ b/scripts/schemavalidation/compliance-config.schema.json
@@ -0,0 +1,79 @@
+ {
+    "$comment": "Copyright (c) 2023, ARM Limited.",
+    "$comment": "SPDX-License-Identifier: Apache-2.0",
+    "$id": "compliance-config.schema.json",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "title": "compliance-config",
+    "description": "Compliance configuration for verifying TOSA tensor data outputs from a test",
+    "type": "object",
+    "properties": {
+        "version": {
+            "description": "version string X.Y of this config data",
+            "type": "string"
+        },
+        "tensors": {
+            "type": "object",
+            "description": "dictionary of output tensors - keys are the tosa network names",
+            "patternProperties":
+            {
+                "^.*$":
+                {
+                    "description": "per named tensor config for compliance checking",
+                    "type": "object",
+                    "properties": {
+                        "mode": {
+                            "description": "verifier mode EXACT, DOT_PRODUCT, ULP, REDUCE_PRODUCT, or FP_SPECIAL",
+                            "type": "string"
+                        },
+                        "ulp_info": {
+                            "description": "info required for the ULP mode",
+                            "type": "object",
+                            "properties":
+                            {
+                                "ulp": {
+                                    "description": "ulp range limit - such as 1 or 0.5",
+                                    "type": "string"
+                                }
+                            },
+                            "required": [ "ulp" ]
+                        },
+                        "dot_product_info": {
+                            "description": "info required for the DOT_PRODUCT mode",
+                            "type": "object",
+                            "properties":
+                            {
+                                "s": {
+                                    "description": "test set 0 to 5 or -1 for other data",
+                                    "type": "integer"
+                                },
+                                "ks": {
+                                    "description": "kernel size for this dot product operation",
+                                    "type": "integer"
+                                },
+                                "data_type": {
+                                    "description": "tensor data type, such as: BOOL, INT16, FP32",
+                                    "type": "string"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "s",
+                                "ks",
+                                "data_type"
+                            ]
+                        }
+                    },
+                    "additionalProperties": false,
+                    "required": [
+                        "mode"
+                    ]
+                }
+            }
+        }
+    },
+    "additionalProperties": false,
+    "required": [
+        "version",
+        "tensors"
+    ]
+}
diff --git a/scripts/schemavalidation/datagen-config.schema.json b/scripts/schemavalidation/datagen-config.schema.json
new file mode 100644
index 0000000..01f9fad
--- /dev/null
+++ b/scripts/schemavalidation/datagen-config.schema.json
@@ -0,0 +1,165 @@
+ {
+    "$comment": "Copyright (c) 2023, ARM Limited.",
+    "$comment": "SPDX-License-Identifier: Apache-2.0",
+    "$id": "datagen-config.schema.json",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "title": "datagen-config",
+    "description": "Data generator configuration for generating TOSA tensor data",
+    "type": "object",
+    "properties": {
+        "version": {
+            "description": "version string X.Y of this config data",
+            "type": "string"
+        },
+        "tensors": {
+            "type": "object",
+            "description": "dictionary of input tensors - keys are the tosa network names",
+            "patternProperties":
+            {
+                "^.*$":
+                {
+                    "description": "per named tensor config for data generation",
+                    "type": "object",
+                    "properties": {
+                        "generator": {
+                            "description": "data generator name - PSEUDO_RANDOM, DOT_PRODUCT or OP_SPECIFIC",
+                            "type": "string"
+                        },
+                        "data_type": {
+                            "description": "tensor data type, such as: BOOL, INT16, FP32",
+                            "type": "string"
+                        },
+                        "input_type": {
+                            "description": "input is variable or constant",
+                            "type": "string"
+                        },
+                        "shape": {
+                            "description": "tensor shape array (rank 0 should be [])",
+                            "type": "array",
+                            "items": {
+                                "description": "shape dimension",
+                                "type": "integer",
+                                "minimum": 1
+                            }
+                        },
+                        "op": {
+                            "description": "operator name the tensor is initially used with",
+                            "type": "string"
+                        },
+                        "input_pos": {
+                            "description": "tensor input position for the operator (starts at 0)",
+                            "type": "integer",
+                            "minimum": 0
+                        },
+                        "pseudo_random_info": {
+                            "description": "info required for the PSEUDO_RANDOM generator",
+                            "type": "object",
+                            "properties":
+                            {
+                                "rng_seed": {
+                                    "description": "random number generator seed",
+                                    "type": "integer"
+                                },
+                                "range": {
+                                    "type": "array",
+                                    "description": "range of random numbers from low to high inclusively",
+                                    "minItems": 2,
+                                    "maxItems": 2,
+                                    "items": {
+                                        "description": "[low value, high value] as strings to allow ints and floats",
+                                        "type": "string"
+                                    }
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [ "rng_seed" ]
+                        },
+                        "dot_product_info": {
+                            "description": "info required for the DOT_PRODUCT generator",
+                            "type": "object",
+                            "properties":
+                            {
+                                "s": {
+                                    "description": "test set 0 to 5 or -1 for other data",
+                                    "type": "integer"
+                                },
+                                "ks": {
+                                    "description": "kernel size for this dot product operation",
+                                    "type": "integer"
+                                },
+                                "acc_type": {
+                                    "description": "operator accumulator type (like tensor data_type)",
+                                    "type": "string"
+                                },
+                                "kernel": {
+                                    "type": "array",
+                                    "description": "kernel x, y sizes (for avg_pool2d)",
+                                    "minItems": 2,
+                                    "maxItems": 2,
+                                    "items": {
+                                        "description": "kernel dimension",
+                                        "type": "integer",
+                                        "minimum": 1
+                                    }
+                                },
+                                "axis": {
+                                    "description": "axis dimension (for reduce_sum)",
+                                    "type": "integer",
+                                    "minimum": 0
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "s",
+                                "ks",
+                                "acc_type"
+                            ]
+                        },
+                        "op_specific_info": {
+                            "description": "info required for the OP_SPECIFIC generator",
+                            "type": "object",
+                            "properties":
+                            {
+                                "sub_generator": {
+                                    "description": "sub generator type for this op - FULL, SPECIAL or BOUNDARY",
+                                    "type": "string"
+                                },
+                                "offset": {
+                                    "description": "starting offset within the test data",
+                                    "type": "integer",
+                                    "minimum": 0
+                                },
+                                "attributes": {
+                                    "description": "attribute data from the op needed to compute the data",
+                                    "type": "object",
+                                    "properties": {
+                                        "TBD": {
+                                            "description": "Probably needed for RESCALE and MUL",
+                                            "type": "string"
+                                        }
+                                    }
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [ "sub_generator" ]
+                        }
+                    },
+                    "additionalProperties": false,
+                    "required": [
+                        "generator",
+                        "data_type",
+                        "input_type",
+                        "shape",
+                        "op",
+                        "input_pos"
+                    ]
+                }
+            }
+        }
+    },
+    "additionalProperties": false,
+    "required": [
+        "version",
+        "tensors"
+    ]
+}
diff --git a/scripts/schemavalidation/desc.schema.json b/scripts/schemavalidation/desc.schema.json
new file mode 100644
index 0000000..226fc68
--- /dev/null
+++ b/scripts/schemavalidation/desc.schema.json
@@ -0,0 +1,111 @@
+ {
+    "$comment": "Copyright (c) 2023, ARM Limited.",
+    "$comment": "SPDX-License-Identifier: Apache-2.0",
+    "$id": "desc.schema.json",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "title": "desc",
+    "description": "Test description",
+    "type": "object",
+    "properties": {
+        "tosa_file": {
+            "description": "tosa flatbuffer file",
+            "type": "string"
+        },
+        "ifm_name": {
+            "description": "list of input tensors",
+            "type": "array",
+            "items": {
+                "description": "tensor name",
+                "type": "string"
+            }
+        },
+        "ifm_file": {
+            "description": "list of input files, one for each tensor in ifm_name",
+            "type": "array",
+            "items": {
+                "description": "file name",
+                "type": "string"
+            }
+        },
+        "ofm_name": {
+            "description": "list of output tensors",
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                "description": "tensor name",
+                "type": "string"
+            }
+        },
+        "ofm_file": {
+            "description": "list of output files, one for each tensor in ofm_name",
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                "description": "file name",
+                "type": "string"
+            }
+        },
+        "expected_failure": {
+            "description": "if the test is expected to fail - used for test filtering",
+            "type": "boolean"
+        },
+        "expected_failure_desc": {
+            "description": "summary of expected failure test",
+            "type": "string"
+        },
+        "expected_return_code": {
+            "description": "DEPRECATED (unused): expected return code from ref-model",
+            "type": "integer"
+        },
+        "expected_result_file": {
+            "description": "DEPRECATED: list of expected result files, one for each tensor in ofm_name",
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                "description": "file name",
+                "type": "string"
+            }
+        },
+        "profile": {
+            "description": "list of profiles this test is valid for",
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                "description": "profile name - tosa_bi or tosa_mi",
+                "type": "string"
+            }
+        },
+        "tags": {
+            "description": "list of string tags that can be used for test filtering",
+            "type": "array",
+            "items": {
+                "description": "test tag for filtering",
+                "type": "string"
+            }
+        },
+        "meta": {
+            "description": "meta-data for test libraries",
+            "type": "object",
+            "properties":
+            {
+                "data_gen": {
+                    "description": "data generation meta-data",
+                    "$ref": "datagen-config.schema.json"
+                },
+                "compliance": {
+                    "description": "compliance meta-data",
+                    "$ref": "compliance-config.schema.json"
+                }
+            },
+            "additionalProperties": false
+        }
+    },
+    "additionalProperties": false,
+    "required": [
+        "tosa_file",
+        "ifm_name",
+        "ifm_file",
+        "ofm_name",
+        "ofm_file"
+    ]
+}
diff --git a/scripts/schemavalidation/schemavalidation.py b/scripts/schemavalidation/schemavalidation.py
new file mode 100644
index 0000000..2848edf
--- /dev/null
+++ b/scripts/schemavalidation/schemavalidation.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2023, ARM Limited.
+# SPDX-License-Identifier: Apache-2.0
+"""Validates desc.json against its JSON schema."""
+import json
+from pathlib import Path
+
+from jsonschema import validate
+from jsonschema import ValidationError
+from referencing import Registry
+from referencing import Resource
+
+# Keys selecting which schema a config is validated against
+TD_SCHEMA_FULL = "full"
+TD_SCHEMA_DATA_GEN = "data_gen"
+TD_SCHEMA_COMPLIANCE = "compliance"
+
+# Default file info
+SCRIPT = Path(__file__).absolute()
+# Schema file names - loaded from the directory containing this script
+TD_SCHEMAS = {
+    TD_SCHEMA_DATA_GEN: "datagen-config.schema.json",
+    TD_SCHEMA_COMPLIANCE: "compliance-config.schema.json",
+    TD_SCHEMA_FULL: "desc.schema.json",
+}
+
+
+class TestDescSchemaValidator:
+    """Validate test description configs against the bundled JSON schemas.
+
+    All the schemas listed in TD_SCHEMAS are loaded once at construction.
+    The data_gen and compliance schemas are also added to a registry under
+    their file names so that the "$ref" entries in desc.schema.json resolve.
+    """
+
+    def __init__(self):
+        """Initialize by loading all the schemas and setting up a registry."""
+        self.registry = Registry()
+        self.schemas = {}
+        for key, name in TD_SCHEMAS.items():
+            schema_path = SCRIPT.parent / name
+            # JSON text is UTF-8 - do not rely on the locale default encoding
+            with schema_path.open("r", encoding="utf-8") as fd:
+                schema = json.load(fd)
+            self.schemas[key] = schema
+            if key != TD_SCHEMA_FULL:
+                # Only the sub-schemas are referenced (via "$ref") by name
+                resource = Resource.from_contents(schema)
+                self.registry = self.registry.with_resource(uri=name, resource=resource)
+
+    def validate_config(self, config, schema_type=TD_SCHEMA_FULL):
+        """Validate the whole (or partial) config versus the relevant schema.
+
+        config: the loaded JSON data (full desc.json or a meta sub-section)
+        schema_type: one of the TD_SCHEMA_* keys selecting the schema
+
+        Raises jsonschema.ValidationError if the config does not conform
+        and KeyError if schema_type is not a known schema key.
+        """
+        validate(config, self.schemas[schema_type], registry=self.registry)
+
+
+def main(argv=None):
+    """Command line interface for the schema validation.
+
+    Returns 0 if desc.json is valid, 1 if it fails schema validation and
+    2 if the test directory or desc.json cannot be found or loaded.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "path", type=Path, help="the path to the test directory to validate"
+    )
+    args = parser.parse_args(argv)
+    test_path = args.path
+
+    if not test_path.is_dir():
+        print(f"ERROR: Invalid directory - {test_path}")
+        return 2
+
+    test_desc_path = test_path / "desc.json"
+
+    if not test_desc_path.is_file():
+        print(f"ERROR: No test description found: {test_desc_path}")
+        return 2
+
+    # Load the JSON desc.json
+    try:
+        with test_desc_path.open("r", encoding="utf-8") as fd:
+            test_desc = json.load(fd)
+    except Exception as e:
+        print(f"ERROR: Loading {test_desc_path} - {repr(e)}")
+        return 2
+
+    sv = TestDescSchemaValidator()
+    try:
+        sv.validate_config(test_desc, TD_SCHEMA_FULL)
+    except ValidationError as e:
+        # Report like the other failures instead of an uncaught traceback;
+        # 1 keeps the exit status an uncaught exception would have produced
+        print(f"ERROR: Validating {test_desc_path} - {e.message}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    # exit() is only injected by the site module - SystemExit always works
+    raise SystemExit(main())