| # Copyright (c) 2023, ARM Limited. |
| # SPDX-License-Identifier: Apache-2.0 |
| """Calls the data generation library to create the test data.""" |
| import ctypes as ct |
| import json |
| from pathlib import Path |
| |
| import numpy as np |
| import schemavalidation.schemavalidation as sch |
| |
| |
class GenerateError(Exception):
    """Exception raised for errors performing data generation.

    Raised by GenerateLibrary when the generate library cannot be found,
    the test description lacks the required data, or the library reports
    a failure.
    """
| |
| |
class GenerateLibrary:
    """Python interface to the C generate library.

    Simple usage to write out all input files:
        set_config(test_desc)
        write_numpy_files(test_path)

    To get data buffers (for const data):
        get_tensor_data(tensor_name)
    """

    def __init__(self, generate_lib_path):
        """Find the library and set up the interface.

        generate_lib_path - location of the generate shared library, given
            as a Path or a string (None is reported as a missing library)

        Raises GenerateError if the path is None or is not a file.
        """
        # Accept plain strings as well as Path objects
        self.lib_path = None if generate_lib_path is None else Path(generate_lib_path)
        if self.lib_path is None or not self.lib_path.is_file():
            raise GenerateError(f"Could not find generate library - {self.lib_path}")

        self.schema_validator = sch.TestDescSchemaValidator()

        self.test_desc = None
        self.json_config = None
        # ctypes documents path-like support for the library name only from
        # Python 3.12, so pass a plain string
        self.lib = ct.cdll.LoadLibrary(str(self.lib_path))

        # bool tgd_generate_data(config_json, tensor_name, buffer, buffer_size)
        self.tgd_generate_data = self.lib.tgd_generate_data
        self.tgd_generate_data.argtypes = [
            ct.c_char_p,
            ct.c_char_p,
            ct.c_void_p,
            ct.c_size_t,
        ]
        self.tgd_generate_data.restype = ct.c_bool

    def check_config(self, test_desc: dict):
        """Quick check that the config supports data generation.

        Returns True when test_desc has a meta section containing data_gen.
        """
        return ("meta" in test_desc) and ("data_gen" in test_desc["meta"])

    def set_config(self, test_desc: dict):
        """Set the test config in the library.

        test_desc - the test desc.json file

        Raises GenerateError if there is no meta/data_gen section. Any
        previously set config is cleared first, so a failure leaves no
        config set up.
        """
        self.test_desc = None
        self.json_config = None

        if not self.check_config(test_desc):
            raise GenerateError("No meta/data_gen section found in desc.json")

        # Validate the config versus the schema
        self.schema_validator.validate_config(test_desc)

        self.test_desc = test_desc
        self.json_config = test_desc["meta"]["data_gen"]

    def _create_buffer(self, dtype: str, shape: tuple):
        """Helper to create a zero filled buffer of the required type.

        dtype - data type name: FP32, FP16, INT32 or SHAPE
        shape - tensor shape, may be () for a rank 0 tensor

        Returns a tuple of (ctypes array, size of the array in bytes).
        Raises GenerateError for an unsupported data type.
        """
        # Force an integer product and convert to a Python int, as np.prod
        # of an empty shape is the float 1.0 by default and ctypes array
        # lengths must be integers
        size = int(np.prod(shape, dtype=np.int64))

        # ctypes arrays are zero initialized on creation, so no initializer
        # is passed (one would fail for a zero length array)
        if dtype == "FP32":
            buffer = (ct.c_float * size)()
            size_bytes = size * 4
        elif dtype == "FP16":
            size_bytes = size * 2
            # No ctypes half float type, so use a buffer of bytes
            buffer = (ct.c_ubyte * size_bytes)()
        elif dtype in ("INT32", "SHAPE"):
            buffer = (ct.c_int32 * size)()
            size_bytes = size * 4
        else:
            raise GenerateError(f"Unsupported data type {dtype}")

        return buffer, size_bytes

    def _convert_buffer(self, buffer, dtype: str, shape: tuple):
        """Helper to convert a buffer to a numpy array.

        buffer - ctypes array as created by _create_buffer
        dtype - data type name that the buffer was created with
        shape - tensor shape to give the resulting array
        """
        arr = np.ctypeslib.as_array(buffer)

        if dtype == "FP16":
            # Convert from bytes back to FP16
            arr = np.frombuffer(arr, np.float16)

        return np.reshape(arr, shape)

    def _data_gen_array(self, json_config: dict, tensor_name: str):
        """Generate the named tensor data and return a numpy array.

        json_config - the data_gen meta data containing a tensors section
        tensor_name - name of the tensor to generate the data for

        Raises GenerateError if the tensor information is missing from the
        config or the library reports a failure.
        """
        try:
            tensor = json_config["tensors"][tensor_name]
            dtype = tensor["data_type"]
            shape = tuple(tensor["shape"])
        except KeyError as e:
            raise GenerateError(
                f"Missing data in json config for input {tensor_name} - {repr(e)}"
            ) from e

        buffer, size_bytes = self._create_buffer(dtype, shape)
        buffer_ptr = ct.cast(buffer, ct.c_void_p)

        # The library is given the whole config serialized as JSON
        json_bytes = bytes(json.dumps(json_config), "utf8")

        result = self.tgd_generate_data(
            ct.c_char_p(json_bytes),
            ct.c_char_p(bytes(tensor_name, "utf8")),
            buffer_ptr,
            ct.c_size_t(size_bytes),
        )
        if not result:
            raise GenerateError("Data generate failed")

        return self._convert_buffer(buffer, dtype, shape)

    def _data_gen_write(
        self, test_path: Path, json_config: dict, ifm_name: str, ifm_file: str
    ):
        """Generate the named tensor data and save it in numpy format.

        test_path - directory to write the file into
        json_config - the data_gen meta data
        ifm_name - name of the tensor to generate
        ifm_file - file name to save the data as, within test_path
        """
        arr = self._data_gen_array(json_config, ifm_name)

        file_name = test_path / ifm_file
        np.save(file_name, arr)

    def write_numpy_files(self, test_path: Path):
        """Write out all the desc.json input tensors to numpy data files.

        test_path - directory to write the files into

        Raises GenerateError if no config has been set up, the input names
        or files are missing or differ in number, or any tensor fails to
        generate. Generation is attempted for every tensor and all the
        failures are reported together.
        """
        if self.test_desc is None or self.json_config is None:
            raise GenerateError("Cannot write numpy files as no config set up")

        try:
            ifm_names = self.test_desc["ifm_name"]
            ifm_files = self.test_desc["ifm_file"]
        except KeyError as e:
            raise GenerateError(f"Missing data in desc.json - {repr(e)}") from e

        # zip() would silently drop the unmatched entries
        if len(ifm_names) != len(ifm_files):
            raise GenerateError(
                "Mismatched input lists in desc.json - "
                f"{len(ifm_names)} ifm_name vs {len(ifm_files)} ifm_file entries"
            )

        failures = []
        for iname, ifile in zip(ifm_names, ifm_files):
            try:
                self._data_gen_write(test_path, self.json_config, iname, ifile)
            except GenerateError as e:
                failures.append(
                    f"ERROR: Failed to create data for tensor {iname} - {repr(e)}"
                )

        if failures:
            raise GenerateError("\n".join(failures))

    def get_tensor_data(self, tensor_name: str, json_config=None):
        """Get a numpy array for a named tensor in the data_gen meta data.

        tensor_name - name of the tensor to generate the data for
        json_config - optional data_gen meta data to use instead of the
            config given to set_config, it is validated before use

        Raises GenerateError if no json_config is given and no config has
        been set up.
        """
        if json_config is None:
            if self.json_config is None:
                raise GenerateError("Cannot get tensor data as no config set up")
            json_config = self.json_config
        else:
            # Validate the given config
            self.schema_validator.validate_config(
                json_config, schema_type=sch.TD_SCHEMA_DATA_GEN
            )

        return self._data_gen_array(json_config, tensor_name)
| |
| |
def main(argv=None):
    """Simple command line interface for the data generator.

    argv - list of command line arguments, argparse falls back to
        sys.argv[1:] when this is None

    Returns 0 on success, 1 if the generate library could not be set up or
    failed to write out the data, and 2 if the test directory or its
    desc.json is unusable (including no data generation support).
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--generate-lib-path",
        type=Path,
        help="Path to TOSA generate lib",
    )
    parser.add_argument(
        "path", type=Path, help="the path to the test directory to generate data for"
    )
    args = parser.parse_args(argv)
    test_path = args.path

    if args.generate_lib_path is None:
        # Only the default lookup needs the conformance package, so import it
        # here to allow --generate-lib-path to be used without it
        import conformance.model_files as cmf

        # Try to work out ref model directory and find the generate library
        # but this default only works for the python developer environment
        # i.e. when using the scripts/py-dev-env.* scripts
        # otherwise use the command line option --generate-lib-path to specify path
        ref_model_dir = Path(__file__).absolute().parents[2]
        args.generate_lib_path = cmf.find_tosa_file(
            cmf.TosaFileType.GENERATE_LIBRARY, ref_model_dir, False
        )

    if not test_path.is_dir():
        print(f"ERROR: Invalid directory - {test_path}")
        return 2

    test_desc_path = test_path / "desc.json"

    if not test_desc_path.is_file():
        print(f"ERROR: No test description found: {test_desc_path}")
        return 2

    # Load the JSON desc.json, read as UTF-8 rather than the locale encoding
    try:
        with test_desc_path.open("r", encoding="utf-8") as fd:
            test_desc = json.load(fd)
    except Exception as e:
        print(f"ERROR: Loading {test_desc_path} - {repr(e)}")
        return 2

    try:
        dgl = GenerateLibrary(args.generate_lib_path)
        if not dgl.check_config(test_desc):
            print(f"WARNING: No data generation supported for {test_path}")
            return 2

        dgl.set_config(test_desc)
    except GenerateError as e:
        print(f"ERROR: Initializing generate library - {repr(e)}")
        return 1

    try:
        dgl.write_numpy_files(test_path)
    except GenerateError as e:
        print(f"ERROR: Writing out data files to {test_path}\n{repr(e)}")
        return 1

    return 0
| |
| |
if __name__ == "__main__":
    # Use main()'s return code as the process exit status. SystemExit is raised
    # directly as the exit() builtin is only provided by the site module.
    raise SystemExit(main())