Jeremy Johnson | 65ba809 | 2023-10-09 16:31:13 +0100 | [diff] [blame] | 1 | # Copyright (c) 2023, ARM Limited. |
| 2 | # SPDX-License-Identifier: Apache-2.0 |
| 3 | """Calls the data generation library to create the test data.""" |
| 4 | import ctypes as ct |
| 5 | import json |
| 6 | from pathlib import Path |
| 7 | |
| 8 | import numpy as np |
Jeremy Johnson | d1a08ce | 2023-10-18 17:22:21 +0100 | [diff] [blame] | 9 | import schemavalidation.schemavalidation as sch |
Jeremy Johnson | 65ba809 | 2023-10-09 16:31:13 +0100 | [diff] [blame] | 10 | |
| 11 | |
class GenerateError(Exception):
    """Signals that generating test data through the generate library failed."""
| 14 | |
| 15 | |
class GenerateLibrary:
    """Python interface to the C generate library.

    Simple usage to write out all input files:
        set_config(test_desc)
        write_numpy_files(test_path)

    To get data buffers (for const data):
        get_tensor_data(tensor_name)
    """

    def __init__(self, generate_lib_path):
        """Find the library and set up the interface.

        generate_lib_path - location of the generate shared library, given
            as a pathlib.Path (a str is also accepted and converted)

        Raises GenerateError if the path is None or is not an existing file.
        """
        self.lib_path = (
            None if generate_lib_path is None else Path(generate_lib_path)
        )
        if self.lib_path is None or not self.lib_path.is_file():
            raise GenerateError(f"Could not find generate library - {self.lib_path}")

        self.schema_validator = sch.TestDescSchemaValidator()

        self.test_desc = None
        self.json_config = None
        # Hand ctypes a plain string - path-like library names are only
        # documented as supported from Python 3.12 onwards
        self.lib = ct.cdll.LoadLibrary(str(self.lib_path))

        # bool tgd_generate_data(json config, tensor name, buffer, buffer size)
        self.tgd_generate_data = self.lib.tgd_generate_data
        self.tgd_generate_data.argtypes = [
            ct.c_char_p,
            ct.c_char_p,
            ct.c_void_p,
            ct.c_size_t,
        ]
        self.tgd_generate_data.restype = ct.c_bool

    def check_config(self, test_desc: dict):
        """Quick check that the config supports data generation.

        Returns True only when test_desc has a "meta" entry that itself
        contains a "data_gen" entry.
        """
        return ("meta" in test_desc) and ("data_gen" in test_desc["meta"])

    def set_config(self, test_desc: dict):
        """Set the test config in the library.

        test_desc - the test desc.json file

        Raises GenerateError if there is no meta/data_gen section. Any
        previously set config is cleared before the new one is checked, so
        a failure leaves the object with no config set.
        """
        self.test_desc = None
        self.json_config = None

        if not self.check_config(test_desc):
            raise GenerateError("No meta/data_gen section found in desc.json")

        # Validate the config versus the schema
        self.schema_validator.validate_config(test_desc)

        self.test_desc = test_desc
        self.json_config = test_desc["meta"]["data_gen"]

    def _create_buffer(self, dtype: str, shape: tuple):
        """Helper to create a zero filled buffer of the required type.

        dtype - one of "FP32", "FP16", "INT32", "SHAPE" or "INT8"
        shape - the tensor shape; () gives a single element buffer

        Returns a (ctypes array, size in bytes) tuple.
        Raises GenerateError for any other data type.
        """
        # np.prod gives back a numpy scalar - a float (1.0) for an empty
        # shape - which ctypes cannot use as an array length, so force an int
        size = int(np.prod(shape))

        # NOTE: new ctypes arrays are already zero filled, so no initializer
        # is passed (an explicit one is rejected for zero length arrays)
        if dtype == "FP32":
            buffer = (ct.c_float * size)()
            size_bytes = size * 4
        elif dtype == "FP16":
            # No 16-bit float in ctypes, so use raw bytes (2 per element)
            size_bytes = size * 2
            buffer = (ct.c_ubyte * size_bytes)()
        elif dtype == "INT32" or dtype == "SHAPE":
            buffer = (ct.c_int32 * size)()
            size_bytes = size * 4
        elif dtype == "INT8":
            size_bytes = size
            buffer = (ct.c_ubyte * size_bytes)()
        else:
            raise GenerateError(f"Unsupported data type {dtype}")

        return buffer, size_bytes

    def _convert_buffer(self, buffer, dtype: str, shape: tuple):
        """Helper to convert a buffer to a numpy array of the given shape."""
        arr = np.ctypeslib.as_array(buffer)

        if dtype == "FP16":
            # Convert from bytes back to FP16
            arr = np.frombuffer(arr, np.float16)
        # NOTE(review): INT8 data is exposed as uint8 because its buffer is
        # made of c_ubyte - confirm that consumers expect unsigned bytes

        arr = np.reshape(arr, shape)

        return arr

    def _data_gen_array(self, json_config: dict, tensor_name: str):
        """Generate the named tensor data and return a numpy array.

        json_config - the data_gen config; the tensor's "data_type" and
            "shape" are read from its "tensors" section
        tensor_name - name of the tensor to generate

        Raises GenerateError if the tensor information is missing, the data
        type is unsupported or the library reports a failure.
        """
        try:
            tensor = json_config["tensors"][tensor_name]
            dtype = tensor["data_type"]
            shape = tuple(tensor["shape"])
        except KeyError as e:
            raise GenerateError(
                f"Missing data in json config for input {tensor_name} - {repr(e)}"
            ) from e

        buffer, size_bytes = self._create_buffer(dtype, shape)
        buffer_ptr = ct.cast(buffer, ct.c_void_p)

        # The library takes the whole config as a JSON string
        json_bytes = bytes(json.dumps(json_config), "utf8")

        result = self.tgd_generate_data(
            ct.c_char_p(json_bytes),
            ct.c_char_p(bytes(tensor_name, "utf8")),
            buffer_ptr,
            ct.c_size_t(size_bytes),
        )
        if not result:
            raise GenerateError("Data generate failed")

        return self._convert_buffer(buffer, dtype, shape)

    def _data_gen_write(
        self, test_path: Path, json_config: dict, ifm_name: str, ifm_file: str
    ):
        """Generate the named tensor data and save it in numpy format.

        The array is saved with np.save to test_path / ifm_file.
        """
        arr = self._data_gen_array(json_config, ifm_name)

        file_name = test_path / ifm_file
        np.save(file_name, arr)

    def write_numpy_files(self, test_path: Path):
        """Write out all the desc.json input tensors to numpy data files.

        test_path - directory that the files named in "ifm_file" are
            written to

        Raises GenerateError if no config is set, if "ifm_name"/"ifm_file"
        are missing or of different lengths, or if any tensor fails (all
        tensors are attempted and the failures reported together).
        """
        if self.test_desc is None or self.json_config is None:
            raise GenerateError("Cannot write numpy files as no config set up")

        try:
            ifm_names = self.test_desc["ifm_name"]
            ifm_files = self.test_desc["ifm_file"]
        except KeyError as e:
            raise GenerateError(f"Missing data in desc.json - {repr(e)}") from e

        # zip() would silently drop the unmatched entries
        if len(ifm_names) != len(ifm_files):
            raise GenerateError(
                "Mismatched data in desc.json - "
                f"{len(ifm_names)} ifm_name entries but {len(ifm_files)} ifm_file"
            )

        failures = []
        for iname, ifile in zip(ifm_names, ifm_files):
            try:
                self._data_gen_write(test_path, self.json_config, iname, ifile)
            except GenerateError as e:
                failures.append(
                    f"ERROR: Failed to create data for tensor {iname} - {repr(e)}"
                )

        if failures:
            raise GenerateError("\n".join(failures))

    def get_tensor_data(self, tensor_name: str, json_config=None):
        """Get a numpy array for a named tensor in the data_gen meta data.

        tensor_name - name of the tensor to generate
        json_config - optional data_gen config to use instead of the one
            from set_config; it is validated against the data_gen schema

        Raises GenerateError if no config is given and none has been set.
        """
        if json_config is None:
            if self.json_config is None:
                raise GenerateError("Cannot get tensor data as no config set up")
            json_config = self.json_config
        else:
            # Validate the given config
            self.schema_validator.validate_config(
                json_config, schema_type=sch.TD_SCHEMA_DATA_GEN
            )

        return self._data_gen_array(json_config, tensor_name)
| 179 | |
Jeremy Johnson | 65ba809 | 2023-10-09 16:31:13 +0100 | [diff] [blame] | 180 | |
def main(argv=None):
    """Simple command line interface for the data generator.

    argv - optional list of command line arguments handed to argparse
        (None makes argparse read the process arguments)

    Returns the exit code: 0 when the data files were written, 1 when the
    generate library could not be set up or failed to create the data, and
    2 when the input is unusable (invalid directory, missing or unreadable
    desc.json, or a desc.json without data generation support).
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--generate-lib-path",
        type=Path,
        help="Path to TOSA generate lib",
    )
    parser.add_argument(
        "path", type=Path, help="the path to the test directory to generate data for"
    )
    args = parser.parse_args(argv)
    test_path = args.path

    if args.generate_lib_path is None:
        # Try to work out ref model directory and find the generate library
        # but this default only works for the python developer environment
        # i.e. when using the scripts/py-dev-env.* scripts
        # otherwise use the command line option --generate-lib-path to specify path

        # Only needed for the default lookup, so import it only on this path
        import conformance.model_files as cmf

        ref_model_dir = Path(__file__).absolute().parents[2]
        args.generate_lib_path = cmf.find_tosa_file(
            cmf.TosaFileType.GENERATE_LIBRARY, ref_model_dir, False
        )

    if not test_path.is_dir():
        print(f"ERROR: Invalid directory - {test_path}")
        return 2

    test_desc_path = test_path / "desc.json"

    if not test_desc_path.is_file():
        print(f"ERROR: No test description found: {test_desc_path}")
        return 2

    # Load the JSON desc.json (read as UTF-8 rather than the locale default)
    try:
        with test_desc_path.open("r", encoding="utf-8") as fd:
            test_desc = json.load(fd)
    except Exception as e:
        print(f"ERROR: Loading {test_desc_path} - {repr(e)}")
        return 2

    try:
        dgl = GenerateLibrary(args.generate_lib_path)
        if not dgl.check_config(test_desc):
            print(f"WARNING: No data generation supported for {test_path}")
            return 2

        dgl.set_config(test_desc)
    except GenerateError as e:
        print(f"ERROR: Initializing generate library - {repr(e)}")
        return 1

    try:
        dgl.write_numpy_files(test_path)
    except GenerateError as e:
        print(f"ERROR: Writing out data files to {test_path}\n{repr(e)}")
        return 1

    return 0
| 242 | |
| 243 | |
if __name__ == "__main__":
    # Use SystemExit directly - the exit() builtin is an interactive helper
    # added by the site module and is not guaranteed to be available
    raise SystemExit(main())