Jeremy Johnson | 65ba809 | 2023-10-09 16:31:13 +0100 | [diff] [blame] | 1 | # Copyright (c) 2023, ARM Limited. |
| 2 | # SPDX-License-Identifier: Apache-2.0 |
| 3 | """Calls the data generation library to create the test data.""" |
| 4 | import ctypes as ct |
| 5 | import json |
| 6 | from pathlib import Path |
| 7 | |
| 8 | import numpy as np |
Jeremy Johnson | d1a08ce | 2023-10-18 17:22:21 +0100 | [diff] [blame] | 9 | import schemavalidation.schemavalidation as sch |
Jeremy Johnson | 65ba809 | 2023-10-09 16:31:13 +0100 | [diff] [blame] | 10 | |
| 11 | |
class GenerateError(Exception):
    """Exception raised for errors performing data generation.

    Raised by GenerateLibrary for problems such as a missing library file,
    a missing or incomplete configuration, an unsupported data type or a
    failed call into the generate library.
    """
| 14 | |
| 15 | |
class GenerateLibrary:
    """Python interface to the C generate library.

    Simple usage to write out all input files:
        set_config(test_desc)
        write_numpy_files(test_path)

    To get data buffers (for const data):
        get_tensor_data(tensor_name)
    """

    # Supported data type name -> (ctypes element type of the buffer,
    # number of bytes occupied by one tensor element).
    # FP16 has no ctypes equivalent so it is held as raw bytes.
    # NOTE(review): INT8 is also held as unsigned bytes, so the numpy array
    # produced for it has dtype uint8 - confirm this is what callers expect.
    _BUFFER_LAYOUT = {
        "FP32": (ct.c_float, 4),
        "FP16": (ct.c_ubyte, 2),
        "INT32": (ct.c_int32, 4),
        "SHAPE": (ct.c_int32, 4),
        "INT8": (ct.c_ubyte, 1),
    }

    def __init__(self, generate_lib_path):
        """Find the library and set up the interface.

        generate_lib_path - location of the generate shared library,
            given as a Path or a plain string

        Raises GenerateError if the path is None or does not name a file.
        """
        if generate_lib_path is None:
            self.lib_path = None
        else:
            # Accept plain strings as well as Path objects
            self.lib_path = Path(generate_lib_path)
        if self.lib_path is None or not self.lib_path.is_file():
            raise GenerateError(f"Could not find generate library - {self.lib_path}")

        self.schema_validator = sch.TestDescSchemaValidator()

        self.test_desc = None
        self.json_config = None
        # Pass a str as path-like library names are only accepted by ctypes
        # on every platform from Python 3.12 onwards
        self.lib = ct.cdll.LoadLibrary(str(self.lib_path))

        # bool tgd_generate_data(config_json, tensor_name, buffer, buffer_size)
        self.tgd_generate_data = self.lib.tgd_generate_data
        self.tgd_generate_data.argtypes = [
            ct.c_char_p,
            ct.c_char_p,
            ct.c_void_p,
            ct.c_size_t,
        ]
        self.tgd_generate_data.restype = ct.c_bool

    def check_config(self, test_desc: dict) -> bool:
        """Quick check that the config supports data generation.

        Returns True when test_desc has a "meta" section containing "data_gen".
        """
        return ("meta" in test_desc) and ("data_gen" in test_desc["meta"])

    def set_config(self, test_desc: dict):
        """Set the test config in the library.

        test_desc - the test desc.json file

        Raises GenerateError if there is no meta/data_gen section. Any
        previously set config is cleared before the new one is checked,
        so a failed call leaves no config set.
        """
        self.test_desc = None
        self.json_config = None

        if not self.check_config(test_desc):
            raise GenerateError("No meta/data_gen section found in desc.json")

        # Validate the config versus the schema
        self.schema_validator.validate_config(test_desc)

        self.test_desc = test_desc
        self.json_config = test_desc["meta"]["data_gen"]

    def _create_buffer(self, dtype: str, shape: tuple):
        """Helper to create a zeroed buffer of the required type.

        dtype - data type name, one of the keys of _BUFFER_LAYOUT
        shape - tensor shape, an empty tuple means rank 0 (one element)

        Returns a tuple of (ctypes array, size of the array in bytes).
        A shape with a zero dimension gives an empty buffer of 0 bytes.
        Raises GenerateError for an unsupported data type.
        """
        layout = self._BUFFER_LAYOUT.get(dtype) if isinstance(dtype, str) else None
        if layout is None:
            raise GenerateError(f"Unsupported data type {dtype}")
        ctype, bytes_per_element = layout

        # Forcing an integer dtype makes the product of an empty shape 1
        # (rank 0) and int() keeps numpy scalars out of the ctypes calls
        num_elements = int(np.prod(shape, dtype=np.int64))
        size_bytes = num_elements * bytes_per_element

        # ctypes arrays are zero initialized on creation; passing an explicit
        # initializer would fail for a zero length array
        buffer = (ctype * (size_bytes // ct.sizeof(ctype)))()

        return buffer, size_bytes

    def _convert_buffer(self, buffer, dtype: str, shape: tuple):
        """Helper to convert a buffer to a numpy array of the given shape."""
        arr = np.ctypeslib.as_array(buffer)

        if dtype == "FP16":
            # Convert from bytes back to FP16
            arr = np.frombuffer(arr, np.float16)

        arr = np.reshape(arr, shape)

        return arr

    def _data_gen_array(self, json_config: dict, tensor_name: str):
        """Generate the named tensor data and return a numpy array.

        json_config - the data_gen meta data containing a "tensors" section
        tensor_name - name of the tensor within json_config["tensors"]

        Raises GenerateError if the tensor information is missing, the data
        type is unsupported or the library reports a failure.
        """
        try:
            tensor = json_config["tensors"][tensor_name]
            dtype = tensor["data_type"]
            shape = tuple(tensor["shape"])
        except KeyError as e:
            raise GenerateError(
                f"Missing data in json config for input {tensor_name} - {repr(e)}"
            ) from e

        buffer, size_bytes = self._create_buffer(dtype, shape)
        buffer_ptr = ct.cast(buffer, ct.c_void_p)

        # The library takes the whole data_gen config as a JSON string
        json_bytes = bytes(json.dumps(json_config), "utf8")

        result = self.tgd_generate_data(
            ct.c_char_p(json_bytes),
            ct.c_char_p(bytes(tensor_name, "utf8")),
            buffer_ptr,
            ct.c_size_t(size_bytes),
        )
        if not result:
            raise GenerateError("Data generate failed")

        return self._convert_buffer(buffer, dtype, shape)

    def _data_gen_write(
        self, test_path: Path, json_config: dict, ifm_name: str, ifm_file: str
    ):
        """Generate the named tensor data and save it in numpy format.

        The data is saved with np.save to test_path / ifm_file.
        """
        arr = self._data_gen_array(json_config, ifm_name)

        file_name = test_path / ifm_file
        np.save(file_name, arr)

    def write_numpy_files(self, test_path: Path):
        """Write out all the desc.json input tensors to numpy data files.

        test_path - directory that the numpy files are written to

        Every tensor is attempted; failures are collected and reported
        together in a single GenerateError once all have been tried.
        Raises GenerateError if no config is set, if ifm_name/ifm_file are
        missing or of different lengths, or if any tensor fails.
        """
        if self.test_desc is None or self.json_config is None:
            raise GenerateError("Cannot write numpy files as no config set up")

        try:
            ifm_names = self.test_desc["ifm_name"]
            ifm_files = self.test_desc["ifm_file"]
        except KeyError as e:
            raise GenerateError(f"Missing data in desc.json - {repr(e)}") from e

        # zip() would silently drop the unmatched entries of the longer list
        if len(ifm_names) != len(ifm_files):
            raise GenerateError(
                f"Mismatched number of ifm_name ({len(ifm_names)}) and "
                f"ifm_file ({len(ifm_files)}) entries in desc.json"
            )

        failures = []
        for iname, ifile in zip(ifm_names, ifm_files):
            try:
                self._data_gen_write(test_path, self.json_config, iname, ifile)
            except GenerateError as e:
                failures.append(
                    f"ERROR: Failed to create data for tensor {iname} - {repr(e)}"
                )

        if failures:
            raise GenerateError("\n".join(failures))

    def get_tensor_data(self, tensor_name: str, json_config=None):
        """Get a numpy array for a named tensor in the data_gen meta data.

        tensor_name - name of the tensor to generate
        json_config - optional data_gen config to use instead of the one
            given to set_config; it is validated against the schema first

        Raises GenerateError if no config is given and none has been set.
        """
        if json_config is None:
            if self.json_config is None:
                raise GenerateError("Cannot get tensor data as no config set up")
            json_config = self.json_config
        else:
            # Validate the given config
            self.schema_validator.validate_config(
                json_config, schema_type=sch.TD_SCHEMA_DATA_GEN
            )

        return self._data_gen_array(json_config, tensor_name)
| 183 | |
Jeremy Johnson | 65ba809 | 2023-10-09 16:31:13 +0100 | [diff] [blame] | 184 | |
def main(argv=None):
    """Simple command line interface for the data generator.

    argv - list of command line arguments, defaults to sys.argv[1:]

    Returns the process exit status:
        0 - the data files were written
        1 - the generate library could not be set up or generation failed
        2 - the test directory or its desc.json is missing, unreadable or
            has no data generation section
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--generate-lib-path",
        type=Path,
        help="Path to TOSA generate lib",
    )
    parser.add_argument(
        "path", type=Path, help="the path to the test directory to generate data for"
    )
    args = parser.parse_args(argv)
    test_path = args.path

    if args.generate_lib_path is None:
        # Only needed for the default search, so an explicit
        # --generate-lib-path works without the conformance package
        import conformance.model_files as cmf

        # Try to work out ref model directory and find the generate library
        # but this default only works for the python developer environment
        # i.e. when using the scripts/py-dev-env.* scripts
        # otherwise use the command line option --generate-lib-path to specify path
        ref_model_dir = Path(__file__).absolute().parents[2]
        args.generate_lib_path = cmf.find_tosa_file(
            cmf.TosaFileType.GENERATE_LIBRARY, ref_model_dir, False
        )

    if not test_path.is_dir():
        print(f"ERROR: Invalid directory - {test_path}")
        return 2

    test_desc_path = test_path / "desc.json"

    if not test_desc_path.is_file():
        print(f"ERROR: No test description found: {test_desc_path}")
        return 2

    # Load the JSON desc.json
    try:
        # JSON text is UTF-8, so do not depend on the locale's default encoding
        with test_desc_path.open("r", encoding="utf-8") as fd:
            test_desc = json.load(fd)
    except Exception as e:
        # Command line boundary: report any load or parse problem and stop
        print(f"ERROR: Loading {test_desc_path} - {repr(e)}")
        return 2

    try:
        dgl = GenerateLibrary(args.generate_lib_path)
        if not dgl.check_config(test_desc):
            print(f"WARNING: No data generation supported for {test_path}")
            return 2

        dgl.set_config(test_desc)
    except GenerateError as e:
        print(f"ERROR: Initializing generate library - {repr(e)}")
        return 1

    try:
        dgl.write_numpy_files(test_path)
    except GenerateError as e:
        print(f"ERROR: Writing out data files to {test_path}\n{repr(e)}")
        return 1

    return 0
| 246 | |
| 247 | |
if __name__ == "__main__":
    # exit() is a helper installed by the site module and is not guaranteed
    # to exist (e.g. under "python -S"); SystemExit is always available and
    # carries main()'s return value through as the process exit status.
    raise SystemExit(main())