blob: 9f3d29dd55d2053610e36fb87660815034e75fbf [file] [log] [blame]
# Copyright (c) 2023, ARM Limited.
# SPDX-License-Identifier: Apache-2.0
"""Calls the data generation library to create the test data."""
4import ctypes as ct
5import json
6from pathlib import Path
7
8import numpy as np
Jeremy Johnsond1a08ce2023-10-18 17:22:21 +01009import schemavalidation.schemavalidation as sch
Jeremy Johnson65ba8092023-10-09 16:31:13 +010010
11
class GenerateError(Exception):
    """Error reported when data generation cannot be set up or performed."""
14
15
class GenerateLibrary:
    """Python interface to the C generate library.

    Simple usage to write out all input files:
        set_config(test_desc)
        write_numpy_files(test_path)

    To get data buffers (for const data):
        get_tensor_data(tensor_name)
    """

    def __init__(self, generate_lib_path):
        """Find the library and set up the interface.

        generate_lib_path - pathlib.Path to the generate shared library

        Raises GenerateError if the path is None or is not a file.
        """
        self.lib_path = generate_lib_path
        if self.lib_path is None or not self.lib_path.is_file():
            raise GenerateError(f"Could not find generate library - {self.lib_path}")

        self.schema_validator = sch.TestDescSchemaValidator()

        self.test_desc = None
        self.json_config = None
        # Hand ctypes a plain string - path-like objects are only documented
        # as accepted for the library name from Python 3.12 onwards
        self.lib = ct.cdll.LoadLibrary(str(self.lib_path))

        # bool tgd_generate_data(json config, tensor name, buffer, buffer size)
        self.tgd_generate_data = self.lib.tgd_generate_data
        self.tgd_generate_data.argtypes = [
            ct.c_char_p,
            ct.c_char_p,
            ct.c_void_p,
            ct.c_size_t,
        ]
        self.tgd_generate_data.restype = ct.c_bool

    def check_config(self, test_desc: dict):
        """Quick check that the config supports data generation.

        Returns True when test_desc has a meta/data_gen section.
        """
        return ("meta" in test_desc) and ("data_gen" in test_desc["meta"])

    def set_config(self, test_desc: dict):
        """Set the test config in the library.

        test_desc - the test desc.json file

        Raises GenerateError if there is no meta/data_gen section.
        """
        # Forget any previous config so a failure leaves nothing half set up
        self.test_desc = None
        self.json_config = None

        if not self.check_config(test_desc):
            raise GenerateError("No meta/data_gen section found in desc.json")

        # Validate the config versus the schema
        self.schema_validator.validate_config(test_desc)

        self.test_desc = test_desc
        self.json_config = test_desc["meta"]["data_gen"]

    def _create_buffer(self, dtype: str, shape: tuple):
        """Helper to create a zero filled buffer of the required type.

        Returns a tuple of (ctypes array, size of the array in bytes).
        Raises GenerateError for an unsupported data type.
        """
        if shape:
            # Use a Python int for the ctypes array length and byte size
            size = int(np.prod(shape))
        else:
            # Rank 0
            size = 1

        # ctypes arrays are zero initialized on creation. No initializer is
        # given as one value is too many for a zero element array (IndexError)
        if dtype == "FP32":
            buffer = (ct.c_float * size)()
            size_bytes = size * 4
        elif dtype == "FP16":
            size_bytes = size * 2
            # Use a buffer of bytes that is converted to FP16 afterwards
            buffer = (ct.c_ubyte * size_bytes)()
        elif dtype in ("INT32", "SHAPE"):
            buffer = (ct.c_int32 * size)()
            size_bytes = size * 4
        elif dtype == "INT8":
            size_bytes = size
            # Use a buffer of bytes that is converted to INT8 afterwards
            buffer = (ct.c_ubyte * size_bytes)()
        else:
            raise GenerateError(f"Unsupported data type {dtype}")

        return buffer, size_bytes

    def _convert_buffer(self, buffer, dtype: str, shape: tuple):
        """Helper to convert a buffer to a numpy array of the given shape."""
        arr = np.ctypeslib.as_array(buffer)

        if dtype == "FP16":
            # Convert from bytes back to FP16
            arr = np.frombuffer(arr, np.float16)
        elif dtype == "INT8":
            # Reinterpret the unsigned bytes as the signed type asked for
            arr = np.frombuffer(arr, np.int8)

        arr = np.reshape(arr, shape)

        return arr

    def _data_gen_array(self, json_config: dict, tensor_name: str):
        """Generate the named tensor data and return a numpy array.

        json_config - the data_gen meta data (a dictionary)
        tensor_name - name of an entry in json_config["tensors"]

        Raises GenerateError if the tensor information is missing or the
        library reports a failure.
        """
        try:
            tensor = json_config["tensors"][tensor_name]
            dtype = tensor["data_type"]
            shape = tuple(tensor["shape"])
        except KeyError as e:
            raise GenerateError(
                f"Missing data in json config for input {tensor_name} - {repr(e)}"
            ) from e

        buffer, size_bytes = self._create_buffer(dtype, shape)
        buffer_ptr = ct.cast(buffer, ct.c_void_p)

        json_bytes = bytes(json.dumps(json_config), "utf8")

        # The library fills in the buffer in place
        result = self.tgd_generate_data(
            ct.c_char_p(json_bytes),
            ct.c_char_p(bytes(tensor_name, "utf8")),
            buffer_ptr,
            ct.c_size_t(size_bytes),
        )
        if not result:
            raise GenerateError("Data generate failed")

        arr = self._convert_buffer(buffer, dtype, shape)
        return arr

    def _data_gen_write(
        self, test_path: Path, json_config: dict, ifm_name: str, ifm_file: str
    ):
        """Generate the named tensor data and save it in numpy format."""
        arr = self._data_gen_array(json_config, ifm_name)

        file_name = test_path / ifm_file
        np.save(file_name, arr)

    def write_numpy_files(self, test_path: Path):
        """Write out all the desc.json input tensors to numpy data files.

        Every tensor is attempted; the failures are collected and reported
        together in a single GenerateError.
        """
        if self.test_desc is None or self.json_config is None:
            raise GenerateError("Cannot write numpy files as no config set up")

        try:
            ifm_names = self.test_desc["ifm_name"]
            ifm_files = self.test_desc["ifm_file"]
        except KeyError as e:
            raise GenerateError(f"Missing data in desc.json - {repr(e)}") from e

        # zip() would silently skip the unmatched entries
        if len(ifm_names) != len(ifm_files):
            raise GenerateError(
                "Mismatched ifm_name and ifm_file entries in desc.json"
                f" - {len(ifm_names)} names for {len(ifm_files)} files"
            )

        failures = []
        for iname, ifile in zip(ifm_names, ifm_files):
            try:
                self._data_gen_write(test_path, self.json_config, iname, ifile)
            except GenerateError as e:
                failures.append(
                    f"ERROR: Failed to create data for tensor {iname} - {repr(e)}"
                )

        if failures:
            raise GenerateError("\n".join(failures))

    def get_tensor_data(self, tensor_name: str, json_config=None):
        """Get a numpy array for a named tensor in the data_gen meta data.

        json_config - optional data_gen meta data to use in place of the
                      config given to set_config; it is validated before use
        """
        if json_config is None:
            if self.json_config is None:
                raise GenerateError("Cannot get tensor data as no config set up")
            json_config = self.json_config
        else:
            # Validate the given config
            self.schema_validator.validate_config(
                json_config, schema_type=sch.TD_SCHEMA_DATA_GEN
            )

        return self._data_gen_array(json_config, tensor_name)
183
Jeremy Johnson65ba8092023-10-09 16:31:13 +0100184
def main(argv=None):
    """Command line interface to generate the input data for one test.

    argv - optional list of arguments, the command line is used when None

    Returns 2 when the test directory or its desc.json cannot be used (or has
    no data generation support), 1 when the generate library reports an error
    and None on success.
    """
    import argparse
    import conformance.model_files as cmf

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--generate-lib-path",
        type=Path,
        help="Path to TOSA generate lib",
    )
    parser.add_argument(
        "path", type=Path, help="the path to the test directory to generate data for"
    )
    args = parser.parse_args(argv)

    test_dir = args.path
    lib_path = args.generate_lib_path

    if lib_path is None:
        # Try to work out ref model directory and find the generate library
        # but this default only works for the python developer environment
        # i.e. when using the scripts/py-dev-env.* scripts
        # otherwise use the command line option --generate-lib-path to specify path
        ref_model_dir = Path(__file__).absolute().parents[2]
        lib_path = cmf.find_tosa_file(
            cmf.TosaFileType.GENERATE_LIBRARY, ref_model_dir, False
        )

    if not test_dir.is_dir():
        print(f"ERROR: Invalid directory - {test_dir}")
        return 2

    desc_file = test_dir / "desc.json"
    if not desc_file.is_file():
        print(f"ERROR: No test description found: {desc_file}")
        return 2

    # Read in the test description
    try:
        with desc_file.open("r") as desc_fd:
            description = json.load(desc_fd)
    except Exception as err:
        print(f"ERROR: Loading {desc_file} - {repr(err)}")
        return 2

    # Set up the library with the test description
    try:
        generator = GenerateLibrary(lib_path)
        if not generator.check_config(description):
            print(f"WARNING: No data generation supported for {test_dir}")
            return 2

        generator.set_config(description)
    except GenerateError as err:
        print(f"ERROR: Initializing generate library - {repr(err)}")
        return 1

    # Create the numpy files alongside the desc.json
    try:
        generator.write_numpy_files(test_dir)
    except GenerateError as err:
        print(f"ERROR: Writing out data files to {test_dir}\n{repr(err)}")
        return 1
246
247
if __name__ == "__main__":
    # Raise SystemExit directly rather than calling exit(), which is only
    # added to the builtins by the site module and so may not be available
    raise SystemExit(main())