Blame - verif/checker/tosa_result_checker.py - tosa/reference_model

blob: 38ed510d1d1c3c39c1b18c3bbc72d34530fcb9ed [file] [log] [blame]

Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	1	"""TOSA result checker script."""
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	2	# Copyright (c) 2020-2023, ARM Limited.
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	3	# SPDX-License-Identifier: Apache-2.0
				4	import argparse
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	5	import json
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	6	from enum import IntEnum
				7	from enum import unique
				8	from pathlib import Path
				9
				10	import numpy as np
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	11	from checker.color_print import LogColors
				12	from checker.color_print import print_color
				13	from checker.verifier import VerifierError
				14	from checker.verifier import VerifierLibrary
James Ward	24dbc42	2022-10-19 12:20:31 +0100	[diff] [blame]	15	from generator.tosa_utils import float32_is_valid_bfloat16
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	16	from schemavalidation.schemavalidation import TestDescSchemaValidator
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	17
				18
				19	@unique
				20	class TestResult(IntEnum):
				21	"""Test result values."""
				22
				23	# Note: PASS must be 0 for command line return success
				24	PASS = 0
				25	MISSING_FILE = 1
				26	INCORRECT_FORMAT = 2
				27	MISMATCH = 3
				28	INTERNAL_ERROR = 4
				29
				30
				31	TestResultErrorStr = [
				32	"",
				33	"Missing file",
				34	"Incorrect format",
				35	"Mismatch",
				36	"Internal error",
				37	]
				38	##################################
				39
# Default tolerance for floating point comparisons (test_check passes it to
# np.allclose as `atol`; it is also the default of the --fp-tolerance option)
DEFAULT_FP_TOLERANCE = 0.001
# Module wide switch read by _print_result(); change it via set_print_result()
result_printing = True
				42
				43
				44	def set_print_result(enabled):
				45	"""Set whether to print out or not."""
				46	global result_printing
				47	result_printing = enabled
				48
				49
				50	def _print_result(color, msg):
				51	"""Print out result."""
				52	global result_printing
				53	if result_printing:
				54	print_color(color, msg)
				55
				56
				57	def compliance_check(
				58	imp_result_path,
				59	ref_result_path,
				60	bnd_result_path,
				61	test_name,
				62	compliance_config,
				63	ofm_name,
				64	verify_lib_path,
				65	):
				66	try:
				67	vlib = VerifierLibrary(verify_lib_path)
				68	except VerifierError as e:
				69	_print_result(LogColors.RED, f"INTERNAL ERROR {test_name}")
				70	msg = f"Could not load verfier library: {str(e)}"
				71	return (TestResult.INTERNAL_ERROR, 0.0, msg)
				72
				73	success = vlib.verify_data(
				74	ofm_name, compliance_config, imp_result_path, ref_result_path, bnd_result_path
				75	)
				76	if success:
				77	_print_result(LogColors.GREEN, f"Results PASS {test_name}")
				78	return (TestResult.PASS, 0.0, "")
				79	else:
				80	_print_result(LogColors.RED, f"Results NON-COMPLIANT {test_name}")
				81	return (TestResult.MISMATCH, 0.0, "Non-compliance implementation results found")
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	82
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	83
				84	def test_check(
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	85	ref_result_path,
				86	imp_result_path,
				87	test_name=None,
James Ward	24dbc42	2022-10-19 12:20:31 +0100	[diff] [blame]	88	quantize_tolerance=0,
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	89	float_tolerance=DEFAULT_FP_TOLERANCE,
James Ward	24dbc42	2022-10-19 12:20:31 +0100	[diff] [blame]	90	misc_checks=[],
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	91	test_desc=None,
				92	bnd_result_path=None,
				93	ofm_name=None,
				94	verify_lib_path=None,
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	95	):
				96	"""Check if the result is the same as the expected reference."""
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	97	if test_desc:
				98	# New compliance method - first get test details
				99	try:
				100	TestDescSchemaValidator().validate_config(test_desc)
				101	except Exception as e:
				102	_print_result(LogColors.RED, f"Test INCORRECT FORMAT {test_name}")
				103	msg = f"Incorrect test format: {e}"
				104	return (TestResult.INCORRECT_FORMAT, 0.0, msg)
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	105
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	106	if test_name is None:
				107	test_name = "test"
				108
				109	paths = [imp_result_path, ref_result_path, bnd_result_path]
				110	names = ["Implementation", "Reference", "Bounds"]
				111	arrays = [None, None, None]
				112
				113	# Check the files exist and are in the right format
				114	for idx, path in enumerate(paths):
				115	name = names[idx]
				116	if path is None and name == "Bounds":
				117	# Bounds can be None - skip it
				118	continue
				119	if not path.is_file():
				120	_print_result(LogColors.RED, f"{name} MISSING FILE {test_name}")
				121	msg = f"Missing {name} file: {str(path)}"
				122	return (TestResult.MISSING_FILE, 0.0, msg)
				123	try:
				124	arrays[idx] = np.load(path)
				125	except Exception as e:
				126	_print_result(LogColors.RED, f"{name} INCORRECT FORMAT {test_name}")
				127	msg = f"Incorrect numpy format of {str(path)}\nnumpy.load exception: {e}"
				128	return (TestResult.INCORRECT_FORMAT, 0.0, msg)
				129
				130	if test_desc and "meta" in test_desc and "compliance" in test_desc["meta"]:
				131	# Switch to using the verifier library for full compliance
				132	if ofm_name is None:
				133	ofm_name = test_desc["ofm_name"][0]
				134	if len(test_desc["ofm_name"]) > 1:
				135	_print_result(LogColors.RED, f"Output Name MISSING FILE {test_name}")
				136	msg = "Must specify output name (ofm_name) to check as multiple found in desc.json"
				137	return (TestResult.MISSING_FILE, 0.0, msg)
				138
				139	compliance_json = test_desc["meta"]["compliance"]
				140
				141	return compliance_check(
				142	*arrays,
				143	test_name,
				144	compliance_json,
				145	ofm_name,
				146	verify_lib_path,
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	147	)
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	148
				149	# Else continue with original checking method
				150	test_result, reference_result, _ = arrays
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	151
				152	# Type comparison
				153	if test_result.dtype != reference_result.dtype:
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	154	_print_result(LogColors.RED, "Results TYPE MISMATCH {}".format(test_name))
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	155	msg = "Mismatch results type: Expected {}, got {}".format(
				156	reference_result.dtype, test_result.dtype
				157	)
				158	return (TestResult.MISMATCH, 0.0, msg)
				159
				160	# Size comparison
				161	# Size = 1 tensors can be equivalently represented as having rank 0 or rank
				162	# >= 0, allow that special case
				163	test_result = np.squeeze(test_result)
				164	reference_result = np.squeeze(reference_result)
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	165	difference = None
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	166
				167	if np.shape(test_result) != np.shape(reference_result):
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	168	_print_result(LogColors.RED, "Results MISCOMPARE {}".format(test_name))
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	169	msg = "Shapes mismatch: Reference {} vs {}".format(
				170	np.shape(test_result), np.shape(reference_result)
				171	)
				172	return (TestResult.MISMATCH, 0.0, msg)
				173
James Ward	24dbc42	2022-10-19 12:20:31 +0100	[diff] [blame]	174	# Perform miscellaneous checks
				175	if "bf16" in misc_checks:
				176	# Ensure floats are valid bfloat16 values
				177	test_res_is_bf16 = all([float32_is_valid_bfloat16(f) for f in test_result.flat])
				178	ref_res_is_bf16 = all(
				179	[float32_is_valid_bfloat16(f) for f in reference_result.flat]
				180	)
				181	if not (test_res_is_bf16 and ref_res_is_bf16):
				182	msg = (
				183	"All output values must be valid bfloat16. "
				184	"reference_result: {ref_res_is_bf16}; test_result: {test_res_is_bf16}"
				185	)
				186	return (TestResult.INCORRECT_FORMAT, 0.0, msg)
				187
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	188	# for quantized test, allow +-(quantize_tolerance) error
				189	if reference_result.dtype == np.int32 or reference_result.dtype == np.int64:
				190
				191	if np.all(np.absolute(reference_result - test_result) <= quantize_tolerance):
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	192	_print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	193	return (TestResult.PASS, 0.0, "")
				194	else:
				195	tolerance = quantize_tolerance + 1
				196	while not np.all(
				197	np.absolute(reference_result - test_result) <= quantize_tolerance
				198	):
				199	tolerance = tolerance + 1
				200	if tolerance > 10:
				201	break
				202
				203	if tolerance > 10:
				204	msg = "Integer result does not match and is greater than 10 difference"
				205	else:
				206	msg = (
				207	"Integer result does not match but is within {} difference".format(
				208	tolerance
				209	)
				210	)
				211	# Fall-through to below to add failure values
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	212	difference = reference_result - test_result
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	213
				214	elif reference_result.dtype == bool:
				215	assert test_result.dtype == bool
				216	# All boolean values must match, xor will show up differences
				217	test = np.array_equal(reference_result, test_result)
				218	if np.all(test):
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	219	_print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	220	return (TestResult.PASS, 0.0, "")
				221	msg = "Boolean result does not match"
				222	tolerance = 0.0
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	223	difference = None
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	224	# Fall-through to below to add failure values
				225
James Ward	8b39043	2022-08-12 20:48:56 +0100	[diff] [blame]	226	# TODO: update for fp16 tolerance
				227	elif reference_result.dtype == np.float32 or reference_result.dtype == np.float16:
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	228	tolerance = float_tolerance
				229	if np.allclose(reference_result, test_result, atol=tolerance, equal_nan=True):
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	230	_print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	231	return (TestResult.PASS, tolerance, "")
				232	msg = "Float result does not match within tolerance of {}".format(tolerance)
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	233	difference = reference_result - test_result
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	234	# Fall-through to below to add failure values
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	235	else:
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	236	_print_result(LogColors.RED, "Results UNSUPPORTED TYPE {}".format(test_name))
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	237	msg = "Unsupported results type: {}".format(reference_result.dtype)
				238	return (TestResult.MISMATCH, 0.0, msg)
				239
				240	# Fall-through for mismatch failure to add values to msg
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	241	_print_result(LogColors.RED, "Results MISCOMPARE {}".format(test_name))
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	242	np.set_printoptions(threshold=128, edgeitems=2)
				243
				244	if difference is not None:
				245	tolerance_needed = np.amax(np.absolute(difference))
				246	msg = "{}\n-- tolerance_needed: {}".format(msg, tolerance_needed)
				247
				248	msg = "{}\n>> reference_result: {}\n{}".format(
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	249	msg, reference_result.shape, reference_result
				250	)
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	251	msg = "{}\n<< test_result: {}\n{}".format(msg, test_result.shape, test_result)
				252
				253	if difference is not None:
				254	msg = "{}\n!! difference_result: \n{}".format(msg, difference)
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	255	return (TestResult.MISMATCH, tolerance, msg)
				256
				257
				258	def main(argv=None):
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	259	"""Check that the supplied reference and result files have the same contents."""
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	260	parser = argparse.ArgumentParser()
				261	parser.add_argument(
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	262	"ref_result_path",
				263	type=Path,
				264	help="path to the reference model result file to check",
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	265	)
				266	parser.add_argument(
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	267	"imp_result_path",
				268	type=Path,
				269	help="path to the implementation result file to check",
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	270	)
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	271	parser.add_argument(
				272	"--fp-tolerance", type=float, default=DEFAULT_FP_TOLERANCE, help="FP tolerance"
				273	)
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	274	parser.add_argument(
				275	"--test_path", type=Path, help="path to the test that produced the results"
				276	)
				277	parser.add_argument(
				278	"--bnd-result-path",
				279	type=Path,
				280	help="path to the reference model bounds result file for the dot product compliance check",
				281	)
				282	parser.add_argument(
				283	"--ofm-name",
				284	type=str,
				285	help="name of the output tensor to check, defaults to the first ofm_name listed in the test",
				286	)
				287	parser.add_argument(
				288	"--verify-lib-path",
				289	type=Path,
				290	help="path to TOSA verify library",
				291	)
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	292	args = parser.parse_args(argv)
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	293
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	294	if args.test_path:
				295	# Get details from the test path
				296	test_desc_path = args.test_path / "desc.json"
				297	if not args.test_path.is_dir() or not test_desc_path.is_file():
				298	print(f"Invalid test directory {str(args.test_path)}")
				299	return TestResult.MISSING_FILE
				300
				301	try:
				302	with test_desc_path.open("r") as fd:
				303	test_desc = json.load(fd)
				304	except Exception as e:
				305	print(f"Invalid test description file {str(test_desc_path)}: {e}")
				306	return TestResult.INCORRECT_FORMAT
				307	test_name = args.test_path.name
				308	else:
				309	test_desc = None
				310	test_name = None
				311
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	312	result, tolerance, msg = test_check(
Jeremy Johnson	e2b5e87	2023-09-14 17:02:09 +0100	[diff] [blame]	313	args.ref_result_path,
				314	args.imp_result_path,
				315	float_tolerance=args.fp_tolerance,
				316	test_name=test_name,
				317	test_desc=test_desc,
				318	bnd_result_path=args.bnd_result_path,
				319	ofm_name=args.ofm_name,
				320	verify_lib_path=args.verify_lib_path,
Jeremy Johnson	e4b08ff	2022-09-15 10:38:17 +0100	[diff] [blame]	321	)
Jeremy Johnson	be1a940	2021-12-15 17:14:56 +0000	[diff] [blame]	322	if result != TestResult.PASS:
				323	print(msg)
				324
				325	return result
				326
				327
				328	if __name__ == "__main__":
				329	exit(main())