Blame - verif/checker/tosa_result_checker.py - tosa/reference_model

2021-12-15 17:14:56 +0000

[diff] [blame]

1

"""TOSA result checker script."""

Jeremy Johnson

2024-01-18 16:57:28 +0000

[diff] [blame]

2

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

3

# SPDX-License-Identifier: Apache-2.0

4

import argparse

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

5

import json

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

6

from enum import IntEnum

7

from enum import unique

8

from pathlib import Path

9

10

import numpy as np

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

11

from checker.color_print import LogColors

12

from checker.color_print import print_color

13

from checker.verifier import VerifierError

14

from checker.verifier import VerifierLibrary

James Ward

2022-10-19 12:20:31 +0100

[diff] [blame]

15

from generator.tosa_utils import float32_is_valid_bfloat16

Won Jeon

2c34b46

2024-02-06 18:37:00 +0000

[diff] [blame]

16

from generator.tosa_utils import float32_is_valid_float8

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

17

from schemavalidation.schemavalidation import TestDescSchemaValidator

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

@unique
class TestResult(IntEnum):
    """Test result values.

    The numeric values double as process exit codes, which is why the
    enumeration derives from IntEnum and why PASS is pinned to zero.
    """

    # Note: PASS must be 0 for command line return success
    PASS = 0
    # A required result file was not found
    MISSING_FILE = 1
    # A result file or test description could not be parsed / validated
    INCORRECT_FORMAT = 2
    # The implementation result differs from the reference result
    MISMATCH = 3
    # The checker itself failed (e.g. the verifier library did not load)
    INTERNAL_ERROR = 4

# Human readable descriptions, one entry per result code in ascending
# numeric order (index 0 is the success case and is deliberately empty).
TestResultErrorStr = [
    "",
    "Missing file",
    "Incorrect format",
    "Mismatch",
    "Internal error",
]

##################################

40

Jeremy Johnson

2022-09-15 10:38:17 +0100

[diff] [blame]

41

# Default absolute tolerance used when comparing floating point results
DEFAULT_FP_TOLERANCE = 1e-3
# Module-wide switch consulted before printing result lines
result_printing = True

43

44

45

def set_print_result(enabled):
    """Set whether to print out or not.

    Stores `enabled` in the module-level `result_printing` flag.
    """
    global result_printing
    result_printing = enabled

49

50

51

def _print_result(color, msg):
    """Print out result.

    Forwards `msg` to `print_color` using `color`, but only while the
    module-level `result_printing` flag is truthy.
    """
    # The flag is only read here, so no `global` declaration is required.
    if result_printing:
        print_color(color, msg)

56

57

58

def compliance_check(
    imp_result_data,
    ref_result_data,
    bnd_result_data,
    test_name,
    compliance_config,
    ofm_name,
    verify_lib_path,
):
    """Check the implementation result using the verifier library.

    Args:
        imp_result_data: implementation result data to verify.
        ref_result_data: reference result data to verify against.
        bnd_result_data: bounds result data (passed straight through to
            the verifier library).
        test_name: name used in the printed result lines.
        compliance_config: compliance configuration handed to the verifier.
        ofm_name: name of the output being checked.
        verify_lib_path: location of the verifier library; None is reported
            as an internal error.

    Returns:
        Tuple of (TestResult, tolerance, message). The tolerance is always
        0.0 and the message is empty on success.
    """
    error = None
    vlib = None
    if verify_lib_path is None:
        error = "Please supply --verify-lib-path"
    else:
        try:
            vlib = VerifierLibrary(verify_lib_path)
        except VerifierError as e:
            error = str(e)

    # Bail out before touching the library if it could not be loaded
    if error is not None:
        _print_result(LogColors.RED, f"INTERNAL ERROR {test_name}")
        msg = f"Could not load verfier library: {error}"
        return (TestResult.INTERNAL_ERROR, 0.0, msg)

    success = vlib.verify_data(
        ofm_name, compliance_config, imp_result_data, ref_result_data, bnd_result_data
    )
    if success:
        _print_result(LogColors.GREEN, f"Compliance Results PASS {test_name}")
        return (TestResult.PASS, 0.0, "")

    _print_result(LogColors.RED, f"Results NON-COMPLIANT {test_name}")
    return (
        TestResult.MISMATCH,
        0.0,
        f"Non-compliance results found for {ofm_name}",
    )

Jeremy Johnson

2022-09-15 10:38:17 +0100

[diff] [blame]

94

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

95

96

def test_check(
    ref_result_path,
    imp_result_path,
    test_name=None,
    quantize_tolerance=0,
    float_tolerance=DEFAULT_FP_TOLERANCE,
    misc_checks=(),
    test_desc=None,
    bnd_result_path=None,
    ofm_name=None,
    verify_lib_path=None,
):
    """Check if the result is the same as the expected reference.

    Args:
        ref_result_path: path object of the reference result numpy file.
        imp_result_path: path object of the implementation result numpy file.
        test_name: name used in printed result lines, defaults to "test".
        quantize_tolerance: allowed absolute difference for integer results.
        float_tolerance: absolute tolerance for float16/float32 results.
        misc_checks: collection of extra check names; "bf16", "fp8e4m3" and
            "fp8e5m2" are recognised here.
        test_desc: test description dictionary; when it holds
            meta/compliance data the verifier library is used instead of
            the direct comparison.
        bnd_result_path: optional path object of the bounds result file.
        ofm_name: output name to check in compliance mode; defaults to the
            only entry of test_desc["ofm_name"].
        verify_lib_path: location of the verifier library (compliance mode).

    Returns:
        Tuple of (TestResult, tolerance, message); the message is empty
        when the check passes.
    """
    # Apply the default name first so every message below can use it
    if test_name is None:
        test_name = "test"

    if test_desc:
        # New compliance method - first get test details
        try:
            TestDescSchemaValidator().validate_config(test_desc)
        except Exception as e:
            _print_result(LogColors.RED, f"Test INCORRECT FORMAT {test_name}")
            msg = f"Incorrect test format: {e}"
            return (TestResult.INCORRECT_FORMAT, 0.0, msg)

    paths = [imp_result_path, ref_result_path, bnd_result_path]
    names = ["Implementation", "Reference", "Bounds"]
    arrays = [None, None, None]

    # Check the files exist and are in the right format
    for idx, (name, path) in enumerate(zip(names, paths)):
        if path is None and name == "Bounds":
            # Bounds can be None - skip it
            continue
        if not path.is_file():
            _print_result(LogColors.RED, f"{name} MISSING FILE {test_name}")
            msg = f"Missing {name} file: {str(path)}"
            return (TestResult.MISSING_FILE, 0.0, msg)
        try:
            arrays[idx] = np.load(path)
        except Exception as e:
            _print_result(LogColors.RED, f"{name} INCORRECT FORMAT {test_name}")
            msg = f"Incorrect numpy format of {str(path)}\nnumpy.load exception: {e}"
            return (TestResult.INCORRECT_FORMAT, 0.0, msg)

    if test_desc and "meta" in test_desc and "compliance" in test_desc["meta"]:
        # Switch to using the verifier library for full compliance
        if ofm_name is None:
            ofm_name = test_desc["ofm_name"][0]
            if len(test_desc["ofm_name"]) > 1:
                _print_result(LogColors.RED, f"Output Name MISSING FILE {test_name}")
                msg = "Must specify output name (ofm_name) to check as multiple found in desc.json"
                return (TestResult.MISSING_FILE, 0.0, msg)

        compliance_json = test_desc["meta"]["compliance"]

        return compliance_check(
            *arrays,
            test_name,
            compliance_json,
            ofm_name,
            verify_lib_path,
        )

    # Else continue with original checking method
    test_result, reference_result, _ = arrays

    # Type comparison
    if test_result.dtype != reference_result.dtype:
        _print_result(LogColors.RED, "Results TYPE MISMATCH {}".format(test_name))
        msg = "Mismatch results type: Expected {}, got {}".format(
            reference_result.dtype, test_result.dtype
        )
        return (TestResult.MISMATCH, 0.0, msg)

    # Size comparison
    # Size = 1 tensors can be equivalently represented as having rank 0 or rank
    # >= 0, allow that special case
    test_result = np.squeeze(test_result)
    reference_result = np.squeeze(reference_result)
    difference = None

    if np.shape(test_result) != np.shape(reference_result):
        _print_result(LogColors.RED, "Results MISCOMPARE {}".format(test_name))
        # Report the reference shape first so it lines up with its label
        msg = "Shapes mismatch: Reference {} vs {}".format(
            np.shape(reference_result), np.shape(test_result)
        )
        return (TestResult.MISMATCH, 0.0, msg)

    # Perform miscellaneous checks
    if "bf16" in misc_checks:
        # Ensure floats are valid bfloat16 values
        test_res_is_bf16 = all(float32_is_valid_bfloat16(f) for f in test_result.flat)
        ref_res_is_bf16 = all(
            float32_is_valid_bfloat16(f) for f in reference_result.flat
        )
        if not (test_res_is_bf16 and ref_res_is_bf16):
            msg = (
                "All output values must be valid bfloat16. "
                f"reference_result: {ref_res_is_bf16}; test_result: {test_res_is_bf16}"
            )
            return (TestResult.INCORRECT_FORMAT, 0.0, msg)
    if "fp8e4m3" in misc_checks or "fp8e5m2" in misc_checks:
        # Ensure floats are valid float8 values
        test_res_is_fp8 = all(float32_is_valid_float8(f) for f in test_result.flat)
        ref_res_is_fp8 = all(
            float32_is_valid_float8(f) for f in reference_result.flat
        )
        if not (test_res_is_fp8 and ref_res_is_fp8):
            msg = (
                "All output values must be valid float8. "
                f"reference_result: {ref_res_is_fp8}; test_result: {test_res_is_fp8}"
            )
            return (TestResult.INCORRECT_FORMAT, 0.0, msg)

    # for quantized test, allow +-(quantize_tolerance) error
    if reference_result.dtype in (
        np.int8,
        np.int16,
        np.int32,
        np.int64,
        np.uint8,
        np.uint16,
    ):
        # Widen before subtracting: narrow and unsigned types wrap around,
        # which could hide a large mismatch or exaggerate a small one
        int_difference = reference_result.astype(np.int64) - test_result.astype(
            np.int64
        )
        abs_difference = np.absolute(int_difference)

        if np.all(abs_difference <= quantize_tolerance):
            _print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
            return (TestResult.PASS, 0.0, "")

        # Find the smallest tolerance (capped at 10) that would have passed
        tolerance = quantize_tolerance + 1
        while not np.all(abs_difference <= tolerance):
            tolerance = tolerance + 1
            if tolerance > 10:
                break

        if tolerance > 10:
            msg = "Integer result does not match and is greater than 10 difference"
        else:
            msg = "Integer result does not match but is within {} difference".format(
                tolerance
            )
        # Fall-through to below to add failure values
        difference = int_difference

    elif reference_result.dtype == bool:
        # Both dtypes are known to be equal from the type comparison above.
        # All boolean values must match
        if np.array_equal(reference_result, test_result):
            _print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
            return (TestResult.PASS, 0.0, "")
        msg = "Boolean result does not match"
        tolerance = 0.0
        difference = None
        # Fall-through to below to add failure values

    # TODO: update for fp16 tolerance
    elif reference_result.dtype == np.float32 or reference_result.dtype == np.float16:
        tolerance = float_tolerance
        if np.allclose(reference_result, test_result, atol=tolerance, equal_nan=True):
            _print_result(LogColors.GREEN, "Results PASS {}".format(test_name))
            return (TestResult.PASS, tolerance, "")
        msg = "Float result does not match within tolerance of {}".format(tolerance)
        difference = reference_result - test_result
        # Fall-through to below to add failure values
    else:
        _print_result(LogColors.RED, "Results UNSUPPORTED TYPE {}".format(test_name))
        msg = "Unsupported results type: {}".format(reference_result.dtype)
        return (TestResult.MISMATCH, 0.0, msg)

    # Fall-through for mismatch failure to add values to msg
    _print_result(LogColors.RED, "Results MISCOMPARE {}".format(test_name))
    # Keep the array dumps in the message to a readable size
    np.set_printoptions(threshold=128, edgeitems=2)

    if difference is not None:
        tolerance_needed = np.amax(np.absolute(difference))
        msg = "{}\n-- tolerance_needed: {}".format(msg, tolerance_needed)

    msg = "{}\n>> reference_result: {}\n{}".format(
        msg, reference_result.shape, reference_result
    )
    msg = "{}\n<< test_result: {}\n{}".format(msg, test_result.shape, test_result)

    if difference is not None:
        msg = "{}\n!! difference_result: \n{}".format(msg, difference)
    return (TestResult.MISMATCH, tolerance, msg)

287

288

289

def main(argv=None):

Jeremy Johnson

2022-09-15 10:38:17 +0100

[diff] [blame]

290

"""Check that the supplied reference and result files have the same contents."""

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

291

parser = argparse.ArgumentParser()

292

parser.add_argument(

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

293

"ref_result_path",

294

type=Path,

295

help="path to the reference model result file to check",

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

296

)

297

parser.add_argument(

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

298

"imp_result_path",

299

type=Path,

300

help="path to the implementation result file to check",

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

301

)

Jeremy Johnson

2022-09-15 10:38:17 +0100

[diff] [blame]

302

parser.add_argument(

303

"--fp-tolerance", type=float, default=DEFAULT_FP_TOLERANCE, help="FP tolerance"

304

)

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

305

parser.add_argument(

Jeremy Johnson

2023-11-27 15:02:04 +0000

[diff] [blame]

306

"--test-path", type=Path, help="path to the test that produced the results"

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

307

)

Jeremy Johnson

2023-11-27 15:02:04 +0000

[diff] [blame]

308

# Deprecate the incorrectly formatted option by hiding it

309

parser.add_argument("--test_path", type=Path, help=argparse.SUPPRESS)

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

parser.add_argument(

"--bnd-result-path",

type=Path,

help="path to the reference model bounds result file for the dot product compliance check",

)

parser.add_argument(

"--ofm-name",

type=str,

help="name of the output tensor to check, defaults to the first ofm_name listed in the test",

)

parser.add_argument(

"--verify-lib-path",

type=Path,

help="path to TOSA verify library",

324

)

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

325

args = parser.parse_args(argv)

Jeremy Johnson

2021-12-15 17:14:56 +0000

[diff] [blame]

326

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

327

if args.test_path:

328

# Get details from the test path

329

test_desc_path = args.test_path / "desc.json"

330

if not args.test_path.is_dir() or not test_desc_path.is_file():

331

print(f"Invalid test directory {str(args.test_path)}")

332

return TestResult.MISSING_FILE

333

334

try:

335

with test_desc_path.open("r") as fd:

336

test_desc = json.load(fd)

337

except Exception as e:

338

print(f"Invalid test description file {str(test_desc_path)}: {e}")

339

return TestResult.INCORRECT_FORMAT

340

test_name = args.test_path.name

else:

test_desc = None

test_name = None

Jeremy Johnson

2022-09-15 10:38:17 +0100

[diff] [blame]

345

result, tolerance, msg = test_check(

Jeremy Johnson

2023-09-14 17:02:09 +0100

[diff] [blame]

346

args.ref_result_path,

347

args.imp_result_path,

348

float_tolerance=args.fp_tolerance,

349

test_name=test_name,

350

test_desc=test_desc,

351

bnd_result_path=args.bnd_result_path,

352

ofm_name=args.ofm_name,

353

verify_lib_path=args.verify_lib_path,

Jeremy Johnson

2022-09-15 10:38:17 +0100

[diff] [blame]

354

)

Jeremy Johnson