Blame - ethosu/vela/vela.py - ml/ethos-u/ethos-u-vela

2021-02-03 10:20:16 +0100

[diff] [blame]

1

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Main entry point for the Vela compiler.

18

#

19

# Provides the command line interface, options parsing, and network loading before calling the compiler driver.

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

20

import argparse

21

import ast

Tim Hall

1bd531d

2020-11-01 20:59:36 +0000

[diff] [blame]

22

import os

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

23

import sys

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

24

import time

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

25

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

26

import flatbuffers

27

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

28

from . import architecture_features

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

29

from . import compiler_driver

30

from . import model_reader

31

from . import scheduler

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

32

from . import stats_writer

33

from . import tflite_writer

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

34

from ._version import __version__

Louis Verhaard

11831ce

2020-11-18 18:53:24 +0100

[diff] [blame]

35

from .api import API_VERSION

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

36

from .debug_database import DebugDatabase

Louis Verhaard

7db7896

2020-05-25 15:05:26 +0200

[diff] [blame]

37

from .errors import InputFileError

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

38

from .errors import VelaError

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

39

from .nn_graph import PassPlacement

40

from .nn_graph import TensorAllocator

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

41

from .scheduler import ParetoMetric

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

42

from .supported_operators import SupportedOperators

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

43

from .tensor import MemArea

Jacob Bohlin

0628a8c

2020-08-28 13:25:14 +0200

[diff] [blame]

44

from .tensor import Tensor

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

45

from .tflite.Model import Model

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

46

from .tflite_mapping import builtin_operator_map

47

from .tflite_mapping import builtin_type_name

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

48

from ethosu.vela.architecture_features import ArchitectureFeatures

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

49

50

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

51

def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network, compile it and write the compilation outputs.

    The network is read with ``model_reader.read_model``, compiled by
    ``compiler_driver.compiler_driver`` and then reported on: the pass-breakdown
    and summary CSV files are written, the performance metrics are printed and,
    when ``input_name`` ends with ``.tflite``, the optimised ``*_vela.tflite``
    file is written. All files go to ``compiler_options.output_dir``, which is
    created if it does not already exist.

    Args:
        input_name: Path of the network file to compile.
        enable_debug_db: When true, stream offsets are recorded and a
            ``*_debug.xml`` debug database is written next to the other outputs.
        arch: Architecture object handed to the compiler driver and the
            statistics writers; ``arch.system_config`` is used in the CSV names.
        model_reader_options: Options forwarded to ``model_reader.read_model``.
        compiler_options: Compiler options. ``timing``, ``output_dir``,
            ``verbose_operators``, ``show_cpu_operations`` and
            ``verbose_weights`` are read directly by this function.
        scheduler_options: Options forwarded to the compiler driver.

    Returns:
        The network graph produced by the model reader, after compilation.

    Raises:
        InputFileError: If the model reader does not return a graph.
    """
    timing = compiler_options.timing
    if timing:
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # wall-clock adjustments the way time.time() differences can.
        start = time.perf_counter()

    os.makedirs(compiler_options.output_dir, exist_ok=True)
    input_stem = os.path.splitext(os.path.basename(input_name))[0]
    output_basename = os.path.join(compiler_options.output_dir, input_stem)
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)

    if not nng:
        raise InputFileError(input_name, "Input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if timing:
        stop = time.perf_counter()
        print("Model reading took %f s" % (stop - start))
        start = time.perf_counter()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    passes_csv_file = "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_pass_metrics_csv(nng, passes_csv_file)

    summary_csv_file = "{0}_summary_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)

    stats_writer.print_performance_metrics(
        nng,
        show_cpu_operations=compiler_options.show_cpu_operations,
        verbose_weights=compiler_options.verbose_weights,
        arch=arch,
    )

    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        # NOTE(review): this reads output_filename back from disk, and that file
        # is only written above for ".tflite" inputs - confirm that no other
        # input type can reach this point.
        file_offsets = calculate_operator_file_offsets(output_filename)
        # The n-th smallest offset is assigned to the subgraph whose
        # generated_stream_id is n.
        for idx, offset in enumerate(sorted(file_offsets)):
            sg = find_subgraph_with_command_stream_order(nng, idx)
            if sg is not None:
                DebugDatabase.set_stream_offset(sg, offset)
        debug_filename = output_basename + "_debug.xml"
        DebugDatabase.write(debug_filename, input_name, output_filename)

    if timing:
        stop = time.perf_counter()
        print("Compiler driver took %f s" % (stop - start))

    return nng

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

107

def find_subgraph_with_command_stream_order(nng, idx):
    """Return the first subgraph whose ``generated_stream_id`` equals ``idx``.

    Args:
        nng: Network graph; its ``subgraphs`` attribute is searched in order.
        idx: Stream id to look for.

    Returns:
        The first matching subgraph, or ``None`` when no subgraph matches.
    """
    return next((sg for sg in nng.subgraphs if sg.generated_stream_id == idx), None)

def calculate_operator_file_offsets(name: str) -> list:
    """Collect a buffer position for every custom operator in a TFLite file.

    The file is parsed as a TFLite flatbuffer. For each operator whose operator
    code has a custom code, the buffer backing the operator's first input tensor
    is looked up and the position of that buffer's vector is recorded.

    Args:
        name: Path of the (Vela optimised) TFLite file to read.

    Returns:
        A list with one entry per custom operator, in the order the operators
        are visited (subgraph by subgraph, operator by operator).
    """
    # Read the vela optimized tflite file
    with open(name, "rb") as f:
        buf = bytearray(f.read())
    # Calculate the file offsets for each custom operator
    file_offsets = []
    model = Model.GetRootAsModel(buf, 0)
    # Distinct index names per loop: the inner loop previously reused the outer
    # loop's variable name.
    for sg_idx in range(model.SubgraphsLength()):  # However only one subgraph is supported as of now
        sg = model.Subgraphs(sg_idx)
        for op_idx in range(sg.OperatorsLength()):
            op = sg.Operators(op_idx)
            if model.OperatorCodes(op.OpcodeIndex()).CustomCode() is not None:
                tensor_idx = op.Inputs(0)
                tensor = sg.Tensors(tensor_idx)
                buffer = model.Buffers(tensor.Buffer())
                # NOTE(review): vtable slot 4 is presumably the Buffer table's
                # data field - confirm against the generated tflite schema code.
                offset = flatbuffers.number_types.UOffsetTFlags.py_type(buffer._tab.Offset(4))
                file_offsets.append(buffer._tab.Vector(offset))
    return file_offsets

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

134

def print_subgraph_io_summary(nng):
    """Print a summary of all the input and output tensor sizes for all subgraphs.

    Subgraphs are visited in reverse order. For each subgraph placed on the NPU
    the input tensors, the scratch tensor and the output tensors are listed with
    their size in KiB, followed by the subgraph total and its SRAM memory usage.
    The largest subgraph total is printed last.

    Args:
        nng: Network graph providing ``name`` and ``subgraphs``.
    """

    print("Subgraph IO Summary")
    print("-------------------")
    print(f"NNG: {nng.name}")
    max_sg_size = 0
    for sg in reversed(nng.subgraphs):
        print(f" Subgraph: {sg.name} = {sg.placement}")
        sg_size = 0

        # Only NPU subgraphs are broken down and counted towards the maximum
        if sg.placement == PassPlacement.Npu:
            for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
                # Input membership is checked first, so a tensor present in both
                # lists is reported as an input
                if tens in sg.input_tensors:
                    tens_dir = "In"
                elif tens in sg.output_tensors:
                    tens_dir = "Out"
                else:
                    tens_dir = "In/Out"

                size = tens.elements() * tens.element_size() / 1024.0
                sg_size = sg_size + size
                print(f" Tensor [{tens_dir}]: {tens.name} = {size} KiB")

            print(f" Total Size = {sg_size} KiB")
            sram_used = sg.memory_used.get(MemArea.Sram, 0) / 1024.0
            print(f" SRAM Memory Used = {sram_used} KiB")
            max_sg_size = max(sg_size, max_sg_size)

    print(f" Maximum Subgraph Size = {max_sg_size} KiB")

165

166

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

167

def _constraint_bullets(constraints):
    """Return one Markdown bullet line per constraint, built from its docstring."""
    # Markdown needs two spaces at the end of a line to render it as a separate line
    return ["- " + constraint.__doc__.replace("\n", "  \n") for constraint in constraints]


def generate_supported_ops():
    """Write the ``SUPPORTED_OPS.md`` report to the current working directory.

    The report contains a summary table of the operators in
    ``builtin_operator_map`` that are listed in
    ``SupportedOperators.supported_operators`` (sorted by type name), followed
    by the generic constraints and one section per operator that has specific
    constraints. The constraint text is taken from each constraint's docstring.
    The path of the written file is printed when done.
    """
    # Lines ending in two spaces are Markdown hard line breaks
    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter.  ",
        f"Vela version: `{__version__}`",
        "",
        "This file complies with",
        "[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU.  ",
        "If the constraints are not met, then that operator will be scheduled on the CPU instead.  ",
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU.  ",
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| --- | --- |",
    ]
    supported = SupportedOperators()
    # (internal op, display name) pairs that get their own constraints section
    op_constraint_links = []
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op in SupportedOperators.supported_operators:
            links = "[Generic](#generic-constraints)"
            if internal_op in supported.specific_constraints:
                links += f", [Specific](#{name.lower()}-constraints)"
                op_constraint_links.append((internal_op, name))
            lines.append(f"| {name} | {links} |")
    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines += _constraint_bullets(supported.generic_constraints)
    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines += _constraint_bullets(supported.specific_constraints[op])

    # Note. this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    with open(filepath, "wt") as md:
        md.writelines(line + "\n" for line in lines)
        print(f"Report file: {filepath}")

227

228

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

229

def main(args=None):

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

230

try:

231

if args is None:

232

args = sys.argv[1:]

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

233

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

234

parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")

235

parser.add_argument("--version", action="version", version=__version__)

236

parser.add_argument(

237

"--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."

238

)

239

parser.add_argument(

240

"--supported-ops-report",

241

action="store_true",

242

help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",

Tim Hall

b9b515c

2020-11-01 21:27:19 +0000

[diff] [blame]

243

)

Jacob Bohlin

0628a8c

2020-08-28 13:25:14 +0200

[diff] [blame]

244

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

245

# set network nargs to be optional to allow the support-ops-report CLI option to be used standalone

parser.add_argument(

"network",

metavar="NETWORK",

type=str,

default=None,

nargs="?",

help="Filename of the input TensorFlow Lite for Microcontrollers network",

253

)

254

parser.add_argument(

255

"--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"

)

parser.add_argument(

"--enable-debug-db",

action="store_true",

default=None,

help="Enables the calculation and writing of a network debug database to output directory",

)

parser.add_argument(

"--config",

type=str,

action="append",

help="Vela configuration file(s) in Python ConfigParser .ini file format",

268

)

269

parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options")

270

parser.add_argument(

271

"--verbose-config", action="store_true", help="Verbose system configuration and memory mode"

272

)

273

parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")

274

parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")

275

parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")

276

parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")

277

parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")

278

parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")

279

parser.add_argument(

280

"--verbose-pareto-frontier-schedules",

281

action="store_true",

282

help="Show all schedules along the pareto frontier of optimisation criteria",

283

)

284

parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")

285

parser.add_argument(

286

"--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"

287

)

288

parser.add_argument(

289

"--verbose-register-command-stream", action="store_true", help="Verbose register command stream"

290

)

291

parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

Fredrik Svedberg

f5c07c4

2021-04-23 14:36:42 +0200

[diff] [blame^]

292

parser.add_argument("--verbose-weights", action="store_true", help="Verbose weights information")

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

293

parser.add_argument(

294

"--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"

295

)

296

parser.add_argument(

297

"--cache-bias-scale-tensor",

298

type=ast.literal_eval,

299

default=True,

300

choices=[True, False],

301

help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)",

)

parser.add_argument(

"--cascading",

type=ast.literal_eval,

306

default=True,

307

choices=[True, False],

308

help="Controls the packing of multiple passes into a cascade (default: %(default)s)",

309

)

310

parser.add_argument(

311

"--force-block-config", type=str, default="", help="Force a specific block configuration WxHxC"

312

)

313

parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")

314

parser.add_argument(

315

"--accelerator-config",

316

type=str,

317

default="ethos-u55-256",

318

choices=list(architecture_features.Accelerator.member_list()),

319

help="Accelerator configuration to use (default: %(default)s)",

)

parser.add_argument(

"--system-config",

type=str,

default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,

325

help="System configuration to select from the Vela configuration file (default: %(default)s)",

)

parser.add_argument(

"--memory-mode",

type=str,

default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,

331

help="Memory mode to select from the Vela configuration file (default: %(default)s)",

332

)

333

parser.add_argument(

334

"--tensor-allocator",

335

default=TensorAllocator.HillClimb,

336

type=lambda s: TensorAllocator[s],

337

choices=list(TensorAllocator),

338

help="Tensor Allocator algorithm (default: %(default)s)",

339

)

340

parser.add_argument(

341

"--show-subgraph-io-summary",

342

action="store_true",

343

help="Shows a summary of all the subgraphs and their inputs and outputs",

)

parser.add_argument(

"--ifm-streaming",

type=ast.literal_eval,

348

default=True,

349

choices=[True, False],

350

help="Controls scheduler IFM streaming search (default: %(default)s)",

351

)

352

parser.add_argument(

353

"--block-config-limit",

354

type=int,

355

default=16,

356

help="Limit block config search space, use zero for unlimited (default: %(default)s)",

)

parser.add_argument(

"--pareto-metric",

default=ParetoMetric.BwCycMem,

361

type=lambda s: ParetoMetric[s],

362

choices=list(ParetoMetric),

363

help="Controls the calculation of the pareto metric (default: %(default)s)",

)

parser.add_argument(

"--recursion-limit",

type=int,

default=10000,

help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",

370

)

371

parser.add_argument(

372

"--max-block-dependency",

373

type=int,

374

default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,

375

choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),

376

help=(

377

"Set the maximum value that can be used for the block dependency between npu kernel operations"

378

" (default: %(default)s)"

),

)

parser.add_argument(

"--nhcwb16-between-cascaded-passes",

383

type=ast.literal_eval,

384

default=True,

385

choices=[True, False],

386

help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",

387

)

388

parser.add_argument(

389

"--weight-estimation-scaling",

390

type=float,

391

default=1.0,

392

help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),

393

)

394

parser.add_argument(

395

"--cpu-tensor-alignment",

396

type=int,

397

default=Tensor.AllocationQuantum,

398

help=(

399

"Controls the allocation byte alignment of cpu tensors including Ethos-U Custom"

400

" operator inputs and outputs (default: %(default)s)"

401

),

402

)

403

args = parser.parse_args(args=args)

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

404

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

405

# Generate the supported ops report and exit

406

if args.supported_ops_report:

407

generate_supported_ops()

408

return 0

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

409

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

410

if args.network is None:

411

parser.error("the following argument is required: NETWORK")

Michael McGeagh

2fa40ae

2020-12-02 10:55:04 +0000

[diff] [blame]

412

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

413

# check all config files exist because they will be read as a group

414

if args.config is not None:

415

for filename in args.config:

416

if not os.access(filename, os.R_OK):

417

raise InputFileError(filename, "File not found or is not readable")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

418

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

419

sys.setrecursionlimit(args.recursion_limit)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

420

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

421

if args.force_block_config:

422

force_block_config = architecture_features.Block.from_string(args.force_block_config)

423

else:

424

force_block_config = None

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

425

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

426

if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0:

427

parser.error(

428

"Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"

429

"".format(args.cpu_tensor_alignment)

430

)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

431

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

432

if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG:

433

print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

434

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

435

if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:

436

print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

437

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

438

if args.verbose_all:

439

for v in vars(args):

440

if v.startswith("verbose") and v != "verbose_all":

441

setattr(args, v, True)

442

443

arch = architecture_features.ArchitectureFeatures(

444

vela_config_files=args.config,

445

system_config=args.system_config,

446

memory_mode=args.memory_mode,

447

accelerator_config=args.accelerator_config,

448

override_block_config=force_block_config,

449

block_config_limit=args.block_config_limit,

450

max_blockdep=args.max_block_dependency,

451

weight_estimation_scaling=args.weight_estimation_scaling,

452

verbose_config=args.verbose_config,

453

)

454

455

compiler_options = compiler_driver.CompilerOptions(

456

verbose_graph=args.verbose_graph,

457

verbose_quantization=args.verbose_quantization,

458

verbose_packing=args.verbose_packing,

459

verbose_tensor_purpose=args.verbose_tensor_purpose,

460

verbose_tensor_format=args.verbose_tensor_format,

461

verbose_allocation=args.verbose_allocation,

462

verbose_high_level_command_stream=args.verbose_high_level_command_stream,

463

verbose_register_command_stream=args.verbose_register_command_stream,

464

verbose_operators=args.verbose_operators,

Fredrik Svedberg

f5c07c4

2021-04-23 14:36:42 +0200

[diff] [blame^]

465

verbose_weights=args.verbose_weights,

Henrik G Olsson