Blame - ethosu/vela/vela.py - ml/ethos-u/ethos-u-vela

2021-02-03 10:20:16 +0100

[diff] [blame]

1

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Main entry point for the Vela compiler.

18

#

19

# Provides the command line interface, options parsing, and network loading before calling the compiler driver.

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

20

import argparse

Tim Hall

1bd531d

2020-11-01 20:59:36 +0000

[diff] [blame]

21

import os

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

22

import sys

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

23

import time

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

24

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

25

import flatbuffers

26

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

27

from . import architecture_features

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

28

from . import compiler_driver

29

from . import model_reader

30

from . import scheduler

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

31

from . import stats_writer

32

from . import tflite_writer

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

33

from ._version import __version__

Louis Verhaard

11831ce

2020-11-18 18:53:24 +0100

[diff] [blame]

34

from .api import API_VERSION

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

35

from .debug_database import DebugDatabase

Louis Verhaard

7db7896

2020-05-25 15:05:26 +0200

[diff] [blame]

36

from .errors import InputFileError

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

37

from .errors import VelaError

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

38

from .nn_graph import PassPlacement

39

from .nn_graph import TensorAllocator

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

40

from .supported_operators import SupportedOperators

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

41

from .tensor import MemArea

Jacob Bohlin

0628a8c

2020-08-28 13:25:14 +0200

[diff] [blame]

42

from .tensor import Tensor

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

43

from .tflite.Model import Model

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

44

from .tflite_mapping import builtin_operator_map

45

from .tflite_mapping import builtin_type_name

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

46

from ethosu.vela.architecture_features import ArchitectureFeatures

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

47

48

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

49

def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read the network in `input_name`, compile it and write the results.

    The model is read with the model reader, handed to the compiler driver, and then a summary CSV is written and
    the performance metrics are printed. If `input_name` ends with ".tflite" the compiled network is written as
    "<basename>_vela.tflite". If `enable_debug_db` is set, stream offsets are registered with the DebugDatabase
    and "<basename>_debug.xml" is written. All files go into `compiler_options.output_dir`, which is created if
    it does not exist. When `compiler_options.timing` is set, the time taken by each phase is printed.

    Returns the network graph obtained from the model reader.
    Raises InputFileError if the model reader returns nothing for `input_name`.
    """
    if compiler_options.timing:
        start = time.time()

    # All generated files share the input's file name (without extension) inside the output directory
    out_dir = compiler_options.output_dir
    os.makedirs(out_dir, exist_ok=True)
    network_stem = os.path.splitext(os.path.basename(input_name))[0]
    output_basename = os.path.join(out_dir, network_stem)
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)
    if not nng:
        raise InputFileError(input_name, "Input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        stop = time.time()
        print("Model reading took %f s" % (stop - start))
        # Restart the clock for the compilation phase
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    stats_writer.write_summary_metrics_csv(
        nng, "{0}_summary_{1}.csv".format(output_basename, arch.system_config), arch
    )
    stats_writer.print_performance_metrics(
        nng,
        show_cpu_operations=compiler_options.show_cpu_operations,
        verbose_weights=compiler_options.verbose_weights,
        arch=arch,
    )

    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        # The n-th smallest offset is paired with the subgraph whose generated stream id is n
        ordered_offsets = sorted(calculate_operator_file_offsets(output_filename))
        for stream_idx, stream_offset in enumerate(ordered_offsets):
            subgraph = find_subgraph_with_command_stream_order(nng, stream_idx)
            if subgraph is None:
                continue
            DebugDatabase.set_stream_offset(subgraph, stream_offset)
        DebugDatabase.write(output_basename + "_debug.xml", input_name, output_filename)

    if compiler_options.timing:
        stop = time.time()
        print("Compiler driver took %f s" % (stop - start))

    return nng

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

102

def find_subgraph_with_command_stream_order(nng, idx):
    """Return the first subgraph of `nng` whose `generated_stream_id` equals `idx`, or None if there is none."""
    matching = (sg for sg in nng.subgraphs if sg.generated_stream_id == idx)
    return next(matching, None)

def calculate_operator_file_offsets(name: str):
    """Collect one offset per custom operator found in the vela optimized tflite file `name`.

    For every operator whose operator code carries a custom code, the buffer referenced by the operator's first
    input tensor is looked up and the value of `Vector()` on that buffer's table is recorded.

    Returns the recorded values as a list, in the order the operators were visited.
    """
    # Read the vela optimized tflite file
    with open(name, "rb") as tflite_file:
        raw = bytearray(tflite_file.read())

    model = Model.GetRootAsModel(raw, 0)

    # Calculate the file offsets for each custom operator
    file_offsets = []
    # However only one subgraph is supported as of now
    for sg_idx in range(model.SubgraphsLength()):
        sg = model.Subgraphs(sg_idx)
        for op_idx in range(sg.OperatorsLength()):
            operator = sg.Operators(op_idx)
            if model.OperatorCodes(operator.OpcodeIndex()).CustomCode() is None:
                continue
            tensor = sg.Tensors(operator.Inputs(0))
            buffer = model.Buffers(tensor.Buffer())
            # NOTE(review): presumably slot 4 is the buffer's data field and Vector() yields where that data
            # starts within the file - confirm against the flatbuffers Table API
            field_offset = flatbuffers.number_types.UOffsetTFlags.py_type(buffer._tab.Offset(4))
            file_offsets.append(buffer._tab.Vector(field_offset))

    return file_offsets

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

129

def print_subgraph_io_summary(nng):
    """Print the sizes of the input, scratch and output tensors of every subgraph in `nng`.

    Subgraphs are visited in reverse order. Tensors are only listed for subgraphs placed on the Npu; for every
    subgraph the total listed tensor size and the Sram memory used are printed, followed at the end by the
    largest total seen. All sizes are reported in KiB.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    max_sg_size = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))

        sg_size = 0
        if sg.placement == PassPlacement.Npu:
            for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
                # Anything that is neither a declared input nor a declared output is reported as "In/Out"
                tens_dir = "In" if tens in sg.input_tensors else "Out" if tens in sg.output_tensors else "In/Out"
                size = tens.elements() * tens.element_size() / 1024.0
                sg_size += size
                print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))

        sram_used = sg.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(sg_size))
        print(" SRAM Memory Used = {0} KiB".format(sram_used))
        max_sg_size = max(sg_size, max_sg_size)

    print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))

160

161

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

162

def generate_supported_ops():
    """Write the SUPPORTED_OPS.md report into the current working directory.

    The report consists of a summary table with one row per builtin operator whose internal operator is in
    `SupportedOperators.supported_operators`, a section listing the generic constraints, and one section per
    operator that has specific constraints. Each constraint is rendered as a bullet taken from the constraint's
    docstring. The path of the written file is printed when done.
    """

    def constraint_bullets(constraints):
        # Markdown needs two spaces at the end of a line to render it as a separate line
        return ["- " + constraint.__doc__.replace("\n", "  \n") for constraint in constraints]

    # Lines ending in two spaces are deliberate Markdown hard line breaks
    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter.  ",
        f"Vela version: `{__version__}`",
        "",
        "This file complies with",
        "[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU.  ",
        "If the constraints are not met, then that operator will be scheduled on the CPU instead.  ",
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU.  ",
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| --- | --- |",
    ]
    supported = SupportedOperators()
    # (internal operator, display name) for every operator that needs its own constraints section
    op_constraint_links = []
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op not in SupportedOperators.supported_operators:
            continue
        links = "[Generic](#generic-constraints)"
        if internal_op in supported.specific_constraints:
            links += f", [Specific](#{name.lower()}-constraints)"
            op_constraint_links.append((internal_op, name))
        lines.append(f"| {name} | {links} |")

    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines += constraint_bullets(supported.generic_constraints)

    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines += constraint_bullets(supported.specific_constraints[op])

    # Note. this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    with open(filepath, "wt") as md:
        md.writelines(line + "\n" for line in lines)
    print(f"Report file: {filepath}")

222

223

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

224

def main(args=None):

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

225

try:

226

if args is None:

227

args = sys.argv[1:]

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

228

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

229

parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")

230

parser.add_argument("--version", action="version", version=__version__)

231

parser.add_argument(

232

"--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."

233

)

234

parser.add_argument(

235

"--supported-ops-report",

236

action="store_true",

237

help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",

Tim Hall

b9b515c

2020-11-01 21:27:19 +0000

[diff] [blame]

238

)

Jacob Bohlin

0628a8c

2020-08-28 13:25:14 +0200

[diff] [blame]

239

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

240

# set network nargs to be optional to allow the support-ops-report CLI option to be used standalone

parser.add_argument(

"network",

metavar="NETWORK",

type=str,

default=None,

nargs="?",

help="Filename of the input TensorFlow Lite for Microcontrollers network",

248

)

249

parser.add_argument(

250

"--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"

)

parser.add_argument(

"--enable-debug-db",

action="store_true",

default=None,

help="Enables the calculation and writing of a network debug database to output directory",

)

parser.add_argument(

"--config",

type=str,

action="append",

help="Vela configuration file(s) in Python ConfigParser .ini file format",

263

)

264

parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options")

265

parser.add_argument(

266

"--verbose-config", action="store_true", help="Verbose system configuration and memory mode"

267

)

268

parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")

269

parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")

270

parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")

271

parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")

272

parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")

273

parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

274

parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")

275

parser.add_argument(

276

"--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"

277

)

278

parser.add_argument(

279

"--verbose-register-command-stream", action="store_true", help="Verbose register command stream"

280

)

281

parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

Fredrik Svedberg

f5c07c4

2021-04-23 14:36:42 +0200

[diff] [blame]

282

parser.add_argument("--verbose-weights", action="store_true", help="Verbose weights information")

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

283

parser.add_argument(

284

"--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"

285

)

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

286

parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")

287

parser.add_argument(

288

"--accelerator-config",

289

type=str,

290

default="ethos-u55-256",

291

choices=list(architecture_features.Accelerator.member_list()),

292

help="Accelerator configuration to use (default: %(default)s)",

)

parser.add_argument(

"--system-config",

type=str,

default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,

298

help="System configuration to select from the Vela configuration file (default: %(default)s)",

)

parser.add_argument(

"--memory-mode",

type=str,

default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,

304

help="Memory mode to select from the Vela configuration file (default: %(default)s)",

305

)

306

parser.add_argument(

307

"--tensor-allocator",

308

default=TensorAllocator.HillClimb,

309

type=lambda s: TensorAllocator[s],

310

choices=list(TensorAllocator),

311

help="Tensor Allocator algorithm (default: %(default)s)",

312

)

313

parser.add_argument(

314

"--show-subgraph-io-summary",

315

action="store_true",

316

help="Shows a summary of all the subgraphs and their inputs and outputs",

317

)

318

parser.add_argument(

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

319

"--max-block-dependency",

320

type=int,

321

default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,

322

choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),

323

help=(

324

"Set the maximum value that can be used for the block dependency between npu kernel operations"

325

" (default: %(default)s)"

326

),

327

)

328

parser.add_argument(

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame^]

329

"--optimise",

330

type=lambda s: scheduler.OptimizationStrategy[s],

331

default=scheduler.OptimizationStrategy.Performance,

332

choices=list(scheduler.OptimizationStrategy),

333

help=(

334

"Set the optimisation strategy. The Size strategy results in minimal SRAM usage (does not use"

335

" arena-cache-size). The Performance strategy results in maximal performance (uses the arena-cache-size"

336

" if specified) (default: %(default)s)"

337

),

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

338

)

339

parser.add_argument(

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame^]

340

"--arena-cache-size",

341

type=int,

342

help=(

343

"Set the size of the arena cache memory area, in bytes. If specified, this option overrides the memory"

344

" mode attribute with the same name in a Vela configuration file"

345

),

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

346

)

347

parser.add_argument(

348

"--cpu-tensor-alignment",

349

type=int,

350

default=Tensor.AllocationQuantum,

351

help=(

352

"Controls the allocation byte alignment of cpu tensors including Ethos-U Custom"

353

" operator inputs and outputs (default: %(default)s)"

354

),

355

)

356

args = parser.parse_args(args=args)

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

357

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

358

# Generate the supported ops report and exit

359

if args.supported_ops_report:

360

generate_supported_ops()

361

return 0

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

362

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

363

if args.network is None:

364

parser.error("the following argument is required: NETWORK")

Michael McGeagh

2fa40ae

2020-12-02 10:55:04 +0000

[diff] [blame]

365

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

366

# check all config files exist because they will be read as a group

367

if args.config is not None:

368

for filename in args.config:

369

if not os.access(filename, os.R_OK):

370

raise InputFileError(filename, "File not found or is not readable")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

371

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

372

if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0:

373

parser.error(

374

"Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"

375

"".format(args.cpu_tensor_alignment)

376

)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

377

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

378

if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG:

379

print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

380

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

381

if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:

382

print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

383

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

384

if args.verbose_all:

385

for v in vars(args):

386

if v.startswith("verbose") and v != "verbose_all":

387

setattr(args, v, True)

388

389

arch = architecture_features.ArchitectureFeatures(

390

vela_config_files=args.config,

391

system_config=args.system_config,

392

memory_mode=args.memory_mode,

393

accelerator_config=args.accelerator_config,

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

394

max_blockdep=args.max_block_dependency,

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

395

verbose_config=args.verbose_config,

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame^]

396

arena_cache_size=args.arena_cache_size,

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

397

)

398

399

compiler_options = compiler_driver.CompilerOptions(

400

verbose_graph=args.verbose_graph,

401

verbose_quantization=args.verbose_quantization,

402

verbose_packing=args.verbose_packing,

403

verbose_tensor_purpose=args.verbose_tensor_purpose,

404

verbose_tensor_format=args.verbose_tensor_format,

405

verbose_allocation=args.verbose_allocation,

406

verbose_high_level_command_stream=args.verbose_high_level_command_stream,

407

verbose_register_command_stream=args.verbose_register_command_stream,

408

verbose_operators=args.verbose_operators,

Fredrik Svedberg

f5c07c4

2021-04-23 14:36:42 +0200

[diff] [blame]

409

verbose_weights=args.verbose_weights,

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

410

show_cpu_operations=args.show_cpu_operations,

411

tensor_allocator=args.tensor_allocator,

412

timing=args.timing,

413

output_dir=args.output_dir,

414

cpu_tensor_alignment=args.cpu_tensor_alignment,

415

)

416

417

scheduler_options = scheduler.SchedulerOptions(

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame^]

418

optimization_strategy=args.optimise,

419

sram_target=arch.arena_cache_size,

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame]

420

verbose_schedule=args.verbose_schedule,

Henrik G Olsson