Blame - ethosu/vela/vela.py - ml/ethos-u/ethos-u-vela

2021-02-03 10:20:16 +0100

[diff] [blame]

1

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Main entry point for the Vela compiler.

18

#

19

# Provides command line interface, options parsing, and network loading before calling the compiler driver.

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

20

import argparse

21

import ast

Tim Hall

1bd531d

2020-11-01 20:59:36 +0000

[diff] [blame]

22

import os

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

23

import sys

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

24

import time

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

25

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

26

import flatbuffers

27

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

28

from . import architecture_features

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

29

from . import compiler_driver

30

from . import model_reader

31

from . import scheduler

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

32

from . import stats_writer

33

from . import tflite_writer

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

34

from ._version import __version__

Louis Verhaard

11831ce

2020-11-18 18:53:24 +0100

[diff] [blame]

35

from .api import API_VERSION

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

36

from .debug_database import DebugDatabase

Louis Verhaard

7db7896

2020-05-25 15:05:26 +0200

[diff] [blame]

37

from .errors import InputFileError

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

38

from .errors import VelaError

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

39

from .nn_graph import PassPlacement

40

from .nn_graph import TensorAllocator

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

41

from .scheduler import ParetoMetric

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

42

from .supported_operators import SupportedOperators

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

43

from .tensor import MemArea

Jacob Bohlin

0628a8c

2020-08-28 13:25:14 +0200

[diff] [blame]

44

from .tensor import Tensor

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

45

from .tflite.Model import Model

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

46

from .tflite_mapping import builtin_operator_map

47

from .tflite_mapping import builtin_type_name

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

48

from ethosu.vela.architecture_features import ArchitectureFeatures

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

49

50

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

51

def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network, run the compiler driver on it and write the result files.

    The output directory given by compiler_options.output_dir is created if needed and all
    output file names are derived from the base name of input_name (extension stripped).

    Args:
        input_name: Path of the network file to read.
        enable_debug_db: When true, command stream offsets are recorded and a
            "<basename>_debug.xml" debug database is written. The value is also assigned to
            DebugDatabase.show_warnings.
        arch: Architecture description handed to the compiler driver and the stats writer;
            its system_config attribute is embedded in the CSV file names.
        model_reader_options: Passed through to model_reader.read_model.
        compiler_options: Supplies output_dir, timing, verbose_operators and
            show_cpu_operations, and is passed through to the compiler driver.
        scheduler_options: Passed through to the compiler driver.

    Returns:
        The network graph returned by model_reader.read_model, after the compiler driver has
        been run on it.

    Raises:
        InputFileError: If model_reader.read_model returns a falsy value.
    """
    if compiler_options.timing:
        start = time.time()

    os.makedirs(compiler_options.output_dir, exist_ok=True)
    output_basename = os.path.join(compiler_options.output_dir, os.path.splitext(os.path.basename(input_name))[0])
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)

    if not nng:
        raise InputFileError(input_name, "Input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        stop = time.time()
        print("Model reading took %f s" % (stop - start))
        # Restart the clock so that the second report only covers the steps below
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Per-pass and summary statistics are written next to the other outputs
    passes_csv_file = "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_pass_metrics_csv(nng, passes_csv_file)

    summary_csv_file = "{0}_summary_{1}.csv".format(output_basename, arch.system_config)
    stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)

    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        # NOTE(review): this reads back output_filename, which this function only writes when
        # input_name ends with ".tflite" - confirm that no other input type can reach here
        file_offsets = calculate_operator_file_offsets(output_filename)
        # The n-th smallest file offset is assigned to the subgraph whose generated_stream_id is n
        for idx, offset in enumerate(sorted(file_offsets)):
            sg = find_subgraph_with_command_stream_order(nng, idx)
            if sg is not None:
                DebugDatabase.set_stream_offset(sg, offset)
        debug_filename = output_basename + "_debug.xml"
        DebugDatabase.write(debug_filename, input_name, output_filename)

    if compiler_options.timing:
        stop = time.time()
        print("Compiler driver took %f s" % (stop - start))

    return nng

erik.andersson@arm.com

2021-02-03 10:20:16 +0100

[diff] [blame]

102

def find_subgraph_with_command_stream_order(nng, idx):
    """Return the first subgraph of nng whose generated_stream_id equals idx, or None."""
    matching = (subgraph for subgraph in nng.subgraphs if subgraph.generated_stream_id == idx)
    return next(matching, None)

def calculate_operator_file_offsets(name: str):
    """Collect one offset per custom operator found in a vela optimized tflite file.

    The whole file is loaded into memory and parsed as a flatbuffer Model. For every operator
    whose operator code has a custom code, the buffer behind the operator's first input tensor
    is looked up and the position of its vector is recorded.

    Args:
        name: Path of the tflite file to inspect.

    Returns:
        A list of offsets in the order the custom operators were encountered.
    """
    # Read the vela optimized tflite file
    with open(name, "rb") as tflite_file:
        raw = bytearray(tflite_file.read())

    # Calculate the file offsets for each custom operator
    model = Model.GetRootAsModel(raw, 0)
    offsets = []
    # However only one subgraph is supported as of now
    for sg_idx in range(model.SubgraphsLength()):
        subgraph = model.Subgraphs(sg_idx)
        for op_idx in range(subgraph.OperatorsLength()):
            op = subgraph.Operators(op_idx)
            if model.OperatorCodes(op.OpcodeIndex()).CustomCode() is None:
                continue
            first_input = subgraph.Tensors(op.Inputs(0))
            data_buffer = model.Buffers(first_input.Buffer())
            # NOTE(review): vtable slot 4 is presumably the Buffer's data vector - confirm
            # against the tflite schema
            field = flatbuffers.number_types.UOffsetTFlags.py_type(data_buffer._tab.Offset(4))
            offsets.append(data_buffer._tab.Vector(field))
    return offsets

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

129

def print_subgraph_io_summary(nng):
    """Print the input/output tensor sizes of every subgraph in the network graph.

    Subgraphs are visited in reverse order. For a subgraph placed on the NPU each input,
    scratch and output tensor is listed with its size in KiB. Every subgraph then reports its
    total size and the SRAM entry of its memory_used table, and the largest total seen is
    printed last.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for subgraph in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(subgraph.name, subgraph.placement))

        total_kib = 0
        if subgraph.placement == PassPlacement.Npu:
            io_tensors = subgraph.input_tensors + [subgraph.scratch_tensor] + subgraph.output_tensors
            for tensor in io_tensors:
                # Membership in the input list wins over the output list; a tensor in neither
                # (normally the scratch tensor) is reported as both directions
                if tensor in subgraph.input_tensors:
                    direction = "In"
                elif tensor in subgraph.output_tensors:
                    direction = "Out"
                else:
                    direction = "In/Out"

                kib = tensor.elements() * tensor.element_size() / 1024.0
                total_kib = total_kib + kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, kib))

        print(" Total Size = {0} KiB".format(total_kib))
        sram_kib = subgraph.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total = max(total_kib, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))

160

161

Michael McGeagh

2020-11-10 12:38:25 +0000

[diff] [blame]

162

def _constraint_bullets(constraints):
    """Render the docstring of each constraint as one Markdown bullet entry."""
    # Markdown needs two spaces at the end of a line to render it as a separate line
    return ["- " + constraint.__doc__.replace("\n", "  \n") for constraint in constraints]


def generate_supported_ops():
    """Write the SUPPORTED_OPS.md report into the current working directory.

    The report contains a summary table of every TFLite operator whose internal operator is in
    SupportedOperators.supported_operators, followed by the generic constraints and, for each
    operator that has them, a section with its specific constraints. Constraint text is taken
    from the docstring of each constraint.

    The path of the written file is printed once the file has been written.
    """
    # Lines that end in two spaces are Markdown hard line breaks; a single trailing space is
    # not enough and the following line would be joined onto the same rendered line
    lines = [
        "# Supported Ops",
        "",
        "This file was automatically generated by Vela using the `--supported-ops-report` parameter.  ",
        f"Vela version: `{__version__}`",
        "",
        "This file complies with",
        "[**Gitiles Markdown syntax**](https://github.com/google/gitiles/blob/master/Documentation/markdown.md)",
        "",
        "## Summary Table",
        "",
        "The table below contains TFLite operators that can be placed on the Ethos-U NPU.  ",
        "If the constraints are not met, then that operator will be scheduled on the CPU instead.  ",
        "For any other TFLite operator not listed, will be left untouched and scheduled on the CPU.  ",
        "Please check the supported operator list for your chosen runtime for further information.",
        "",
        "| Operator | Constraints |",
        "| --- | --- |",
    ]
    supported = SupportedOperators()
    # (internal operator, display name) for every operator that needs its own constraints section
    op_constraint_links = []
    # Table rows are ordered alphabetically by operator display name
    op_list = sorted(((op, builtin_type_name(op)) for op in builtin_operator_map), key=lambda x: x[1])
    for op, name in op_list:
        internal_op = builtin_operator_map[op][0]
        if internal_op in SupportedOperators.supported_operators:
            links = "[Generic](#generic-constraints)"
            if internal_op in supported.specific_constraints:
                # The anchor must match the "## <name> Constraints" heading emitted below
                links += f", [Specific](#{name.lower()}-constraints)"
                op_constraint_links.append((internal_op, name))
            lines.append(f"| {name} | {links} |")
    lines += [
        "",
        "## Generic Constraints",
        "",
        "This is a list of constraints that all NPU operators must satisfy in order to be scheduled on the NPU.",
        "",
    ]
    lines += _constraint_bullets(supported.generic_constraints)
    for op, name in op_constraint_links:
        lines += [
            "",
            f"## {name} Constraints",
            "",
            f"This is a list of constraints that the {name} operator must satisfy in order to be scheduled on the NPU.",
            "",
        ]
        lines += _constraint_bullets(supported.specific_constraints[op])

    # Note. this will generate the file in the CWD
    filepath = os.path.join(os.getcwd(), "SUPPORTED_OPS.md")
    with open(filepath, "wt") as md:
        md.writelines(line + "\n" for line in lines)
    print(f"Report file: {filepath}")

222

223

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

224

def main(args=None):

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

225

try:

226

if args is None:

227

args = sys.argv[1:]

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

228

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

229

parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Arm Ethos-U NPUs")

230

parser.add_argument("--version", action="version", version=__version__)

231

parser.add_argument(

232

"--api-version", action="version", version=API_VERSION, help="Displays the version of the external API."

233

)

234

parser.add_argument(

235

"--supported-ops-report",

236

action="store_true",

237

help="Generate the SUPPORTED_OPS.md file in the current working directory and exit",

Tim Hall

b9b515c

2020-11-01 21:27:19 +0000

[diff] [blame]

238

)

Jacob Bohlin

0628a8c

2020-08-28 13:25:14 +0200

[diff] [blame]

239

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

240

# set network nargs to be optional to allow the support-ops-report CLI option to be used standalone

parser.add_argument(

"network",

metavar="NETWORK",

type=str,

default=None,

nargs="?",

help="Filename of the input TensorFlow Lite for Microcontrollers network",

248

)

249

parser.add_argument(

250

"--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"

)

parser.add_argument(

"--enable-debug-db",

action="store_true",

default=None,

help="Enables the calculation and writing of a network debug database to output directory",

)

parser.add_argument(

"--config",

type=str,

action="append",

help="Vela configuration file(s) in Python ConfigParser .ini file format",

263

)

264

parser.add_argument("--verbose-all", action="store_true", help="Enable all verbose options")

265

parser.add_argument(

266

"--verbose-config", action="store_true", help="Verbose system configuration and memory mode"

267

)

268

parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")

269

parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")

270

parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")

271

parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")

272

parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")

273

parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")

274

parser.add_argument(

275

"--verbose-pareto-frontier-schedules",

276

action="store_true",

277

help="Show all schedules along the pareto frontier of optimisation criteria",

278

)

279

parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")

280

parser.add_argument(

281

"--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"

282

)

283

parser.add_argument(

284

"--verbose-register-command-stream", action="store_true", help="Verbose register command stream"

285

)

286

parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

287

parser.add_argument(

288

"--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"

289

)

290

parser.add_argument(

291

"--cache-bias-scale-tensor",

292

type=ast.literal_eval,

293

default=True,

294

choices=[True, False],

295

help="Controls the caching of the bias & scale tensors in SRAM (default: %(default)s)",

)

parser.add_argument(

"--cascading",

type=ast.literal_eval,

300

default=True,

301

choices=[True, False],

302

help="Controls the packing of multiple passes into a cascade (default: %(default)s)",

303

)

304

parser.add_argument(

305

"--force-block-config", type=str, default="", help="Force a specific block configuration WxHxC"

306

)

307

parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")

308

parser.add_argument(

309

"--accelerator-config",

310

type=str,

311

default="ethos-u55-256",

312

choices=list(architecture_features.Accelerator.member_list()),

313

help="Accelerator configuration to use (default: %(default)s)",

)

parser.add_argument(

"--system-config",

type=str,

default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,

319

help="System configuration to select from the Vela configuration file (default: %(default)s)",

)

parser.add_argument(

"--memory-mode",

type=str,

default=architecture_features.ArchitectureFeatures.DEFAULT_CONFIG,

325

help="Memory mode to select from the Vela configuration file (default: %(default)s)",

326

)

327

parser.add_argument(

328

"--tensor-allocator",

329

default=TensorAllocator.HillClimb,

330

type=lambda s: TensorAllocator[s],

331

choices=list(TensorAllocator),

332

help="Tensor Allocator algorithm (default: %(default)s)",

333

)

334

parser.add_argument(

335

"--show-subgraph-io-summary",

336

action="store_true",

337

help="Shows a summary of all the subgraphs and their inputs and outputs",

)

parser.add_argument(

"--ifm-streaming",

type=ast.literal_eval,

342

default=True,

343

choices=[True, False],

344

help="Controls scheduler IFM streaming search (default: %(default)s)",

345

)

346

parser.add_argument(

347

"--block-config-limit",

348

type=int,

349

default=16,

350

help="Limit block config search space, use zero for unlimited (default: %(default)s)",

)

parser.add_argument(

"--pareto-metric",

default=ParetoMetric.BwCycMem,

355

type=lambda s: ParetoMetric[s],

356

choices=list(ParetoMetric),

357

help="Controls the calculation of the pareto metric (default: %(default)s)",

)

parser.add_argument(

"--recursion-limit",

type=int,

default=10000,

help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",

364

)

365

parser.add_argument(

366

"--max-block-dependency",

367

type=int,

368

default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,

369

choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),

370

help=(

371

"Set the maximum value that can be used for the block dependency between npu kernel operations"

372

" (default: %(default)s)"

),

)

parser.add_argument(

"--nhcwb16-between-cascaded-passes",

377

type=ast.literal_eval,

378

default=True,

379

choices=[True, False],

380

help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",

381

)

382

parser.add_argument(

383

"--weight-estimation-scaling",

384

type=float,

385

default=1.0,

386

help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),

387

)

388

parser.add_argument(

389

"--cpu-tensor-alignment",

390

type=int,

391

default=Tensor.AllocationQuantum,

392

help=(

393

"Controls the allocation byte alignment of cpu tensors including Ethos-U Custom"

394

" operator inputs and outputs (default: %(default)s)"

395

),

396

)

397

args = parser.parse_args(args=args)

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

398

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

399

# Generate the supported ops report and exit

400

if args.supported_ops_report:

401

generate_supported_ops()

402

return 0

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

403

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

404

if args.network is None:

405

parser.error("the following argument is required: NETWORK")

Michael McGeagh

2fa40ae

2020-12-02 10:55:04 +0000

[diff] [blame]

406

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

407

# check all config files exist because they will be read as a group

408

if args.config is not None:

409

for filename in args.config:

410

if not os.access(filename, os.R_OK):

411

raise InputFileError(filename, "File not found or is not readable")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

412

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

413

sys.setrecursionlimit(args.recursion_limit)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

414

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

415

if args.force_block_config:

416

force_block_config = architecture_features.Block.from_string(args.force_block_config)

417

else:

418

force_block_config = None

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

419

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

420

if args.cpu_tensor_alignment < 16 or args.cpu_tensor_alignment & (args.cpu_tensor_alignment - 1) != 0:

421

parser.error(

422

"Invalid argument to --cpu-tensor-alignment = {} (must be greater than or equal to 16 and a power of 2)"

423

"".format(args.cpu_tensor_alignment)

424

)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

425

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

426

if args.system_config == ArchitectureFeatures.DEFAULT_CONFIG:

427

print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for system configuration")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

428

Henrik G Olsson

2021-03-23 17:34:49 +0100

[diff] [blame^]

429

if args.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:

430

print(f"Warning: Using {ArchitectureFeatures.DEFAULT_CONFIG} values for memory mode")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

431

Henrik G Olsson