Blame - ethosu/vela/vela.py - ml/ethos-u/ethos-u-vela

2020-04-27 18:20:16 +0100

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Main entry point for the Vela compiler.

18

#

19

# Provides the command line interface, options parsing, and network loading, before calling the compiler driver.

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

20

import argparse

21

import ast

22

import configparser

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

23

import os.path

24

import sys

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

25

import time

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

26

27

from . import architecture_features

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

28

from . import compiler_driver

29

from . import model_reader

30

from . import scheduler

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

31

from . import stats_writer

32

from . import tflite_writer

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

33

from ._version import __version__

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

34

from .debug_database import DebugDatabase

Louis Verhaard

7db7896

2020-05-25 15:05:26 +0200

[diff] [blame]

35

from .errors import InputFileError

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

36

from .nn_graph import PassPlacement

37

from .nn_graph import TensorAllocator

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

38

from .scheduler import ParetoMetric

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

39

from .tensor import MemArea

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

40

from .tensor import Tensor

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

41

42

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

43

def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Read the network in input_name, run the compiler driver on it and write out the results.

    The directory named by compiler_options.output_dir is created if it does not exist, and every
    output file name is built from that directory plus the base name of input_name without its
    extension. The pass-breakdown and summary CSV files are always written and the performance
    metrics are always printed. The "_vela.tflite" file is only written when input_name ends with
    ".tflite", and the "_debug.xml" debug database is only written when enable_debug_db is set.

    When compiler_options.timing is set, two durations are printed: the time spent reading the
    model, and the time spent on everything that follows it.

    Raises InputFileError if the model reader does not hand back a graph.
    Returns the graph obtained from the model reader, after it has been given to the compiler driver.
    """
    if compiler_options.timing:
        phase_start = time.time()

    out_dir = compiler_options.output_dir
    os.makedirs(out_dir, exist_ok=True)
    input_stem, _ = os.path.splitext(os.path.basename(input_name))
    output_basename = os.path.join(out_dir, input_stem)
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)
    if not nng:
        raise InputFileError(input_name, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        print("Model reading took %f s" % (time.time() - phase_start))
        # Restart the clock so the second report excludes model reading
        phase_start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Both statistics files carry the system configuration in their name
    stats_writer.write_pass_metrics_csv(
        nng, "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    )
    stats_writer.write_summary_metrics_csv(
        nng, "{0}_summary_{1}.csv".format(output_basename, arch.system_config), arch
    )
    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    # The name is needed by the debug database below even when no tflite file is written
    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        DebugDatabase.write(output_basename + "_debug.xml", input_name, output_filename)

    if compiler_options.timing:
        print("Compiler driver took %f s" % (time.time() - phase_start))

    return nng

def print_subgraph_io_summary(nng):
    """Print the tensor sizes of the subgraphs in nng, walking the subgraphs in reverse order.

    For a subgraph placed on the NPU, each input tensor, the scratch tensor and each output tensor
    is listed with its direction and its size in KiB. A total size and the SRAM memory used are
    printed per subgraph, followed at the end by the largest per-subgraph total.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        running_total = 0

        if sg.placement == PassPlacement.Npu:
            for tens in sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors:
                # Anything that is neither a listed input nor a listed output is reported as both
                tens_dir = "In/Out"
                if tens in sg.input_tensors:
                    tens_dir = "In"
                elif tens in sg.output_tensors:
                    tens_dir = "Out"

                size = tens.elements() * tens.element_size() / 1024.0
                running_total += size
                print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))

        # NOTE(review): this block was restored from a listing that had lost its indentation; the
        # three statements below are taken to run once per subgraph rather than only for NPU
        # subgraphs - confirm against the upstream file.
        print(" Total Size = {0} KiB".format(running_total))
        print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
        largest_total = max(running_total, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))

def main(args=None):

if args is None:

args = sys.argv[1:]

parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

127

128

parser.add_argument(

129

"network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"

130

)

131

132

parser.add_argument("--version", action="version", version=__version__)

133

parser.add_argument(

134

"--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"

135

)

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

parser.add_argument(

"--enable-debug-db",

action="store_true",

default=None,

help="Enables the calculation and writing of a network debug database to output directory",

141

)

142

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

143

parser.add_argument("--config", type=str, help="Location of vela configuration file")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

144

145

parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")

146

parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")

147

parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")

148

parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")

149

parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")

150

parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")

151

parser.add_argument(

152

"--verbose-pareto-frontier-schedules",

153

action="store_true",

154

help="Show all schedules along the pareto frontier of optimisation criteria",

155

)

156

parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")

157

parser.add_argument(

158

"--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"

159

)

160

parser.add_argument(

161

"--verbose-register-command-stream", action="store_true", help="Verbose register command stream"

162

)

163

parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

164

165

parser.add_argument(

166

"--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"

167

)

168

parser.add_argument(

169

"--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"

170

)

171

parser.add_argument(

Andreas Nevalainen

897cc14

2020-10-28 15:42:08 +0100

[diff] [blame]

172

"--keep-scale-placement", action="store_true", help="Keep scale tensors memory placement during scheduling"

173

)

174

parser.add_argument(

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

175

"--cascading",

176

type=ast.literal_eval,

177

default=True,

178

choices=[True, False],

179

help="Controls the packing of multiple passes into a cascade (default: %(default)s)",

180

)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

181

parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

182

parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")

183

parser.add_argument(

184

"--accelerator-config",

185

type=str,

186

default="ethos-u55-256",

Manupa Karunaratne

d83d2e1

2020-07-20 12:05:32 +0100

[diff] [blame]

187

choices=list(architecture_features.Accelerator.member_list()),

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

188

help="Accelerator configuration to use (default: %(default)s)",

)

parser.add_argument(

"--system-config",

type=str,

default="internal-default",

194

help="System configuration to use (default: %(default)s)",

195

)

196

parser.add_argument(

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

197

"--tensor-allocator",

198

default=TensorAllocator.Greedy,

199

type=lambda s: TensorAllocator[s],

200

choices=list(TensorAllocator),

201

help="Tensor Allocator algorithm (default: %(default)s)",

202

)

203

parser.add_argument(

204

"--show-subgraph-io-summary",

205

action="store_true",

206

help="Shows a summary of all the subgraphs and their inputs and outputs",

)

parser.add_argument(

"--ifm-streaming",

type=ast.literal_eval,

211

default=True,

212

choices=[True, False],

213

help="Controls scheduler IFM streaming search (default: %(default)s)",

214

)

215

parser.add_argument(

216

"--block-config-limit",

217

type=int,

218

default=16,

219

help="Limit block config search space, use zero for unlimited (default: %(default)s)",

220

)

221

parser.add_argument(

222

"--global-memory-clock-scale",

type=float,

default=1.0,

help=(

"Performs an additional scaling of the individual memory clock scales specified by the system config "

227

"(default: %(default)s)"

),

)

parser.add_argument(

"--pareto-metric",

default=ParetoMetric.BwCycMem,

233

type=lambda s: ParetoMetric[s],

234

choices=list(ParetoMetric),

235

help="Controls the calculation of the pareto metric (default: %(default)s)",

)

parser.add_argument(

"--recursion-limit",

type=int,

default=10000,

help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",

242

)

243

parser.add_argument(

244

"--max-block-dependency",

245

type=int,

246

default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,

247

choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),

248

help=(

249

"Set the maximum value that can be used for the block dependency between npu kernel operations "

250

"(default: %(default)s)"

251

),

252

)

Charles Xu

7b8823f

2020-05-29 13:53:10 +0200

[diff] [blame]

253

parser.add_argument(

254

"--nhcwb16-between-cascaded-passes",

255

type=ast.literal_eval,

256

default=True,

257

choices=[True, False],

258

help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",

259

)

Fredrik Svedberg

a0c3624

2020-06-03 15:43:31 +0200

[diff] [blame]

260

parser.add_argument(

Patrik Gustavsson

90831bc

2020-08-24 16:26:11 +0200

[diff] [blame]

261

"--weight-estimation-scaling",

262

type=float,

263

default=1.0,

264

help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),

265

)

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

266

parser.add_argument(

267

"--allocation-alignment",

268

type=int,

269

default=Tensor.AllocationQuantum,

270

help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),

271

)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

272

args = parser.parse_args(args=args)

273

274

# Read configuration file

275

config_file = args.config

276

config = None

277

if config_file is not None:

278

with open(config_file) as f:

279

config = configparser.ConfigParser()

280

config.read_file(f)

281

282

if args.network is None:

283

parser.error("the following argument is required: NETWORK")

284

285

sys.setrecursionlimit(args.recursion_limit)

286

287

if args.force_block_config:

288

force_block_config = architecture_features.Block.from_string(args.force_block_config)

289

else:

290

force_block_config = None

291

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

292

alignment = args.allocation_alignment

293

if alignment < 16:

294

parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")

295

if alignment & (alignment - 1) != 0:

296

parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")

297

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

298

arch = architecture_features.ArchitectureFeatures(

299

vela_config=config,

300

system_config=args.system_config,

301

accelerator_config=args.accelerator_config,

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

302

override_block_config=force_block_config,

303

block_config_limit=args.block_config_limit,

304

global_memory_clock_scale=args.global_memory_clock_scale,

305

max_blockdep=args.max_block_dependency,

Patrik Gustavsson

90831bc

2020-08-24 16:26:11 +0200

[diff] [blame]

306

weight_estimation_scaling=args.weight_estimation_scaling,

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

307

)

308

309

compiler_options = compiler_driver.CompilerOptions(

310

verbose_graph=args.verbose_graph,

311

verbose_quantization=args.verbose_quantization,

312

verbose_packing=args.verbose_packing,

313

verbose_tensor_purpose=args.verbose_tensor_purpose,

314

verbose_tensor_format=args.verbose_tensor_format,

315

verbose_allocation=args.verbose_allocation,

316

verbose_high_level_command_stream=args.verbose_high_level_command_stream,

317

verbose_register_command_stream=args.verbose_register_command_stream,

318

verbose_operators=args.verbose_operators,

319

show_minimum_possible_allocation=args.show_minimum_possible_allocation,

320

show_cpu_operations=args.show_cpu_operations,

321

tensor_allocator=args.tensor_allocator,

322

timing=args.timing,

323

output_dir=args.output_dir,

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

324

allocation_alignment=alignment,

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

325

)

326

327

scheduler_options = scheduler.SchedulerOptions(

328

use_cascading=args.cascading,

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

329

verbose_schedule=args.verbose_schedule,

330

verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,

331

use_ifm_streaming=args.ifm_streaming,

332

pareto_metric=args.pareto_metric,

Charles Xu

7b8823f

2020-05-29 13:53:10 +0200

[diff] [blame]

333

use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,

Andreas Nevalainen

897cc14

2020-10-28 15:42:08 +0100

[diff] [blame]

334

keep_scale_placement=args.keep_scale_placement,

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

335

)

336

Tim Hall

284223e

2020-06-09 13:17:21 +0100

[diff] [blame]

337

model_reader_options = model_reader.ModelReaderOptions()

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

338

Tim Hall

2020-11-09 16:46:37 +0000

[diff] [blame]

339

nng = process(args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options)

Tim Hall