blob: 91899c28ba417f2c2cb46ea37cb163aae628ad73 [file] [log] [blame]
# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Main entry point for the Vela compiler.
#
# Provides the command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard7db78962020-05-25 15:05:26 +020034from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010035from .nn_graph import PassPlacement
36from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010037from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010038from .tensor import MemArea
Tim Hall79d07d22020-04-27 18:20:16 +010039
40
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network file, compile it and write the statistics and output files.

    Args:
        fname: Path of the network file to read.
        arch: Architecture object handed to the compiler driver and the stats
            writers; its ``system_config`` attribute is used in the CSV names.
        model_reader_options: Options forwarded to ``model_reader.read_model``.
        compiler_options: Compiler options. The attributes ``timing``,
            ``verbose_operators``, ``output_dir`` and ``show_cpu_operations``
            are read here; the object is also forwarded to the compiler driver.
        scheduler_options: Options forwarded to the compiler driver.

    Returns:
        The graph object produced by ``model_reader.read_model`` after it has
        been passed through ``compiler_driver.compiler_driver``.

    Raises:
        InputFileError: If ``model_reader.read_model`` returns a falsy value.
    """
    if compiler_options.timing:
        start = time.time()

    nng = model_reader.read_model(fname, model_reader_options)

    if not nng:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        stop = time.time()
        print("Model reading took %f s" % (stop - start))
        # Restart the clock so the second figure covers only the steps below.
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Build output paths with os.path.join rather than manual "/" concatenation
    # so that trailing separators in output_dir are handled correctly.
    output_dir = compiler_options.output_dir

    passes_csv_file = os.path.join(output_dir, "%s_pass-breakdown_%s.csv" % (nng.name, arch.system_config))
    stats_writer.write_pass_metrics_csv(nng, passes_csv_file)

    summary_csv_file = os.path.join(output_dir, "%s_summary_%s.csv" % (nng.name, arch.system_config))
    stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)

    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    # An output network is only written back when the input was a .tflite file.
    if fname.endswith(".tflite"):
        tflite_writer.write_tflite(nng, os.path.join(output_dir, "%s_vela.tflite" % nng.name))

    if compiler_options.timing:
        stop = time.time()
        print("Compiler driver took %f s" % (stop - start))

    return nng
76
77
def print_subgraph_io_summary(nng):
    """Print a summary of all the input and output tensor sizes for all subgraphs.

    Subgraphs are visited in reverse order. For subgraphs placed on the NPU the
    input tensors, the scratch tensor and the output tensors are listed with
    their size in KiB. For every subgraph the summed tensor size and the SRAM
    entry of ``memory_used`` are printed, followed by the largest summed size
    seen across all subgraphs.

    Args:
        nng: Graph object providing ``name`` and ``subgraphs``.
    """

    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))
    max_sg_size = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        sg_size = 0

        if sg.placement == PassPlacement.Npu:
            # Label each tensor by the list it came from. Deriving the label from
            # list membership would report a tensor that is both an input and an
            # output as "In" on both occurrences.
            labelled_tensors = (
                [("In", tens) for tens in sg.input_tensors]
                + [("In/Out", sg.scratch_tensor)]
                + [("Out", tens) for tens in sg.output_tensors]
            )
            for tens_dir, tens in labelled_tensors:
                if tens is None:
                    # NOTE(review): guards against an unset scratch tensor; confirm
                    # whether an NPU subgraph can reach here without one.
                    continue

                size = tens.elements() * tens.element_size() / 1024.0
                sg_size = sg_size + size
                print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))

        print(" Total Size = {0} KiB".format(sg_size))
        print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
        max_sg_size = max(sg_size, max_sg_size)

    print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))
109
110
def _enum_member_type(enum_cls):
    """Return an argparse ``type`` callable that looks up a member of *enum_cls* by name.

    argparse only converts ArgumentTypeError, TypeError and ValueError raised by
    a ``type`` callable into a usage error. A plain ``enum_cls[name]`` lookup
    raises KeyError for an unknown name, which would surface as a traceback.
    """

    def parse(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError("unknown %s name: %r" % (enum_cls.__name__, name)) from None

    return parse


def main(args=None):
    """Command line entry point: parse the options, compile the network, report.

    Args:
        args: List of command line argument strings. When None,
            ``sys.argv[1:]`` is used.

    Returns:
        0 after the network has been processed.

    Raises:
        SystemExit: Raised by argparse for invalid arguments, ``--help`` and
            ``--version``.
        OSError: If the file given with ``--config`` cannot be opened.
        InputFileError: Propagated from ``process`` when the network cannot be
            read.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    # Verbose/debug output switches
    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--cascading",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument(
        "--ifm-ofm-overlap",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--permanent-storage",
        default=MemArea.OffChipFlash,
        type=_enum_member_type(MemArea),
        # NOTE(review): this slice depends on the declaration order of MemArea;
        # confirm it still selects the intended members if the enum changes.
        choices=list(MemArea)[3:5],
        help=(
            "Memory area for permanent storage, only valid for Ethos-U55. "
            "To store the weights and other constant data in SRAM, select 'OnChipFlash'. (default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=_enum_member_type(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_member_type(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )

    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    # NOTE(review): NETWORK is a plain positional, which argparse already
    # requires, so this check looks redundant; kept as a defensive guard.
    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        permanent_storage=args.permanent_storage,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        use_ifm_ofm_overlap=args.ifm_ofm_overlap,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    # process() writes its CSV and network files into this directory.
    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0
331 return 0