blob: 1908092636b2d69303753ba3a1427e67181b89fd [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Main entry point for the Vela compiler.
18#
# Provides the command line interface, options parsing and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard7db78962020-05-25 15:05:26 +020034from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010035from .nn_graph import PassPlacement
36from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010037from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010038from .tensor import MemArea
Tim Hall79d07d22020-04-27 18:20:16 +010039
40
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network file, compile it and write out the results.

    The model is read from ``fname``, handed to the compiler driver, and then the
    pass-breakdown CSV, the summary CSV and (for a ``.tflite`` input) the compiled
    ``_vela.tflite`` file are written into ``compiler_options.output_dir``. The
    performance metrics are printed as well. When ``compiler_options.timing`` is
    set, the duration of the model read and of everything after it is printed.

    Returns the graph object produced by the model reader.
    Raises InputFileError when the model reader returns a falsy graph.
    """
    if compiler_options.timing:
        tick = time.time()

    graph = model_reader.read_model(fname, model_reader_options)
    if not graph:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        graph.print_operators()

    if compiler_options.timing:
        print("Model reading took %f s" % (time.time() - tick))
        # Restart the clock for the compile/report stage
        tick = time.time()

    compiler_driver.compiler_driver(graph, arch, compiler_options, scheduler_options)

    # Both CSV reports are named after the network and the system configuration
    csv_name_parts = (compiler_options.output_dir, graph.name, arch.system_config)
    stats_writer.write_pass_metrics_csv(graph, "%s/%s_pass-breakdown_%s.csv" % csv_name_parts)

    csv_name_parts = (compiler_options.output_dir, graph.name, arch.system_config)
    stats_writer.write_summary_metrics_csv(graph, "%s/%s_summary_%s.csv" % csv_name_parts, arch)

    stats_writer.print_performance_metrics(
        graph, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch
    )

    # Only TensorFlow Lite inputs are written back out as a compiled network
    if fname.endswith(".tflite"):
        compiled_file = "%s/%s_vela.tflite" % (compiler_options.output_dir, graph.name)
        tflite_writer.write_tflite(graph, compiled_file)

    if compiler_options.timing:
        print("Compiler driver took %f s" % (time.time() - tick))

    return graph
76
77
def print_subgraph_io_summary(nng):
    """Print the sizes of the input, scratch and output tensors of every subgraph.

    Subgraphs are visited in reverse order. For each subgraph placed on the NPU the
    tensors are listed with their size in KiB (elements * element size / 1024),
    followed by the subgraph total and its SRAM usage. The largest subgraph total
    is printed last.
    """
    for header_line in ("Subgraph IO Summary", "-------------------"):
        print(header_line)
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for subgraph in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(subgraph.name, subgraph.placement))
        subgraph_total = 0

        if subgraph.placement == PassPlacement.Npu:
            io_tensors = subgraph.input_tensors + [subgraph.scratch_tensor] + subgraph.output_tensors
            for tensor in io_tensors:
                # Anything that is neither an input nor an output is reported as In/Out
                if tensor in subgraph.input_tensors:
                    direction = "In"
                else:
                    direction = "Out" if tensor in subgraph.output_tensors else "In/Out"

                kib = tensor.elements() * tensor.element_size() / 1024.0
                subgraph_total = subgraph_total + kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, kib))

            sram_kib = subgraph.memory_used.get(MemArea.Sram, 0) / 1024.0
            print(" Total Size = {0} KiB".format(subgraph_total))
            print(" SRAM Memory Used = {0} KiB".format(sram_kib))
            largest_total = max(subgraph_total, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
109
110
def main(args=None):
    """Command line entry point: parse the options, compile the network and report.

    Args:
        args: list of command line argument strings; when None, ``sys.argv[1:]`` is used.

    Returns:
        0 once the network has been processed. Invalid command line options terminate
        through argparse's usage error (SystemExit) instead of returning.
    """
    if args is None:
        args = sys.argv[1:]

    def enum_member(enum_cls):
        """Build an argparse ``type`` callable that looks up a member of enum_cls by name."""

        def lookup(name):
            # argparse only converts ArgumentTypeError, TypeError and ValueError raised by a
            # ``type`` callable into a usage error; the KeyError of a bare ``enum_cls[name]``
            # lookup would escape as a traceback, so translate it here.
            try:
                return enum_cls[name]
            except KeyError:
                raise argparse.ArgumentTypeError("unknown %s name: %r" % (enum_cls.__name__, name)) from None

        return lookup

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--cascading",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument(
        "--ifm-ofm-overlap",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--permanent-storage",
        default=MemArea.OffChipFlash,
        type=enum_member(MemArea),
        # NOTE(review): the slice depends on the declaration order of MemArea - confirm it
        # still selects exactly the memory areas that are valid for permanent storage.
        choices=list(MemArea)[3:5],
        help=(
            "Memory area for permanent storage, only valid for Ethos-U55. "
            "To store the weights and other constant data in SRAM, select 'OnChipFlash'. (default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=enum_member(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=enum_member(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--softmax-support",
        type=ast.literal_eval,
        default=False,
        choices=[True, False],
        help="Control if Softmax should be transformed into a set of npu operations (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    # Defensive check kept from the original; argparse already rejects a missing positional
    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        permanent_storage=args.permanent_storage,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        softmax_support=args.softmax_support,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        use_ifm_ofm_overlap=args.ifm_ofm_overlap,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    # The output directory must exist before process() writes its report files into it
    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0
338 return 0