blob: 1766750ead4356ac93f3f581c45581d1c3cd2264 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Main entry point for the Vela compiler.
18#
19# Provides the command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard7db78962020-05-25 15:05:26 +020034from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010035from .nn_graph import PassPlacement
36from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010037from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010038from .tensor import MemArea
Tim Hall79d07d22020-04-27 18:20:16 +010039
40
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Read the network in `fname`, compile it and write the result files.

    The network is loaded with `model_reader.read_model`, handed to
    `compiler_driver.compiler_driver`, and afterwards the pass-breakdown CSV,
    the summary CSV and the performance metrics are produced through
    `stats_writer`. When `fname` ends in ".tflite" the compiled network is also
    written to "<output_dir>/<network name>_vela.tflite".

    If `compiler_options.timing` is set, the wall-clock time spent on model
    reading and on the rest of the work is printed.

    Returns the network graph object produced by the model reader.
    Raises InputFileError if the model reader returns a falsy result.
    """
    if compiler_options.timing:
        phase_began = time.time()

    nng = model_reader.read_model(fname, model_reader_options)
    if not nng:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        phase_ended = time.time()
        print("Model reading took %f s" % (phase_ended - phase_began))
        # Restart the clock for the compilation phase
        phase_began = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Per-pass metrics
    pass_metrics_parts = (compiler_options.output_dir, nng.name, arch.system_config)
    stats_writer.write_pass_metrics_csv(nng, "%s/%s_pass-breakdown_%s.csv" % pass_metrics_parts)

    # Whole-network summary metrics
    summary_parts = (compiler_options.output_dir, nng.name, arch.system_config)
    stats_writer.write_summary_metrics_csv(nng, "%s/%s_summary_%s.csv" % summary_parts, arch)

    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    if fname.endswith(".tflite"):
        compiled_model_path = "%s/%s_vela.tflite" % (compiler_options.output_dir, nng.name)
        tflite_writer.write_tflite(nng, compiled_model_path)

    if compiler_options.timing:
        phase_ended = time.time()
        print("Compiler driver took %f s" % (phase_ended - phase_began))

    return nng
76
77
def print_subgraph_io_summary(nng):
    """Print the input/output tensor sizes of every subgraph of `nng`.

    Subgraphs are visited in reverse order. For a subgraph placed on the NPU,
    each input tensor, the scratch tensor and each output tensor is listed with
    its size in KiB. Every subgraph then gets a total size line and a line with
    its SRAM usage, and the largest subgraph total is printed at the end.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total_kib = 0
    for subgraph in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(subgraph.name, subgraph.placement))
        total_kib = 0

        if subgraph.placement == PassPlacement.Npu:
            io_tensors = subgraph.input_tensors + [subgraph.scratch_tensor] + subgraph.output_tensors
            for tensor in io_tensors:
                # Anything that is neither an input nor an output is the scratch tensor
                if tensor in subgraph.input_tensors:
                    direction = "In"
                elif tensor in subgraph.output_tensors:
                    direction = "Out"
                else:
                    direction = "In/Out"

                tensor_kib = tensor.elements() * tensor.element_size() / 1024.0
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, tensor_kib))
                total_kib = total_kib + tensor_kib

        sram_kib = subgraph.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(total_kib))
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total_kib = max(total_kib, largest_total_kib)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total_kib))
109
110
def _enum_member_by_name(enum_cls):
    """Return an argparse `type` callable that looks up a member of `enum_cls` by name.

    A bare `enum_cls[name]` raises KeyError for an unknown name. argparse only
    treats ArgumentTypeError, TypeError and ValueError as conversion failures,
    so a KeyError would surface as a traceback instead of a usage error.
    """

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError("unknown {0} name: {1!r}".format(enum_cls.__name__, name)) from None

    return convert


def _python_literal(text):
    """argparse `type` callable that evaluates `text` with ast.literal_eval.

    The evaluated value is returned unchanged. Malformed input (SyntaxError or
    ValueError from literal_eval) is reported as an argparse usage error.
    """
    try:
        return ast.literal_eval(text)
    except (SyntaxError, ValueError):
        raise argparse.ArgumentTypeError("invalid literal value: {0!r}".format(text)) from None


def _build_arg_parser():
    """Create the argument parser holding all of the vela command line options."""
    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--cascading",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument(
        "--ifm-ofm-overlap",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--permanent-storage",
        default=MemArea.OffChipFlash,
        type=_enum_member_by_name(MemArea),
        # NOTE(review): the slice depends on the declaration order of MemArea - confirm it still
        # selects the intended members whenever that enum changes.
        choices=list(MemArea)[3:-1],
        help=(
            "Memory area for permanent storage. To store the weights and other constant data in SRAM select "
            "'OnChipFlash' (default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=_enum_member_by_name(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_member_by_name(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )

    return parser


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses `args` (a list of command line argument strings, `sys.argv[1:]` when
    None), optionally reads the vela configuration file, builds the
    architecture, compiler and scheduler option objects, creates the output
    directory and runs `process` on the given network.

    Returns 0 once processing has completed. Invalid command line arguments are
    reported by argparse, which exits the program.
    """
    if args is None:
        args = sys.argv[1:]

    parser = _build_arg_parser()
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    # Defensive check only: argparse already rejects a missing NETWORK positional
    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    # An empty --force-block-config string means no override
    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        permanent_storage=args.permanent_storage,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        use_ifm_ofm_overlap=args.ifm_ofm_overlap,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0