blob: b1edf34e7521e7a5e9b63e4461339a4f106e0070 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Main entry point for the Vela compiler.
18#
19# Provides the command line interface, options parsing, and network loading that take place before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard7db78962020-05-25 15:05:26 +020034from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010035from .nn_graph import PassPlacement
36from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010037from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010038from .tensor import MemArea
Tim Hall79d07d22020-04-27 18:20:16 +010039
40
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Read the network in `fname`, run the compiler driver on it and write the results.

    The pass-breakdown and summary CSV files are written into
    `compiler_options.output_dir`, the performance metrics are printed, and,
    when `fname` ends in ".tflite", a "<name>_vela.tflite" file is written to
    the same directory. When `compiler_options.timing` is set, the time taken
    by model reading and by the remaining steps is printed.

    Returns the graph object produced by `model_reader.read_model`.
    Raises InputFileError if the reader returns a falsy result.
    """

    def output_path(suffix):
        # Formatted at the point of use, so the attributes are read when each file is written.
        return "%s/%s%s" % (compiler_options.output_dir, nng.name, suffix)

    if compiler_options.timing:
        t_begin = time.time()

    nng = model_reader.read_model(fname, model_reader_options)
    if not nng:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        t_end = time.time()
        print("Model reading took %f s" % (t_end - t_begin))
        # Restart the clock for the compilation phase
        t_begin = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Per-pass and summary statistics
    stats_writer.write_pass_metrics_csv(nng, output_path("_pass-breakdown_%s.csv" % (arch.system_config,)))
    stats_writer.write_summary_metrics_csv(nng, output_path("_summary_%s.csv" % (arch.system_config,)), arch)
    stats_writer.print_performance_metrics(
        nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch
    )

    # Only TensorFlow Lite inputs get an optimised model written back out
    if fname.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_path("_vela.tflite"))

    if compiler_options.timing:
        t_end = time.time()
        print("Compiler driver took %f s" % (t_end - t_begin))

    return nng
76
77
def print_subgraph_io_summary(nng):
    """Print the input and output tensor sizes of every subgraph in `nng`.

    Subgraphs are visited in reverse order. For subgraphs placed on the Npu each
    input, scratch and output tensor is listed with its size in KiB. For every
    subgraph the accumulated size and the SRAM memory used are printed, and the
    largest accumulated size is reported at the end.
    """

    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        total_kib = 0

        if sg.placement == PassPlacement.Npu:
            io_tensors = sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors
            for tensor in io_tensors:
                # Anything that is neither an input nor an output (i.e. the scratch tensor) is "In/Out"
                direction = (
                    "In" if tensor in sg.input_tensors else "Out" if tensor in sg.output_tensors else "In/Out"
                )
                kib = tensor.elements() * tensor.element_size() / 1024.0
                total_kib = total_kib + kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, kib))

        print(" Total Size = {0} KiB".format(total_kib))
        sram_kib = sg.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total = max(total_kib, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
109
110
def _literal_eval_arg(text):
    """argparse `type` callable that evaluates `text` as a Python literal.

    argparse only turns ValueError, TypeError and ArgumentTypeError raised by a
    `type` callable into a usage error. ast.literal_eval can also raise
    SyntaxError (e.g. for "1 +"), which would otherwise escape as a traceback,
    so every parse failure is reported as an ArgumentTypeError instead.
    """
    try:
        return ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError):
        raise argparse.ArgumentTypeError("invalid Python literal: %r" % (text,)) from None


def main(args=None):
    """Command line entry point for the Vela compiler.

    Parses `args` (defaults to `sys.argv[1:]`), optionally reads the vela
    configuration file, builds the architecture, compiler, scheduler and model
    reader options, creates the output directory and runs `process` on the
    given network. Optionally prints the subgraph IO summary afterwards.

    Returns 0 on completion. Invalid command line arguments cause argparse to
    exit the program with a usage message.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--cascading",
        type=_literal_eval_arg,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument(
        "--ifm-ofm-overlap",
        type=_literal_eval_arg,
        default=True,
        choices=[True, False],
        help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--permanent-storage",
        default=MemArea.OffChipFlash,
        type=lambda s: MemArea[s],
        # NOTE(review): the slice depends on the declaration order of MemArea members - confirm when MemArea changes
        choices=list(MemArea)[3:-1],
        help=(
            "Memory area for permanent storage. To store the weights and other constant data in SRAM select "
            "'OnChipFlash' (default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=lambda s: TensorAllocator[s],
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=_literal_eval_arg,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=lambda s: ParetoMetric[s],
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=_literal_eval_arg,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--softmax-support",
        type=_literal_eval_arg,
        default=False,
        choices=[True, False],
        help="Control if Softmax should be transformed into a set of npu operations (default: %(default)s)",
    )

    # NETWORK is a required positional argument, so argparse has already exited
    # with a usage error if it was not supplied; no further check is needed here.
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        permanent_storage=args.permanent_storage,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        softmax_support=args.softmax_support,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        use_ifm_ofm_overlap=args.ifm_ofm_overlap,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    # The statistics and output model files are written here by process()
    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0
332 return 0