blob: 923d8ec8537d5e02ec9c19bf56d6e690c459a0dc [file] [log] [blame]
# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Main entry point for the Vela compiler.
#
# Provides the command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard7db78962020-05-25 15:05:26 +020034from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010035from .nn_graph import PassPlacement
36from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010037from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010038from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020039from .tensor import Tensor
Tim Hall79d07d22020-04-27 18:20:16 +010040
41
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network file, compile it and write out the resulting artefacts.

    The network is loaded with ``model_reader.read_model`` and handed to
    ``compiler_driver.compiler_driver``. Afterwards the pass-breakdown and
    summary CSV files are written to ``compiler_options.output_dir``, the
    performance metrics are printed, and, when ``fname`` ends with
    ``.tflite``, a ``<name>_vela.tflite`` file is written as well.

    When ``compiler_options.timing`` is set, the time taken to read the model
    and the time taken by the remaining steps are printed.

    Raises:
        InputFileError: if the model reader returns a falsy graph.

    Returns:
        The graph object produced by the model reader.
    """
    if compiler_options.timing:
        start = time.time()

    graph = model_reader.read_model(fname, model_reader_options)
    if not graph:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        graph.print_operators()

    if compiler_options.timing:
        print("Model reading took %f s" % (time.time() - start))
        # Restart the clock so the second report covers only the compile/write phase.
        start = time.time()

    compiler_driver.compiler_driver(graph, arch, compiler_options, scheduler_options)

    # Per-pass metrics.
    pass_breakdown_path = "%s/%s_pass-breakdown_%s.csv" % (
        compiler_options.output_dir,
        graph.name,
        arch.system_config,
    )
    stats_writer.write_pass_metrics_csv(graph, pass_breakdown_path)

    # Whole-network summary metrics.
    summary_path = "%s/%s_summary_%s.csv" % (
        compiler_options.output_dir,
        graph.name,
        arch.system_config,
    )
    stats_writer.write_summary_metrics_csv(graph, summary_path, arch)

    stats_writer.print_performance_metrics(
        graph, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch
    )

    # Only TensorFlow Lite inputs get a compiled model written back out.
    if fname.endswith(".tflite"):
        vela_model_path = "%s/%s_vela.tflite" % (compiler_options.output_dir, graph.name)
        tflite_writer.write_tflite(graph, vela_model_path)

    if compiler_options.timing:
        print("Compiler driver took %f s" % (time.time() - start))

    return graph
77
78
def print_subgraph_io_summary(nng):
    """Print the sizes of the input and output tensors of every subgraph.

    Subgraphs are visited in reverse order. For subgraphs placed on the NPU,
    each input tensor, the scratch tensor and each output tensor is listed
    with its size in KiB. For every subgraph the accumulated tensor size and
    the SRAM memory used are printed, followed at the end by the largest
    accumulated size seen across all subgraphs.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))

        # Stays at zero for subgraphs that are not placed on the NPU.
        total = 0
        if sg.placement == PassPlacement.Npu:
            io_tensors = sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors
            for tens in io_tensors:
                # Anything that is neither an input nor an output (i.e. the
                # scratch tensor) is reported as bidirectional.
                if tens in sg.input_tensors:
                    direction = "In"
                elif tens in sg.output_tensors:
                    direction = "Out"
                else:
                    direction = "In/Out"

                kib = tens.elements() * tens.element_size() / 1024.0
                total = total + kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tens.name, kib))

        print(" Total Size = {0} KiB".format(total))
        print(" SRAM Memory Used = {0} KiB".format(sg.memory_used.get(MemArea.Sram, 0) / 1024.0))
        largest_total = max(total, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
110
111
def _build_arg_parser():
    """Create the argument parser that defines the Vela command line interface."""
    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    # Verbosity switches
    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    # Boolean options are given as the Python literals True/False on the command line.
    parser.add_argument(
        "--cascading",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument(
        "--ifm-ofm-overlap",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--permanent-storage",
        default=MemArea.OffChipFlash,
        type=lambda s: MemArea[s],
        # NOTE(review): the slice depends on the declaration order of MemArea;
        # presumably it selects the flash areas - verify against the enum.
        choices=list(MemArea)[3:5],
        help=(
            "Memory area for permanent storage, only valid for Ethos-U55. "
            "To store the weights and other constant data in SRAM, select 'OnChipFlash'. (default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=lambda s: TensorAllocator[s],
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=lambda s: ParetoMetric[s],
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=ast.literal_eval,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--allocation-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
    )
    return parser


def _read_vela_config(config_file):
    """Parse the Vela configuration file, returning None when no file was given."""
    if config_file is None:
        return None

    with open(config_file) as f:
        config = configparser.ConfigParser()
        config.read_file(f)
    return config


def _validate_allocation_alignment(parser, alignment):
    """Report a usage error through the parser unless alignment is a power of two that is at least 16."""
    if alignment < 16:
        parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
    # A power of two has exactly one bit set, so clearing the lowest set bit leaves zero.
    if alignment & (alignment - 1) != 0:
        parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")
    return alignment


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line, reads the optional configuration file, builds
    the architecture, compiler, scheduler and model reader options, creates
    the output directory and then runs ``process`` on the requested network.
    The subgraph IO summary is printed afterwards when requested.

    Args:
        args: list of command line arguments; defaults to ``sys.argv[1:]``
            when None.

    Returns:
        0 once processing has completed. Invalid arguments are reported via
        ``parser.error``.
    """
    if args is None:
        args = sys.argv[1:]

    parser = _build_arg_parser()
    args = parser.parse_args(args=args)

    # Read configuration file
    config = _read_vela_config(args.config)

    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    alignment = _validate_allocation_alignment(parser, args.allocation_alignment)

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        permanent_storage=args.permanent_storage,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        allocation_alignment=alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        use_ifm_ofm_overlap=args.ifm_ofm_overlap,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    # The output files are written by process(), so the directory must exist first.
    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0