blob: b9e224cbd59c0def5251076f0037b217a75229c8 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Main entry point for the Vela compiler.
18#
# Provides command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard7db78962020-05-25 15:05:26 +020034from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010035from .nn_graph import PassPlacement
36from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010037from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010038from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020039from .tensor import Tensor
Tim Hall79d07d22020-04-27 18:20:16 +010040
41
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network file, run the compiler driver on it and write the results.

    The network is loaded with model_reader.read_model(), compiled with
    compiler_driver.compiler_driver(), and then the pass-breakdown CSV, the
    summary CSV and the performance metrics are produced through stats_writer.
    When fname ends with ".tflite" the compiled network is also written out
    as "<output_dir>/<name>_vela.tflite".

    If compiler_options.timing is set, the time spent reading the model and
    the time spent in the remaining steps are printed.

    Returns the network graph object produced by the model reader.
    Raises InputFileError if the model reader returns a false value.
    """
    timed = compiler_options.timing
    if timed:
        start = time.time()

    nng = model_reader.read_model(fname, model_reader_options)
    if not nng:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if timed:
        print("Model reading took %f s" % (time.time() - start))
        # Restart the clock so the second figure covers compilation and output only
        start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    out_dir = compiler_options.output_dir

    stats_writer.write_pass_metrics_csv(
        nng, "%s/%s_pass-breakdown_%s.csv" % (out_dir, nng.name, arch.system_config)
    )
    stats_writer.write_summary_metrics_csv(
        nng, "%s/%s_summary_%s.csv" % (out_dir, nng.name, arch.system_config), arch
    )
    stats_writer.print_performance_metrics(
        nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch
    )

    if fname.endswith(".tflite"):
        tflite_writer.write_tflite(nng, "%s/%s_vela.tflite" % (out_dir, nng.name))

    if timed:
        print("Compiler driver took %f s" % (time.time() - start))

    return nng
77
78
def print_subgraph_io_summary(nng):
    """Print the sizes of the input, scratch and output tensors of every subgraph.

    Subgraphs are visited in reverse order. For each subgraph placed on the
    NPU, every tensor is listed with its direction and size in KiB, followed
    by the subgraph total and the value recorded under MemArea.Sram in
    sg.memory_used. The largest subgraph total seen is printed last.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    max_sg_size = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))

        # Only NPU subgraphs get a tensor breakdown
        if sg.placement != PassPlacement.Npu:
            continue

        inputs = sg.input_tensors
        outputs = sg.output_tensors
        sg_size = 0

        for tens in inputs + [sg.scratch_tensor] + outputs:
            # Anything that is neither a listed input nor a listed output
            # (i.e. the scratch tensor) is reported as "In/Out"
            tens_dir = "In" if tens in inputs else ("Out" if tens in outputs else "In/Out")

            size = tens.elements() * tens.element_size() / 1024.0
            sg_size += size
            print(" Tensor [{0}]: {1} = {2} KiB".format(tens_dir, tens.name, size))

        sram_used = sg.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(sg_size))
        print(" SRAM Memory Used = {0} KiB".format(sram_used))
        max_sg_size = max(sg_size, max_sg_size)

    print(" Maximum Subgraph Size = {0} KiB".format(max_sg_size))
110
111
def _literal_arg(text):
    """argparse type converter that evaluates text as a Python literal.

    ast.literal_eval() raises SyntaxError for text that is not parsable at
    all. argparse only turns ArgumentTypeError, TypeError and ValueError into
    a usage error, so both failure modes are mapped to ArgumentTypeError here
    to give the user a clean message instead of a traceback.
    """
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError) as err:
        raise argparse.ArgumentTypeError("invalid literal value: %r" % (text,)) from err


def _enum_by_name(enum_cls):
    """Return an argparse type converter that looks up a member of enum_cls by name.

    A plain enum_cls[name] lookup raises KeyError for an unknown name, which
    argparse does not trap; the converter reports it as ArgumentTypeError so
    that argparse prints a usage error.
    """

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            valid = ", ".join(member.name for member in enum_cls)
            raise argparse.ArgumentTypeError(
                "invalid choice: %r (choose from %s)" % (name, valid)
            ) from None

    return convert


def _build_arg_parser():
    """Create the argparse parser describing the vela command line."""
    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--cascading",
        type=_literal_arg,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument(
        "--ifm-ofm-overlap",
        type=_literal_arg,
        default=True,
        choices=[True, False],
        help="Controls the overlapping of IFM and OFM buffers (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=_enum_by_name(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=_literal_arg,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_by_name(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=_literal_arg,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--allocation-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
    )
    return parser


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line, optionally loads the vela configuration file,
    builds the architecture, compiler and scheduler option objects, creates
    the output directory and runs process() on the given network.

    args: list of command line argument strings; sys.argv[1:] is used when
        it is None.

    Returns 0 on completion. Invalid command line input is reported through
    argparse, which prints a usage message and exits the interpreter.
    """
    if args is None:
        args = sys.argv[1:]

    parser = _build_arg_parser()
    # NETWORK is a required positional, so parse_args() has already exited
    # with a usage error if it is missing; no separate check is needed.
    args = parser.parse_args(args=args)

    # Read configuration file
    config = None
    if args.config is not None:
        with open(args.config) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    alignment = args.allocation_alignment
    if alignment < 16:
        parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
    # A power of two has a single bit set, so clearing the lowest set bit must leave zero
    if alignment & (alignment - 1) != 0:
        parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        allocation_alignment=alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        use_ifm_ofm_overlap=args.ifm_ofm_overlap,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0