blob: 4b43751a899230596810c05d3ab119dd0120133d [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
# Description:
# Main entry point for the Vela compiler.
#
# Provides the command line interface, options parsing, and network loading, before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard7db78962020-05-25 15:05:26 +020034from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010035from .nn_graph import PassPlacement
36from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010037from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010038from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020039from .tensor import Tensor
Tim Hall79d07d22020-04-27 18:20:16 +010040
41
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Read a network, run the compiler driver on it and write the result files.

    The network is loaded with ``model_reader.read_model`` and handed to
    ``compiler_driver.compiler_driver``. Afterwards the pass-breakdown CSV and
    the summary CSV are written into ``compiler_options.output_dir``, the
    performance metrics are printed, and, only when ``fname`` ends with
    ``.tflite``, a ``<name>_vela.tflite`` file is written to the same directory.

    Args:
        fname: Filename of the network to process.
        arch: Architecture description; ``arch.system_config`` is used in the
            CSV file names and ``arch`` is forwarded to the driver and writers.
        model_reader_options: Options forwarded to ``model_reader.read_model``.
        compiler_options: Options object; this function reads its ``timing``,
            ``verbose_operators``, ``output_dir`` and ``show_cpu_operations``
            attributes and forwards it to the compiler driver.
        scheduler_options: Options forwarded to the compiler driver.

    Returns:
        The network graph object returned by the model reader.

    Raises:
        InputFileError: If the model reader returns a falsy result.
    """
    timing = compiler_options.timing

    if timing:
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by adjustments of the system clock (unlike time.time()).
        start = time.perf_counter()

    nng = model_reader.read_model(fname, model_reader_options)

    if not nng:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if timing:
        stop = time.perf_counter()
        print("Model reading took %f s" % (stop - start))
        # Restart the clock so the second figure covers the compile and output stages only.
        start = time.perf_counter()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    output_dir = compiler_options.output_dir

    passes_csv_file = os.path.join(output_dir, "%s_pass-breakdown_%s.csv" % (nng.name, arch.system_config))
    stats_writer.write_pass_metrics_csv(nng, passes_csv_file)

    summary_csv_file = os.path.join(output_dir, "%s_summary_%s.csv" % (nng.name, arch.system_config))
    stats_writer.write_summary_metrics_csv(nng, summary_csv_file, arch)

    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    # Only networks that were read from a .tflite file are written back as .tflite.
    if fname.endswith(".tflite"):
        tflite_writer.write_tflite(nng, os.path.join(output_dir, "%s_vela.tflite" % nng.name))

    if timing:
        stop = time.perf_counter()
        print("Compiler driver took %f s" % (stop - start))

    return nng
77
78
def print_subgraph_io_summary(nng):
    """Print the input, scratch and output tensor sizes of every subgraph.

    Subgraphs are visited in reverse order. Tensors are only listed for
    subgraphs placed on the NPU. For each subgraph the summed tensor size and
    the SRAM figure taken from ``sg.memory_used`` are printed, followed at the
    end by the largest summed tensor size seen. All sizes are shown in KiB.
    """
    bytes_per_kib = 1024.0

    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for subgraph in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(subgraph.name, subgraph.placement))
        subgraph_total = 0

        if subgraph.placement == PassPlacement.Npu:
            io_tensors = subgraph.input_tensors + [subgraph.scratch_tensor] + subgraph.output_tensors
            for tensor in io_tensors:
                # Input membership is checked first, so a tensor that is in
                # both lists is reported as "In"; anything in neither list
                # (the scratch tensor) is reported as "In/Out".
                direction = (
                    "In"
                    if tensor in subgraph.input_tensors
                    else "Out"
                    if tensor in subgraph.output_tensors
                    else "In/Out"
                )

                tensor_kib = tensor.elements() * tensor.element_size() / bytes_per_kib
                subgraph_total = subgraph_total + tensor_kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, tensor_kib))

        sram_kib = subgraph.memory_used.get(MemArea.Sram, 0) / bytes_per_kib
        print(" Total Size = {0} KiB".format(subgraph_total))
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total = max(subgraph_total, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
110
111
def _enum_member_by_name(enum_cls):
    """Return an argparse ``type`` converter that looks up a member of ``enum_cls`` by name."""

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            # argparse only reports ArgumentTypeError, TypeError and ValueError
            # as usage errors; a bare KeyError would surface as a traceback.
            raise argparse.ArgumentTypeError("invalid %s name: %r" % (enum_cls.__name__, name)) from None

    return convert


def _python_literal(text):
    """argparse ``type`` converter that evaluates ``text`` with ``ast.literal_eval``."""
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError):
        # SyntaxError is not handled by argparse, so malformed input is
        # converted into a regular usage error here.
        raise argparse.ArgumentTypeError("invalid Python literal: %r" % (text,)) from None


def main(args=None):
    """Command line entry point: parse the options, build the option objects and compile the network.

    Args:
        args: List of command line argument strings. When ``None`` the
            arguments are taken from ``sys.argv[1:]``.

    Returns:
        ``0`` after the network has been processed.

    Raises:
        SystemExit: Raised by argparse for invalid command lines, including the
            allocation alignment checks performed through ``parser.error``.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--keep-scale-placement", action="store_true", help="Keep scale tensors memory placement during scheduling"
    )
    parser.add_argument(
        "--cascading",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=_enum_member_by_name(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_member_by_name(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--allocation-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
    )
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    # Defensive check: argparse already rejects a missing positional NETWORK argument.
    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    # An empty --force-block-config string (the default) means "no override".
    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    alignment = args.allocation_alignment
    if alignment < 16:
        parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
    # A power of two has a single bit set, so clearing its lowest set bit must give zero.
    if alignment & (alignment - 1) != 0:
        parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        allocation_alignment=alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
        keep_scale_placement=args.keep_scale_placement,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    # The output directory must exist before process() writes its result files into it.
    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0