blob: 6d54187cb25f019dbdffd9bcf85e38f619c1698e [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Main entry point for the Vela compiler.
18#
19# Provides command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Louis Verhaard7db78962020-05-25 15:05:26 +020034from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010035from .nn_graph import PassPlacement
36from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010037from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010038from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020039from .tensor import Tensor
Tim Hall79d07d22020-04-27 18:20:16 +010040
41
def process(fname, arch, model_reader_options, compiler_options, scheduler_options):
    """Load a network file, run the compiler driver on it and write the outputs.

    The steps performed, in order, are:

    1. read the model from ``fname`` with ``model_reader.read_model``;
    2. optionally print the operator list (``compiler_options.verbose_operators``);
    3. run ``compiler_driver.compiler_driver`` on the loaded graph;
    4. write the pass-breakdown and summary CSV files into
       ``compiler_options.output_dir`` and print the performance metrics;
    5. if ``fname`` ends with ``.tflite``, write the compiled ``*_vela.tflite`` file.

    When ``compiler_options.timing`` is set, the wall-clock time of the reading
    phase and of everything after it is printed.

    Args:
        fname: Path of the network file to process.
        arch: Architecture description handed to the compiler driver and the
            statistics writers; its ``system_config`` is part of the CSV names.
        model_reader_options: Options forwarded to ``model_reader.read_model``.
        compiler_options: Compiler options; ``timing``, ``verbose_operators``,
            ``output_dir`` and ``show_cpu_operations`` are read here.
        scheduler_options: Options forwarded to the compiler driver.

    Returns:
        The graph object returned by the model reader, after compilation.

    Raises:
        InputFileError: If the model reader returns a falsy result.
    """
    if compiler_options.timing:
        phase_start = time.time()

    nng = model_reader.read_model(fname, model_reader_options)
    if not nng:
        raise InputFileError(fname, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        print("Model reading took %f s" % (time.time() - phase_start))
        # Restart the clock so the next report only covers the compilation phase.
        phase_start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    # Per-pass breakdown report.
    stats_writer.write_pass_metrics_csv(
        nng, "%s/%s_pass-breakdown_%s.csv" % (compiler_options.output_dir, nng.name, arch.system_config)
    )

    # Whole-network summary report.
    stats_writer.write_summary_metrics_csv(
        nng, "%s/%s_summary_%s.csv" % (compiler_options.output_dir, nng.name, arch.system_config), arch
    )

    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    # Only TensorFlow Lite inputs get a compiled model written back out.
    if fname.endswith(".tflite"):
        vela_model_path = "%s/%s_vela.tflite" % (compiler_options.output_dir, nng.name)
        tflite_writer.write_tflite(nng, vela_model_path)

    if compiler_options.timing:
        print("Compiler driver took %f s" % (time.time() - phase_start))

    return nng
77
78
def print_subgraph_io_summary(nng):
    """Print the sizes of the input/output tensors of every subgraph.

    Subgraphs are visited in reverse order. For subgraphs placed on the NPU,
    each input tensor, the scratch tensor and each output tensor is listed
    with its size in KiB. For every subgraph the accumulated tensor size and
    the SRAM entry of ``memory_used`` are printed, and the largest accumulated
    size over all subgraphs is printed last.

    Args:
        nng: Graph object providing ``name`` and ``subgraphs``.
    """
    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for subgraph in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(subgraph.name, subgraph.placement))
        subgraph_total = 0

        # Tensors are only itemised for subgraphs that run on the NPU.
        if subgraph.placement == PassPlacement.Npu:
            io_tensors = subgraph.input_tensors + [subgraph.scratch_tensor] + subgraph.output_tensors
            for tensor in io_tensors:
                # Anything that is neither a listed input nor a listed output
                # (i.e. the scratch tensor) is reported as bidirectional.
                if tensor in subgraph.input_tensors:
                    direction = "In"
                elif tensor in subgraph.output_tensors:
                    direction = "Out"
                else:
                    direction = "In/Out"

                size_kib = tensor.elements() * tensor.element_size() / 1024.0
                subgraph_total += size_kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tensor.name, size_kib))

        sram_kib = subgraph.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(subgraph_total))
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total = max(subgraph_total, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
110
111
def _enum_member_by_name(enum_cls):
    """Return an argparse ``type`` callable that looks up a member of ``enum_cls`` by name.

    A plain ``enum_cls[name]`` raises ``KeyError`` for an unknown name, which
    argparse does not turn into a usage error. The returned callable raises
    ``argparse.ArgumentTypeError`` instead so the user gets a normal
    command line error message.
    """

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError("invalid %s name: %r" % (enum_cls.__name__, name)) from None

    return convert


def _python_literal(text):
    """Evaluate ``text`` as a Python literal for use as an argparse ``type``.

    ``ast.literal_eval`` raises ``SyntaxError`` (as well as ``ValueError``) on
    malformed input; both are reported as ``argparse.ArgumentTypeError`` so
    that argparse prints a usage error. Restricting the result to the allowed
    values is left to the ``choices`` of the argument.
    """
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError):
        raise argparse.ArgumentTypeError("invalid literal value: %r" % text) from None


def main(args=None):
    """Command line entry point of the Vela compiler.

    Parses the command line, optionally reads the Vela configuration file,
    builds the architecture, compiler, scheduler and model reader options,
    creates the output directory and then compiles the network via
    ``process``.

    Args:
        args: List of command line arguments (without the program name).
            Defaults to ``sys.argv[1:]`` when ``None``.

    Returns:
        0 on success.

    Raises:
        SystemExit: Raised by argparse for ``--version``, ``--help`` and for
            invalid arguments (including the allocation alignment checks).
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--cascading",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=_enum_member_by_name(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_member_by_name(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=_python_literal,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--allocation-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
    )
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    # Defensive check: argparse itself already rejects a missing positional argument.
    if args.network is None:
        parser.error("the following argument is required: NETWORK")

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    alignment = args.allocation_alignment
    if alignment < 16:
        parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
    # A power of two has exactly one bit set, so clearing its lowest set bit must give zero.
    if alignment & (alignment - 1) != 0:
        parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        allocation_alignment=alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    # The report and model writers expect the output directory to exist.
    os.makedirs(args.output_dir, exist_ok=True)

    nng = process(args.network, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0
325 return 0