blob: 5df20d227020143385eed39ed5dbe3b58405d111 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Main entry point for the Vela compiler.
18#
# Provides the command line interface, options parsing, and network loading before calling the compiler driver.
Diego Russoe8a10452020-04-21 17:39:10 +010020import argparse
21import ast
22import configparser
Diego Russoea6111a2020-04-14 18:41:58 +010023import os.path
24import sys
Tim Hall79d07d22020-04-27 18:20:16 +010025import time
Tim Hall79d07d22020-04-27 18:20:16 +010026
27from . import architecture_features
Diego Russoe8a10452020-04-21 17:39:10 +010028from . import compiler_driver
29from . import model_reader
30from . import scheduler
Tim Hall79d07d22020-04-27 18:20:16 +010031from . import stats_writer
32from . import tflite_writer
Tim Hall79d07d22020-04-27 18:20:16 +010033from ._version import __version__
Tim Halle6ccd872020-11-09 16:46:37 +000034from .debug_database import DebugDatabase
Louis Verhaard7db78962020-05-25 15:05:26 +020035from .errors import InputFileError
Diego Russoe8a10452020-04-21 17:39:10 +010036from .nn_graph import PassPlacement
37from .nn_graph import TensorAllocator
Tim Hall79d07d22020-04-27 18:20:16 +010038from .scheduler import ParetoMetric
Diego Russoea6111a2020-04-14 18:41:58 +010039from .tensor import MemArea
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020040from .tensor import Tensor
Tim Hall79d07d22020-04-27 18:20:16 +010041
42
def process(input_name, enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options):
    """Compile one network file and write its output artefacts.

    The network is read with the model reader, passed through the compiler driver, and then
    reported on: a pass-breakdown CSV and a summary CSV are written next to each other in
    ``compiler_options.output_dir`` and the performance metrics are printed. When the input
    name ends in ``.tflite`` a ``*_vela.tflite`` file is written, and when enable_debug_db is
    truthy a ``*_debug.xml`` debug database is written as well.

    Returns the graph object obtained from the model reader.
    Raises InputFileError when the model reader returns a falsy result.
    """
    if compiler_options.timing:
        phase_start = time.time()

    # All output files share the basename <output_dir>/<input file name without extension>
    out_dir = compiler_options.output_dir
    os.makedirs(out_dir, exist_ok=True)
    stem, _extension = os.path.splitext(os.path.basename(input_name))
    output_basename = os.path.join(out_dir, stem)
    DebugDatabase.show_warnings = enable_debug_db

    nng = model_reader.read_model(input_name, model_reader_options)
    if not nng:
        raise InputFileError(input_name, "input file could not be read")

    if compiler_options.verbose_operators:
        nng.print_operators()

    if compiler_options.timing:
        phase_end = time.time()
        print("Model reading took %f s" % (phase_end - phase_start))
        # Restart the clock so the next report covers only the compiler driver phase
        phase_start = time.time()

    compiler_driver.compiler_driver(nng, arch, compiler_options, scheduler_options)

    stats_writer.write_pass_metrics_csv(
        nng, "{0}_pass-breakdown_{1}.csv".format(output_basename, arch.system_config)
    )
    stats_writer.write_summary_metrics_csv(
        nng, "{0}_summary_{1}.csv".format(output_basename, arch.system_config), arch
    )
    stats_writer.print_performance_metrics(nng, show_cpu_operations=compiler_options.show_cpu_operations, arch=arch)

    # The output name is always computed because the debug database records it,
    # even when no tflite file is written
    output_filename = output_basename + "_vela.tflite"
    if input_name.endswith(".tflite"):
        tflite_writer.write_tflite(nng, output_filename)

    if enable_debug_db:
        DebugDatabase.write(output_basename + "_debug.xml", input_name, output_filename)

    if compiler_options.timing:
        phase_end = time.time()
        print("Compiler driver took %f s" % (phase_end - phase_start))

    return nng
87
88
def print_subgraph_io_summary(nng):
    """Print the sizes of the input, scratch and output tensors of every subgraph.

    Subgraphs are visited in reverse order. Tensors are only listed for subgraphs placed on
    the NPU; for every subgraph the accumulated tensor size and the SRAM entry of its
    ``memory_used`` mapping are printed, followed by the largest accumulated size seen.
    All sizes are reported in KiB.
    """

    print("Subgraph IO Summary")
    print("-------------------")
    print("NNG: {0}".format(nng.name))

    largest_total = 0
    for sg in reversed(nng.subgraphs):
        print(" Subgraph: {0} = {1}".format(sg.name, sg.placement))
        total_kib = 0

        if sg.placement == PassPlacement.Npu:
            listed_tensors = sg.input_tensors + [sg.scratch_tensor] + sg.output_tensors
            for tens in listed_tensors:
                # Inputs take priority over outputs; anything else (the scratch tensor) is both
                direction = "In" if tens in sg.input_tensors else ("Out" if tens in sg.output_tensors else "In/Out")

                kib = tens.elements() * tens.element_size() / 1024.0
                total_kib += kib
                print(" Tensor [{0}]: {1} = {2} KiB".format(direction, tens.name, kib))

        sram_kib = sg.memory_used.get(MemArea.Sram, 0) / 1024.0
        print(" Total Size = {0} KiB".format(total_kib))
        print(" SRAM Memory Used = {0} KiB".format(sram_kib))
        largest_total = max(total_kib, largest_total)

    print(" Maximum Subgraph Size = {0} KiB".format(largest_total))
120
121
def _enum_arg_type(enum_cls):
    """Return an argparse ``type`` callable that looks up a member of enum_cls by name.

    A plain ``enum_cls[name]`` lookup raises KeyError for an unknown name, which argparse does
    not translate into a usage error; the returned callable raises ArgumentTypeError instead.
    """

    def convert(name):
        try:
            return enum_cls[name]
        except KeyError:
            raise argparse.ArgumentTypeError(
                "{0!r} is not a valid {1} name".format(name, enum_cls.__name__)
            ) from None

    return convert


def _literal_arg_type(text):
    """Evaluate a command line value as a Python literal for use as an argparse ``type``.

    ast.literal_eval may raise SyntaxError, which argparse does not translate into a usage
    error, so both failure modes are reported as ArgumentTypeError.
    """
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError):
        raise argparse.ArgumentTypeError("invalid literal value: {0!r}".format(text)) from None


def main(args=None):
    """Command line entry point: parse the options, build the option objects and compile the network.

    Parameters:
        args: list of command line argument strings; when None, ``sys.argv[1:]`` is used.

    Returns:
        0 once the network has been processed.

    Invalid command line values are reported through argparse (``parser.error``), which
    terminates the process instead of returning.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(prog="vela", description="Neural network model compiler for Ethos-U55")

    # The positional NETWORK argument is mandatory; argparse itself rejects a command line without it
    parser.add_argument(
        "network", metavar="NETWORK", type=str, default=None, nargs=None, help="Filename of network to process"
    )

    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "--output-dir", type=str, default="output", help="Output directory to write files to (default: %(default)s)"
    )
    parser.add_argument(
        "--enable-debug-db",
        action="store_true",
        default=None,
        help="Enables the calculation and writing of a network debug database to output directory",
    )

    parser.add_argument("--config", type=str, help="Location of vela configuration file")

    parser.add_argument("--verbose-graph", action="store_true", help="Verbose graph rewriter")
    parser.add_argument("--verbose-quantization", action="store_true", help="Verbose quantization")
    parser.add_argument("--verbose-packing", action="store_true", help="Verbose pass packing")
    parser.add_argument("--verbose-tensor-purpose", action="store_true", help="Verbose tensor purpose")
    parser.add_argument("--verbose-tensor-format", action="store_true", help="Verbose tensor format")
    parser.add_argument("--verbose-schedule", action="store_true", help="Verbose schedule")
    parser.add_argument(
        "--verbose-pareto-frontier-schedules",
        action="store_true",
        help="Show all schedules along the pareto frontier of optimisation criteria",
    )
    parser.add_argument("--verbose-allocation", action="store_true", help="Verbose tensor allocation")
    parser.add_argument(
        "--verbose-high-level-command-stream", action="store_true", help="Verbose high level command stream"
    )
    parser.add_argument(
        "--verbose-register-command-stream", action="store_true", help="Verbose register command stream"
    )
    parser.add_argument("--verbose-operators", action="store_true", help="Verbose operator list")

    parser.add_argument(
        "--show-minimum-possible-allocation", action="store_true", help="Show the minimum possible allocation"
    )
    parser.add_argument(
        "--show-cpu-operations", action="store_true", help="Show the operations that fall back to the CPU"
    )
    parser.add_argument(
        "--keep-scale-placement", action="store_true", help="Keep scale tensors memory placement during scheduling"
    )
    parser.add_argument(
        "--cascading",
        type=_literal_arg_type,
        default=True,
        choices=[True, False],
        help="Controls the packing of multiple passes into a cascade (default: %(default)s)",
    )
    parser.add_argument("--force-block-config", type=str, default="", help="Force a specific block configuration HxWxC")
    parser.add_argument("--timing", action="store_true", help="Time the compiler doing operations")
    parser.add_argument(
        "--accelerator-config",
        type=str,
        default="ethos-u55-256",
        choices=list(architecture_features.Accelerator.member_list()),
        help="Accelerator configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--system-config",
        type=str,
        default="internal-default",
        help="System configuration to use (default: %(default)s)",
    )
    parser.add_argument(
        "--tensor-allocator",
        default=TensorAllocator.Greedy,
        type=_enum_arg_type(TensorAllocator),
        choices=list(TensorAllocator),
        help="Tensor Allocator algorithm (default: %(default)s)",
    )
    parser.add_argument(
        "--show-subgraph-io-summary",
        action="store_true",
        help="Shows a summary of all the subgraphs and their inputs and outputs",
    )
    parser.add_argument(
        "--ifm-streaming",
        type=_literal_arg_type,
        default=True,
        choices=[True, False],
        help="Controls scheduler IFM streaming search (default: %(default)s)",
    )
    parser.add_argument(
        "--block-config-limit",
        type=int,
        default=16,
        help="Limit block config search space, use zero for unlimited (default: %(default)s)",
    )
    parser.add_argument(
        "--global-memory-clock-scale",
        type=float,
        default=1.0,
        help=(
            "Performs an additional scaling of the individual memory clock scales specified by the system config "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--pareto-metric",
        default=ParetoMetric.BwCycMem,
        type=_enum_arg_type(ParetoMetric),
        choices=list(ParetoMetric),
        help="Controls the calculation of the pareto metric (default: %(default)s)",
    )
    parser.add_argument(
        "--recursion-limit",
        type=int,
        default=10000,
        help="Set the recursion depth limit, may result in RecursionError if too low (default: %(default)s)",
    )
    parser.add_argument(
        "--max-block-dependency",
        type=int,
        default=architecture_features.ArchitectureFeatures.MAX_BLOCKDEP,
        choices=range(0, architecture_features.ArchitectureFeatures.MAX_BLOCKDEP + 1),
        help=(
            "Set the maximum value that can be used for the block dependency between npu kernel operations "
            "(default: %(default)s)"
        ),
    )
    parser.add_argument(
        "--nhcwb16-between-cascaded-passes",
        type=_literal_arg_type,
        default=True,
        choices=[True, False],
        help="Control if NHCWB16 or NHWC should be used in between cascaded passes (default: %(default)s)",
    )
    parser.add_argument(
        "--weight-estimation-scaling",
        type=float,
        default=1.0,
        help=("Performs an additional scaling of weight compression scale estimate (default: %(default)s)"),
    )
    parser.add_argument(
        "--allocation-alignment",
        type=int,
        default=Tensor.AllocationQuantum,
        help=("Controls the allocation byte alignment of cpu tensors (default: %(default)s)"),
    )
    args = parser.parse_args(args=args)

    # Read configuration file
    config_file = args.config
    config = None
    if config_file is not None:
        with open(config_file) as f:
            config = configparser.ConfigParser()
            config.read_file(f)

    sys.setrecursionlimit(args.recursion_limit)

    if args.force_block_config:
        force_block_config = architecture_features.Block.from_string(args.force_block_config)
    else:
        force_block_config = None

    alignment = args.allocation_alignment
    if alignment < 16:
        parser.error("the following argument needs to be greater or equal to 16: ALLOCATION_ALIGNMENT")
    # A power of two has exactly one bit set, so clearing the lowest set bit must leave zero
    if alignment & (alignment - 1) != 0:
        parser.error("the following argument needs to be a power of 2: ALLOCATION_ALIGNMENT")

    arch = architecture_features.ArchitectureFeatures(
        vela_config=config,
        system_config=args.system_config,
        accelerator_config=args.accelerator_config,
        override_block_config=force_block_config,
        block_config_limit=args.block_config_limit,
        global_memory_clock_scale=args.global_memory_clock_scale,
        max_blockdep=args.max_block_dependency,
        weight_estimation_scaling=args.weight_estimation_scaling,
    )

    compiler_options = compiler_driver.CompilerOptions(
        verbose_graph=args.verbose_graph,
        verbose_quantization=args.verbose_quantization,
        verbose_packing=args.verbose_packing,
        verbose_tensor_purpose=args.verbose_tensor_purpose,
        verbose_tensor_format=args.verbose_tensor_format,
        verbose_allocation=args.verbose_allocation,
        verbose_high_level_command_stream=args.verbose_high_level_command_stream,
        verbose_register_command_stream=args.verbose_register_command_stream,
        verbose_operators=args.verbose_operators,
        show_minimum_possible_allocation=args.show_minimum_possible_allocation,
        show_cpu_operations=args.show_cpu_operations,
        tensor_allocator=args.tensor_allocator,
        timing=args.timing,
        output_dir=args.output_dir,
        allocation_alignment=alignment,
    )

    scheduler_options = scheduler.SchedulerOptions(
        use_cascading=args.cascading,
        verbose_schedule=args.verbose_schedule,
        verbose_pareto_frontier_schedules=args.verbose_pareto_frontier_schedules,
        use_ifm_streaming=args.ifm_streaming,
        pareto_metric=args.pareto_metric,
        use_nhcwb16_between_cascaded_passes=args.nhcwb16_between_cascaded_passes,
        keep_scale_placement=args.keep_scale_placement,
    )

    model_reader_options = model_reader.ModelReaderOptions()

    nng = process(args.network, args.enable_debug_db, arch, model_reader_options, compiler_options, scheduler_options)

    if args.show_subgraph_io_summary:
        print_subgraph_io_summary(nng)

    return 0