blob: e0e44b29090ecbf5354a974b54674715bc693fda [file] [log] [blame]
# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Holds a container for Ethos-U and System architecture parameters.
Diego Russoea6111a2020-04-14 18:41:58 +010018import enum
Tim Hall79d07d22020-04-27 18:20:16 +010019from collections import namedtuple
20from configparser import ConfigParser
Diego Russoea6111a2020-04-14 18:41:58 +010021
Tim Hall79d07d22020-04-27 18:20:16 +010022import numpy as np
Diego Russoea6111a2020-04-14 18:41:58 +010023
Louis Verhaardaeae5672020-11-02 18:04:27 +010024from .api import NpuAccelerator
Tim Hall1bd531d2020-11-01 20:59:36 +000025from .errors import CliOptionError
26from .errors import ConfigOptionError
Dwight Lidmana9390f72020-05-13 12:00:08 +020027from .ethos_u55_regs.ethos_u55_regs import resampling_mode
Louis Verhaard69b31762020-11-17 09:45:20 +010028from .numeric_util import full_shape
Diego Russoe8a10452020-04-21 17:39:10 +010029from .numeric_util import round_up
30from .numeric_util import round_up_divide
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +020031from .numeric_util import round_up_to_int
Tim Hall4ed38bc2020-10-20 18:54:20 +010032from .operation import Kernel
Diego Russoea6111a2020-04-14 18:41:58 +010033from .operation import NpuBlockType
Tim Hall4ed38bc2020-10-20 18:54:20 +010034from .operation import PointXYZ
Diqing Zhongf842b692020-12-11 13:07:37 +010035from .tensor import BandwidthDirection
Diego Russoe8a10452020-04-21 17:39:10 +010036from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020037from .tensor import MemType
Diego Russoe8a10452020-04-21 17:39:10 +010038from .tensor import TensorFormat
39from .tensor import TensorPurpose
Jonas Ohlsson45e653d2021-07-26 16:13:12 +020040from .tflite_supported_operators import TFLiteSupportedOperators
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +020041from .tosa_supported_operators import TosaSupportedOperators
Tim Hall79d07d22020-04-27 18:20:16 +010042
Tim Hall79d07d22020-04-27 18:20:16 +010043
class Block:
    """A three-dimensional extent described by width, height and depth.

    Instances are mutable and compare by value. Because ``__eq__`` is defined
    and the attributes can change, instances are deliberately unhashable.
    """

    # Mutable value object: keep it out of sets and dict keys.
    __hash__ = None

    def __init__(self, w=0, h=0, d=0):
        self.width = w
        self.height = h
        self.depth = d

    def elements(self):
        """Return width * height * depth."""
        return self.width * self.height * self.depth

    def elements_wh(self):
        """Return width * height (depth is ignored)."""
        return self.width * self.height

    def clone(self):
        """Return a new Block with the same three dimensions."""
        return Block(self.width, self.height, self.depth)

    def as_list(self):
        """Return the dimensions as ``[height, width, depth]``."""
        return [self.height, self.width, self.depth]

    def __eq__(self, other):
        """Compare dimension by dimension.

        Objects that do not expose ``width``/``height``/``depth`` (for example
        ``None`` or an int) are not comparable; ``NotImplemented`` is returned
        so that Python falls back to its default handling and ``==`` evaluates
        to False instead of raising AttributeError.
        """
        try:
            other_dims = (other.width, other.height, other.depth)
        except AttributeError:
            return NotImplemented
        return (self.width, self.height, self.depth) == other_dims

    def __repr__(self):
        return f"<Block: {self.width},{self.height},{self.depth}>"

    def to_hwc(self):
        """Return the dimensions as ``[height, width, depth]``."""
        return [self.height, self.width, self.depth]

    @classmethod
    def from_string(cls, s):
        """Parse a ``"WxHxC"`` string (three integers separated by ``x``)."""
        w, h, c = (int(v) for v in s.split("x"))
        return cls(w, h, c)

    @classmethod
    def from_shape(cls, shape) -> "Block":
        """Converts the shape to a Block"""
        shp = full_shape(3, shape, 1)
        # Note: index from end, as len(shp) may be > 3
        return cls(shp[-2], shp[-3], shp[-1])

    @classmethod
    def min(cls, a, b):
        """Return the per-dimension minimum of two blocks."""
        return cls(min(a.width, b.width), min(a.height, b.height), min(a.depth, b.depth))

    @classmethod
    def max(cls, a, b):
        """Return the per-dimension maximum of two blocks."""
        return cls(max(a.width, b.width), max(a.height, b.height), max(a.depth, b.depth))

    @classmethod
    def round(cls, a, b):
        """Apply ``round_up`` to each dimension of ``a`` using the matching dimension of ``b``."""
        return cls(round_up(a.width, b.width), round_up(a.height, b.height), round_up(a.depth, b.depth))

    @classmethod
    def div_round_up(cls, a, b):
        """Apply ``round_up_divide`` to each dimension of ``a`` using the matching dimension of ``b``."""
        return cls(
            round_up_divide(a.width, b.width), round_up_divide(a.height, b.height), round_up_divide(a.depth, b.depth)
        )
103
Tim Hall79d07d22020-04-27 18:20:16 +0100104
class Rect:
    """A box described by a start corner (x, y, z) and an end corner (x2, y2, z2).

    ``size()`` adds one along each axis, i.e. both corners count as part of the box.
    """

    def __init__(self, x, y, z, x2, y2, z2):
        self.x, self.y, self.z = x, y, z
        self.x2, self.y2, self.z2 = x2, y2, z2

    def start(self):
        """Return the start corner as a PointXYZ."""
        return PointXYZ(self.x, self.y, self.z)

    def end(self):
        """Return the end corner as a PointXYZ."""
        return PointXYZ(self.x2, self.y2, self.z2)

    def size(self):
        """Return the extent of the box as a Block (end - start + 1 on each axis)."""
        extent_x = self.x2 - self.x + 1
        extent_y = self.y2 - self.y + 1
        extent_z = self.z2 - self.z + 1
        return Block(extent_x, extent_y, extent_z)

    def __repr__(self):
        return f"<Rect: ({self.x},{self.y},{self.z}) ({self.x2},{self.y2},{self.z2})>"
125
126
class SHRAMElements:
    """Indices of the SHRAM element kinds, plus per-kind lookup arrays.

    Each array below has one entry per index (IFM8 .. Acc40), in index order.
    """

    IFM8 = 0
    IFM16 = 1
    IFM8_Elementwise = 2
    IFM16_Elementwise = 3
    IFM32 = 4
    Acc16 = 5
    Acc32 = 6
    Acc40 = 7
    Last = Acc40
    # Width of one element of each kind, in bits and in whole bytes
    BitSizes = np.array([8, 16, 8, 16, 32, 16, 32, 40], dtype=np.int32)
    ByteSizes = BitSizes // 8
    # Alignment applied to the depth before (PreAlign) and after (PostAlign)
    # it is multiplied by the element byte size
    PostAlign = np.array([8, 8, 8, 8, 8, 1, 1, 1], dtype=np.int32)
    PreAlign = np.array([1, 1, 1, 1, 1, 8, 8, 8], dtype=np.int32)
Tim Hall79d07d22020-04-27 18:20:16 +0100141
142
class SHRAMBlockConfig:
    """Holds the SHRAM sizes and bank requirements computed for one block shape."""

    def __init__(self, sizes, banks):
        # One bank entry is expected per SHRAMElements index (IFM8 .. Acc40)
        expected_entries = SHRAMElements.Last + 1
        assert len(banks) == expected_entries
        self.banks = banks
        self.sizes = sizes
148
149
# Area indices must match Ethos-U SHRAM layout spec
class SharedBufferArea(enum.IntEnum):
    """Indices of the areas of the shared buffer; ``Size`` is the number of areas."""

    OFM = 0
    Weights = 1
    IFM = 2
    Accumulators = 3
    # Kept as the last entry: one past the highest area index
    Size = Accumulators + 1
157
158
class Accelerator(enum.Enum):
    """Accelerator configurations used internally; each value is its configuration name string."""

    Ethos_U55_32 = "ethos-u55-32"
    Ethos_U55_64 = "ethos-u55-64"
    Ethos_U55_128 = "ethos-u55-128"
    Ethos_U55_256 = "ethos-u55-256"
    Ethos_U65_256 = "ethos-u65-256"
    Ethos_U65_512 = "ethos-u65-512"

    @classmethod
    def member_list(cls):
        """Return the value strings of all members, in definition order."""
        return [member.value for member in cls]

    @classmethod
    def from_npu_accelerator(cls, npu_accelerator: NpuAccelerator) -> "Accelerator":
        """Converts the given public API object to Accelerator (used internally)"""
        internal_by_public = {
            NpuAccelerator.Ethos_U55_32: cls.Ethos_U55_32,
            NpuAccelerator.Ethos_U55_64: cls.Ethos_U55_64,
            NpuAccelerator.Ethos_U55_128: cls.Ethos_U55_128,
            NpuAccelerator.Ethos_U55_256: cls.Ethos_U55_256,
            NpuAccelerator.Ethos_U65_256: cls.Ethos_U65_256,
            NpuAccelerator.Ethos_U65_512: cls.Ethos_U65_512,
        }
        assert npu_accelerator in internal_by_public, f"Unsupported accelerator {npu_accelerator}"
        return internal_by_public[npu_accelerator]
184
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100185
@enum.unique
class MemPort(enum.Enum):
    """The two AXI ports; values are assigned automatically in definition order."""

    Axi0 = enum.auto()
    Axi1 = enum.auto()
190
191
# Immutable record of the SHRAM bank layout parameters
SHRAMConfig = namedtuple("SHRAMConfig", "reserved_output_banks bank_size_bytes total_banks reserved_end_banks")
195
196
class ArchitectureFeatures:
    """This class is a container for various parameters of the Ethos-U core
    and system configuration that can be tuned, either by command line
    parameters or by the Ethos-U architects. The class is often passed
    around to passes that need to do architecture-dependent actions.

    Note the difference between ArchitectureFeatures and CompilerOptions
    - ArchitectureFeatures is for changing the Ethos-U and system architecture
    - CompilerOptions is for changing the behaviour of the compiler
    """

    # Per-accelerator hardware parameters. shram_granules holds one entry per
    # SHRAMElements index (IFM8 .. Acc40).
    ArchitectureConfig = namedtuple(
        "ArchitectureConfig",
        ["macs", "cores", "ofm_ublock", "ifm_ublock", "shram_banks", "shram_granules", "elem_units"],
    )
    accelerator_configs = {
        Accelerator.Ethos_U65_512: ArchitectureConfig(
            macs=256,
            cores=2,
            ofm_ublock=Block(2, 2, 8),
            ifm_ublock=Block(2, 2, 8),
            shram_banks=48,
            shram_granules=[8, 8, 8, 8, 16, 8, 16, 20],
            elem_units=8,
        ),
        Accelerator.Ethos_U65_256: ArchitectureConfig(
            macs=256,
            cores=1,
            ofm_ublock=Block(2, 2, 8),
            ifm_ublock=Block(2, 2, 8),
            shram_banks=48,
            shram_granules=[8, 8, 8, 8, 16, 8, 16, 20],
            elem_units=8,
        ),
        Accelerator.Ethos_U55_256: ArchitectureConfig(
            macs=256,
            cores=1,
            ofm_ublock=Block(2, 2, 8),
            ifm_ublock=Block(2, 2, 8),
            shram_banks=48,
            shram_granules=[8, 8, 8, 8, 16, 8, 16, 20],
            elem_units=8,
        ),
        Accelerator.Ethos_U55_128: ArchitectureConfig(
            macs=128,
            cores=1,
            ofm_ublock=Block(2, 1, 8),
            ifm_ublock=Block(2, 1, 8),
            shram_banks=24,
            shram_granules=[4, 4, 4, 4, 8, 4, 8, 12],
            elem_units=4,
        ),
        Accelerator.Ethos_U55_64: ArchitectureConfig(
            macs=64,
            cores=1,
            ofm_ublock=Block(1, 1, 8),
            ifm_ublock=Block(1, 1, 8),
            shram_banks=16,
            shram_granules=[2, 2, 2, 2, 4, 4, 4, 8],
            elem_units=2,
        ),
        Accelerator.Ethos_U55_32: ArchitectureConfig(
            macs=32,
            cores=1,
            ofm_ublock=Block(1, 1, 4),
            ifm_ublock=Block(1, 1, 8),
            shram_banks=16,
            shram_granules=[2, 2, 2, 2, 4, 4, 4, 4],
            elem_units=1,
        ),
    }

    OFMSplitDepth = 16
    SubKernelMax = Block(8, 8, 65536)

    # Name of the built-in system configuration that needs no configuration file
    DEFAULT_CONFIG = "internal-default"
    MAX_BLOCKDEP = 3
Tim Hall1bd531d2020-11-01 20:59:36 +0000237
Tim Hall79d07d22020-04-27 18:20:16 +0100238 def __init__(
239 self,
Tim Hall1bd531d2020-11-01 20:59:36 +0000240 vela_config_files,
Tim Hall79d07d22020-04-27 18:20:16 +0100241 accelerator_config,
242 system_config,
Tim Hall1bd531d2020-11-01 20:59:36 +0000243 memory_mode,
Tim Hall79d07d22020-04-27 18:20:16 +0100244 max_blockdep,
Tim Hall1bd531d2020-11-01 20:59:36 +0000245 verbose_config,
Tim Halld8339a72021-05-27 18:49:40 +0100246 arena_cache_size,
Tim Hall79d07d22020-04-27 18:20:16 +0100247 ):
248 accelerator_config = accelerator_config.lower()
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100249 if accelerator_config not in Accelerator.member_list():
Tim Hall1bd531d2020-11-01 20:59:36 +0000250 raise CliOptionError("--accelerator-config", self.accelerator_config, "Unknown accelerator configuration")
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100251 self.accelerator_config = Accelerator(accelerator_config)
Tim Hall79d07d22020-04-27 18:20:16 +0100252 accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config]
253 self.config = accel_config
254
Tim Halld8339a72021-05-27 18:49:40 +0100255 self.accumulator_granules = {
256 SHRAMElements.Acc16: accel_config.shram_granules[SHRAMElements.Acc16],
257 SHRAMElements.Acc32: accel_config.shram_granules[SHRAMElements.Acc32],
258 SHRAMElements.Acc40: accel_config.shram_granules[SHRAMElements.Acc40],
259 }
260
261 self.ifm_bank_granules = {
262 8: accel_config.shram_granules[SHRAMElements.IFM8],
263 16: accel_config.shram_granules[SHRAMElements.IFM16],
264 32: accel_config.shram_granules[SHRAMElements.IFM32],
265 }
266
267 self.ifm_ew_bank_granules = {
268 8: accel_config.shram_granules[SHRAMElements.IFM8_Elementwise],
269 16: accel_config.shram_granules[SHRAMElements.IFM16_Elementwise],
270 32: accel_config.shram_granules[SHRAMElements.IFM32],
271 }
272
273 self.shram = SHRAMConfig(2, 1024, accel_config.shram_banks, 2 if accel_config.shram_banks > 16 else 0)
274
Tim Hall79d07d22020-04-27 18:20:16 +0100275 self.system_config = system_config
Tim Hall1bd531d2020-11-01 20:59:36 +0000276 self.memory_mode = memory_mode
Tim Hallc8a73862020-10-27 12:43:14 +0000277 self.is_ethos_u65_system = self.accelerator_config in (Accelerator.Ethos_U65_256, Accelerator.Ethos_U65_512)
Tim Hall79d07d22020-04-27 18:20:16 +0100278
Tim Hall53c62452021-08-06 13:51:34 +0100279 if self.is_ethos_u65_system:
280 self.max_outstanding_dma = 2
281 axi_port_address_width = 40
282 axi_port_data_width = 128
283 else:
284 self.max_outstanding_dma = 1
285 axi_port_address_width = 32
286 axi_port_data_width = 64
287
Tim Hall289a41d2020-08-04 21:40:14 +0100288 self.max_outstanding_kernels = 3
289
Tim Hall79d07d22020-04-27 18:20:16 +0100290 self.ncores = accel_config.cores
291 self.ofm_ublock = accel_config.ofm_ublock
292 self.ifm_ublock = accel_config.ifm_ublock
Tim Hall79d07d22020-04-27 18:20:16 +0100293 self.ofm_block_max = Block(64, 32, 128)
Tim Hall79d07d22020-04-27 18:20:16 +0100294
Tim Hall79d07d22020-04-27 18:20:16 +0100295 self.max_blockdep = max_blockdep
296
297 dpu_min_height = accel_config.ofm_ublock.height
298 dpu_min_width = accel_config.ofm_ublock.width
299 dpu_dot_product_width = 8
300 dpu_min_ofm_channels = accel_config.ofm_ublock.depth
301
302 self.num_elem_wise_units = accel_config.elem_units
303 self.num_macs_per_cycle = dpu_min_height * dpu_min_width * dpu_dot_product_width * dpu_min_ofm_channels
Tim Hallc1be0872022-03-03 17:50:52 +0000304 assert self.num_macs_per_cycle == accel_config.macs, f"{self.num_macs_per_cycle} != {accel_config.macs}"
Louis Verhaarda208cf82021-03-30 16:07:24 +0200305 # Max value in address offsets
Tim Hall53c62452021-08-06 13:51:34 +0100306 self.max_address_offset = 1 << axi_port_address_width
Tim Hall79d07d22020-04-27 18:20:16 +0100307
Tim Hall1bd531d2020-11-01 20:59:36 +0000308 # Get system configuration and memory mode
Tim Halld8339a72021-05-27 18:49:40 +0100309 self._get_vela_config(vela_config_files, verbose_config, arena_cache_size)
Tim Hall79d07d22020-04-27 18:20:16 +0100310
Tim Hall53c62452021-08-06 13:51:34 +0100311 self.memory_bandwidths_per_cycle = axi_port_data_width * self.memory_clock_scales / 8
Tim Hall79d07d22020-04-27 18:20:16 +0100312
Tim Hall1bd531d2020-11-01 20:59:36 +0000313 self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.core_clock
Tim Hall79d07d22020-04-27 18:20:16 +0100314
Diqing Zhonge8887a32020-09-24 09:53:48 +0200315 # Get output/activation performance numbers
316 self._generate_output_perf_tables(self.accelerator_config)
317
Tim Hall79d07d22020-04-27 18:20:16 +0100318 # sizes as N x H x W x C. we need to round up to these when allocating storage
319 self.storage_rounding_quantums = {
320 TensorFormat.Unknown: (1, 1, 1, 1),
321 TensorFormat.WeightsCompressed: (1, 1, 1, 1),
322 TensorFormat.NHWC: (1, 1, 1, 1),
323 TensorFormat.NHCWB16: (1, 1, 1, 16),
324 }
325
326 # brick sizes as N x H x W x C. We have to fetch whole bricks at a time
327 self.brick_sizes = {
328 TensorFormat.Unknown: (1, 1, 1, 1),
329 TensorFormat.WeightsCompressed: (1, 1, 1, 1),
330 TensorFormat.NHWC: (1, 1, 1, 1),
331 TensorFormat.NHCWB16: (1, 1, 1, 16),
332 }
333
Tim Hall79d07d22020-04-27 18:20:16 +0100334 self.default_weight_format = TensorFormat.WeightsCompressed
335 self.default_feature_map_format = TensorFormat.NHWC
336
Tim Hall79d07d22020-04-27 18:20:16 +0100337 self.tensor_storage_mem_area = {
338 # permanent mem_area
Tim Hall465582c2020-05-26 09:33:14 +0100339 TensorPurpose.Unknown: MemArea.Unknown,
Tim Hall79d07d22020-04-27 18:20:16 +0100340 TensorPurpose.Weights: self.permanent_storage_mem_area,
341 TensorPurpose.FeatureMap: self.feature_map_storage_mem_area,
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200342 TensorPurpose.LUT: self.permanent_storage_mem_area,
Fredrik Svedberge22ba8c2021-01-27 16:53:41 +0100343 TensorPurpose.Scratch: self.feature_map_storage_mem_area,
344 TensorPurpose.ScratchFast: self.fast_storage_mem_area,
Tim Hall79d07d22020-04-27 18:20:16 +0100345 }
346
Patrik Gustavssoneca2e952020-05-27 09:15:11 +0200347 self.tensor_storage_mem_type = {
Dwight Lidman1a9d20e2020-08-11 12:10:36 +0200348 TensorPurpose.Unknown: MemType.Unknown,
Patrik Gustavssoneca2e952020-05-27 09:15:11 +0200349 TensorPurpose.Weights: MemType.Permanent_NPU,
350 TensorPurpose.FeatureMap: MemType.Scratch,
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200351 TensorPurpose.LUT: MemType.Scratch,
Fredrik Svedberge22ba8c2021-01-27 16:53:41 +0100352 TensorPurpose.Scratch: MemType.Scratch,
353 TensorPurpose.ScratchFast: MemType.Scratch_fast,
Patrik Gustavssoneca2e952020-05-27 09:15:11 +0200354 }
Tim Hall79d07d22020-04-27 18:20:16 +0100355
356 self.min_block_sizes = {
357 NpuBlockType.Default: (dpu_min_height, dpu_min_width),
358 NpuBlockType.VectorProduct: (1, 1),
359 NpuBlockType.ConvolutionMxN: (dpu_min_height, dpu_min_width),
360 NpuBlockType.Pooling: (dpu_min_height, dpu_min_width),
361 NpuBlockType.ConvolutionDepthWise: (dpu_min_height, dpu_min_width),
362 NpuBlockType.ElementWise: (1, 1),
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200363 NpuBlockType.ReduceSum: (dpu_min_height, dpu_min_width),
Tim Hall79d07d22020-04-27 18:20:16 +0100364 }
365
366 self.sub_kernel_limits = {
367 NpuBlockType.Default: (8, 8),
368 NpuBlockType.VectorProduct: (1, 1),
369 NpuBlockType.ConvolutionMxN: (8, 8),
370 NpuBlockType.Pooling: (8, 8),
371 NpuBlockType.ConvolutionDepthWise: (8, 8),
372 NpuBlockType.ElementWise: (1, 1),
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200373 NpuBlockType.ReduceSum: (8, 8),
Tim Hall79d07d22020-04-27 18:20:16 +0100374 }
375
376 # weights for scheduler search
377 from .npu_performance import make_bandwidth_array
378
379 self.bandwidth_weights = make_bandwidth_array()
380 self.bandwidth_weights[MemArea.Sram] = 1.0
381 self.bandwidth_weights[MemArea.Dram] = 10.0
382 self.bandwidth_weights[MemArea.OnChipFlash] = 2.0
383 self.bandwidth_weights[MemArea.OffChipFlash] = 20.0
384 self.cycles_weight = 40
385 self.max_sram_used_weight = 1000
386
Tim Hall1bd531d2020-11-01 20:59:36 +0000387 if self.is_spilling_enabled():
Patrik Gustavsson3ab94522020-06-29 17:36:55 +0200388 self.max_sram_used_weight = 0
Tim Hall79d07d22020-04-27 18:20:16 +0100389
390 # Shared Buffer Block allocations
391 self.shram_bank_size = 1024 # bytes
392 self.shram_size_bytes = accel_config.shram_banks * self.shram_bank_size
393 self.shram_reserved_output_banks = 2
394 self.shram_reserved_weight_banks = 0
395 self.shram_reserved_unused_banks = 2 if accel_config.shram_banks > 16 else 0
396 self.shram_total_banks = accel_config.shram_banks - self.shram_reserved_unused_banks
397 self.shram_bank_granules = np.array(accel_config.shram_granules, np.int32)
Louis Verhaard0b8268a2020-08-05 16:11:29 +0200398 self.shram_lut_size = 2048
399 # SHRAM base address of the activation lookup table
400 self.shram_lut_address = self.shram_bank_size * self.available_shram_banks(True)
Tim Hall79d07d22020-04-27 18:20:16 +0100401
402 # Build a map of acceptable IFM/OFM block configurations up to the maximum
403 # IFM/OFM block size.
404 ifm_block_max = self.get_ifm_block_size(32, self.ofm_block_max, Kernel(8, 8))
405 self.block_config_map = dict()
Tim Halld8339a72021-05-27 18:49:40 +0100406 self.generate_block_config_map(Block(ifm_block_max.width * 2, ifm_block_max.height, 128))
Tim Hall79d07d22020-04-27 18:20:16 +0100407
408 # Setup supported operators and restriction checkers class
Jonas Ohlsson45e653d2021-07-26 16:13:12 +0200409 self.tflite_supported_operators = TFLiteSupportedOperators()
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +0200410 self.tosa_supported_operators = TosaSupportedOperators()
Tim Hall79d07d22020-04-27 18:20:16 +0100411
Louis Verhaard0b8268a2020-08-05 16:11:29 +0200412 # Returns available number of SHRAM banks depending on activation lookup table
413 # being used or not
414 def available_shram_banks(self, uses_activation_lut):
415 banks = self.shram_total_banks
416 if uses_activation_lut and self.shram_reserved_unused_banks == 0:
417 banks -= 2
418 return banks
419
Tim Hall79d07d22020-04-27 18:20:16 +0100420 # Calculate block configuration for ALL known IFM operations and
421 # accumulator sizes. Consumers will need to select their preferred
422 # operation and bit-width at read-time.
423 def generate_block_config(self, width, height, depth):
Louis Verhaardf98c6742020-05-12 14:22:38 +0200424 # Number of bytes required for any SHRAM element for a FM of given dimensions.
425 # For IFM: size = H*W*Align(D*BYTE_WIDTH, 8)
426 # For ACC: size = H*W*Align(D,8)*BYTE_WIDTH
427 d1 = round_up(depth, SHRAMElements.PreAlign)
428 d2 = round_up(d1 * SHRAMElements.ByteSizes, SHRAMElements.PostAlign)
429 size_bytes = (height * width) * d2
430
Tim Hall79d07d22020-04-27 18:20:16 +0100431 # Convert byte size (rounded) to size in banks
432 size_banks = round_up_divide(size_bytes, self.shram_bank_size)
433 size_banks *= 2 # Double buffer the IFM/Acc (need twice as many banks)
434 # Round bank requirement to bank granularity
435 required_banks = round_up(size_banks, self.shram_bank_granules)
436 return SHRAMBlockConfig(size_bytes, required_banks)
437
438 @staticmethod
439 def make_block_config_key(width, height, depth):
440 return (int(height), int(width), int(depth))
441
442 def get_block_config(self, width, height, depth):
443 assert depth <= self.ofm_block_max.depth
444 key = ArchitectureFeatures.make_block_config_key(width, height, depth)
445 config = self.block_config_map.get(key, None)
446 return config
447
448 # Generate a key:value map of possible block configurations, where the
449 # key is compounded from the block dimensions: 0x00HHWWCC
450 def generate_block_config_map(self, block: Block):
451 for h in range(1, block.height + 1):
452 for w in range(1, block.width + 1):
453 # All possible IFM/OFM depth values
454 for c in [4, 8, 12, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128]:
455 key = ArchitectureFeatures.make_block_config_key(w, h, c)
456 self.block_config_map[key] = self.generate_block_config(w, h, c)
457
Diqing Zhonge8887a32020-09-24 09:53:48 +0200458 def _generate_output_perf_tables(self, accel_config):
459 if accel_config == Accelerator.Ethos_U55_32:
460 self.output_cycles_per_elem = (2.0, 3.0, 3.0, 3.0, 4.0, 6.0, 1.0, 2.0)
461 self.activation_cycles_per_elem = (1.0, 1.0, 0.0)
462 elif accel_config == Accelerator.Ethos_U55_64:
463 self.output_cycles_per_elem = (1.0, 1.5, 1.5, 1.5, 2.0, 3.0, 0.5, 1.0)
464 self.activation_cycles_per_elem = (1.0, 1.0, 0.0)
465 elif accel_config == Accelerator.Ethos_U55_128:
466 self.output_cycles_per_elem = (0.75, 1.25, 0.75, 0.75, 1.0, 1.5, 0.25, 0.5)
467 self.activation_cycles_per_elem = (1.0, 0.5, 0.0)
Tim Hallc8a73862020-10-27 12:43:14 +0000468 elif accel_config in (Accelerator.Ethos_U55_256, Accelerator.Ethos_U65_256):
Diqing Zhonge8887a32020-09-24 09:53:48 +0200469 self.output_cycles_per_elem = (0.625, 1.125, 0.5, 0.375, 0.5, 0.75, 0.125, 0.25)
470 self.activation_cycles_per_elem = (1.0, 0.25, 0.0)
471 else:
Tim Hallc8a73862020-10-27 12:43:14 +0000472 assert accel_config == Accelerator.Ethos_U65_512
Diqing Zhonge8887a32020-09-24 09:53:48 +0200473 self.output_cycles_per_elem = (0.3125, 0.5625, 0.25, 0.1875, 0.25, 0.375, 0.0625, 0.125)
474 self.activation_cycles_per_elem = (0.5, 0.125, 0.0)
475
Tim Hall79d07d22020-04-27 18:20:16 +0100476 def calc_ifm_block_depth(self, ifm_depth, ifm_bits):
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200477 assert ifm_bits in (8, 16, 32)
Tim Hall79d07d22020-04-27 18:20:16 +0100478 assert ifm_depth > 0
479 ifm_depth = round_up(ifm_depth, self.ifm_ublock.depth)
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200480 max_block_depth = 8 * 32 // ifm_bits
Tim Hall79d07d22020-04-27 18:20:16 +0100481 return min(max_block_depth, ifm_depth)
482
483 # Calculate the size of the IFM block given a depth, target OFM block and a kernel
Tim Hallc30f4952020-06-15 20:47:35 +0100484 def get_ifm_block_size(
485 self,
486 ifm_block_depth,
487 ofm_block: Block,
488 kernel: Kernel,
489 subkernel: Block = Block(8, 8, 65536),
490 ifm_resampling_mode=resampling_mode.NONE,
491 ):
Dwight Lidmana9390f72020-05-13 12:00:08 +0200492 upscaling = 1 if ifm_resampling_mode == resampling_mode.NONE else 2
Tim Hall79d07d22020-04-27 18:20:16 +0100493
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +0200494 # Height
495 dilated_kernel_height = ((kernel.height - 1) * kernel.dilation.y) + 1
496 ifm_block_height = round_up_to_int(
497 ((ofm_block.height - 1) * kernel.stride.y + min(subkernel.height, dilated_kernel_height)) / upscaling
498 )
Tim Hall79d07d22020-04-27 18:20:16 +0100499
Dwight Lidman2f754572021-04-21 12:00:37 +0200500 ifm_block_height = round_up(ifm_block_height, self.ifm_ublock.height)
501
Tim Hall79d07d22020-04-27 18:20:16 +0100502 # Width
Tim Hall79d07d22020-04-27 18:20:16 +0100503 dilated_kernel_width = ((kernel.width - 1) * kernel.dilation.x) + 1
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +0200504 ifm_block_width = round_up_to_int(
505 ((ofm_block.width - 1) * kernel.stride.x + min(subkernel.width, dilated_kernel_width)) / upscaling
506 )
Tim Hall79d07d22020-04-27 18:20:16 +0100507
Dwight Lidman2f754572021-04-21 12:00:37 +0200508 ifm_block_width = round_up(ifm_block_width, self.ifm_ublock.width)
509
Tim Hall79d07d22020-04-27 18:20:16 +0100510 return Block(ifm_block_width, ifm_block_height, ifm_block_depth)
511
Tim Hall1bd531d2020-11-01 20:59:36 +0000512 def is_spilling_enabled(self):
Tim Hall79d07d22020-04-27 18:20:16 +0100513 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000514 Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data
Tim Hall79d07d22020-04-27 18:20:16 +0100515 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000516 return (
517 self._mem_port_mapping(self.cache_mem_area) == MemArea.Sram and self.cache_mem_area != self.arena_mem_area
518 )
Tim Hall79d07d22020-04-27 18:20:16 +0100519
Louis Verhaard024c3552021-03-17 14:26:34 +0100520 def mem_type_size(self, mem_type: MemType) -> int:
patrik.gustavsson6f23da62021-08-19 11:51:45 +0000521 """Returns size in bytes available for the given memory type. This is a hard limit."""
522 if mem_type == MemType.Scratch_fast and self.is_spilling_enabled():
523 # when accessing the scratch fast memory type with memory spilling enabled the arena_cache_size refers to
524 # the cache memory area which is a hard limit
Tim Halld8339a72021-05-27 18:49:40 +0100525 return self.arena_cache_size
Tim Hall53c62452021-08-06 13:51:34 +0100526 else:
patrik.gustavsson6f23da62021-08-19 11:51:45 +0000527 # for all other memory types and modes the hard limit is the maximum possible address offset
Tim Hall53c62452021-08-06 13:51:34 +0100528 return self.max_address_offset
Louis Verhaard024c3552021-03-17 14:26:34 +0100529
Tim Hall1bd531d2020-11-01 20:59:36 +0000530 def _mem_port_mapping(self, mem_port):
531 mem_port_mapping = {MemPort.Axi0: self.axi0_port, MemPort.Axi1: self.axi1_port}
532 return mem_port_mapping[mem_port]
Tim Hall79d07d22020-04-27 18:20:16 +0100533
Tim Hall1bd531d2020-11-01 20:59:36 +0000534 def _set_default_sys_config(self):
Tim Hall1bd531d2020-11-01 20:59:36 +0000535 # ArchitectureFeatures.DEFAULT_CONFIG values
536 if self.is_ethos_u65_system:
537 # Default Ethos-U65 system configuration
538 # Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s)
539 self.core_clock = 1e9
540 self.axi0_port = MemArea.Sram
541 self.axi1_port = MemArea.Dram
542 self.memory_clock_scales[MemArea.Sram] = 1.0
543 self.memory_clock_scales[MemArea.Dram] = 0.75 # 3 / 4
Diqing Zhongf842b692020-12-11 13:07:37 +0100544 self.memory_burst_length[MemArea.Sram] = 32
545 self.memory_burst_length[MemArea.Dram] = 128
546 self.memory_latency[MemArea.Sram][BandwidthDirection.Read] = 32
547 self.memory_latency[MemArea.Sram][BandwidthDirection.Write] = 32
548 self.memory_latency[MemArea.Dram][BandwidthDirection.Read] = 500
549 self.memory_latency[MemArea.Dram][BandwidthDirection.Write] = 250
Tim Hall79d07d22020-04-27 18:20:16 +0100550 else:
Tim Hall1bd531d2020-11-01 20:59:36 +0000551 # Default Ethos-U55 system configuration
552 # Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
553 self.core_clock = 500e6
554 self.axi0_port = MemArea.Sram
555 self.axi1_port = MemArea.OffChipFlash
556 self.memory_clock_scales[MemArea.Sram] = 1.0
557 self.memory_clock_scales[MemArea.OffChipFlash] = 0.125 # 1 / 8
Diqing Zhongf842b692020-12-11 13:07:37 +0100558 self.memory_burst_length[MemArea.Sram] = 32
559 self.memory_burst_length[MemArea.OffChipFlash] = 128
560 self.memory_latency[MemArea.Sram][BandwidthDirection.Read] = 32
561 self.memory_latency[MemArea.Sram][BandwidthDirection.Write] = 32
562 self.memory_latency[MemArea.OffChipFlash][BandwidthDirection.Read] = 64
563 self.memory_latency[MemArea.OffChipFlash][BandwidthDirection.Write] = 64
Tim Hall79d07d22020-04-27 18:20:16 +0100564
Tim Hall1bd531d2020-11-01 20:59:36 +0000565 def _set_default_mem_mode(self):
Tim Hall1bd531d2020-11-01 20:59:36 +0000566 # ArchitectureFeatures.DEFAULT_CONFIG values
567 if self.is_ethos_u65_system:
568 # Default Ethos-U65 memory mode
Tim Hall70b71a52020-12-22 11:47:54 +0000569 # Dedicated SRAM: the SRAM is only for use by the Ethos-U
570 # The non-SRAM memory is assumed to be read-writeable
Tim Hall1bd531d2020-11-01 20:59:36 +0000571 self.const_mem_area = MemPort.Axi1
572 self.arena_mem_area = MemPort.Axi1
573 self.cache_mem_area = MemPort.Axi0
Tim Halld8339a72021-05-27 18:49:40 +0100574 self.arena_cache_size = 384 * 1024
Tim Hall1bd531d2020-11-01 20:59:36 +0000575 else:
Tim Hall70b71a52020-12-22 11:47:54 +0000576 # Default Ethos-U55 memory mode
577 # Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
578 # The non-SRAM memory is assumed to be read-only
Tim Hall1bd531d2020-11-01 20:59:36 +0000579 self.const_mem_area = MemPort.Axi1
580 self.arena_mem_area = MemPort.Axi0
581 self.cache_mem_area = MemPort.Axi0
Tim Halld8339a72021-05-27 18:49:40 +0100582 self.arena_cache_size = self.max_address_offset
Tim Hall79d07d22020-04-27 18:20:16 +0100583
def _get_vela_config(self, vela_config_files, verbose_config, arena_cache_size_from_cli):
    """
    Gets the system configuration and memory modes from one or more Vela configuration file(s) or uses some
    defaults.

    vela_config_files is handed to ConfigParser.read() unless it is None, in which case no file is read.
    verbose_config, when true, prints the resulting configuration. arena_cache_size_from_cli, when not None,
    replaces the arena cache size obtained from the defaults or the configuration file.

    Raises CliOptionError if the selected system configuration or memory mode cannot be resolved, and
    ConfigOptionError if the resulting memory areas or arena cache size are not valid.
    """

    # all properties are optional and are initialised to a value of 1 (or the equivalent)
    self.core_clock = 1
    self.axi0_port = MemArea(1)
    self.axi1_port = MemArea(1)
    self.memory_clock_scales = np.ones(MemArea.Size)
    # np.int was only an alias of the builtin int and has been removed from NumPy, so int is used directly
    self.memory_burst_length = np.ones(MemArea.Size, int)
    self.memory_latency = np.zeros((MemArea.Size, BandwidthDirection.Size), int)
    self.const_mem_area = MemPort(1)
    self.arena_mem_area = MemPort(1)
    self.cache_mem_area = MemPort(1)
    self.arena_cache_size = self.max_address_offset
    # records where the final arena cache size came from, only used by the verbose output
    arena_cache_size_loc_text = "Default"

    # read configuration file(s)
    self.vela_config = None

    if vela_config_files is not None:
        self.vela_config = ConfigParser()
        self.vela_config.read(vela_config_files)

    # read system configuration
    sys_cfg_section = "System_Config." + self.system_config

    if self.vela_config is not None and self.vela_config.has_section(sys_cfg_section):
        self.core_clock = float(self._read_config(sys_cfg_section, "core_clock", self.core_clock))
        self.axi0_port = MemArea[self._read_config(sys_cfg_section, "axi0_port", self.axi0_port)]
        self.axi1_port = MemArea[self._read_config(sys_cfg_section, "axi1_port", self.axi1_port)]

        # the per-memory options are only read for the memory areas attached to the two AXI ports
        for mem_area in (self.axi0_port, self.axi1_port):
            self.memory_clock_scales[mem_area] = float(
                self._read_config(
                    sys_cfg_section, mem_area.name + "_clock_scale", self.memory_clock_scales[mem_area]
                )
            )
            self.memory_burst_length[mem_area] = int(
                self._read_config(
                    sys_cfg_section, mem_area.name + "_burst_length", self.memory_burst_length[mem_area]
                )
            )
            self.memory_latency[mem_area][BandwidthDirection.Read] = int(
                self._read_config(
                    sys_cfg_section,
                    mem_area.name + "_read_latency",
                    self.memory_latency[mem_area][BandwidthDirection.Read],
                )
            )
            self.memory_latency[mem_area][BandwidthDirection.Write] = int(
                self._read_config(
                    sys_cfg_section,
                    mem_area.name + "_write_latency",
                    self.memory_latency[mem_area][BandwidthDirection.Write],
                )
            )
    elif self.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
        self._set_default_sys_config()

    elif vela_config_files is None:
        raise CliOptionError("--config", vela_config_files, "Vela config file not specified")

    else:
        raise CliOptionError(
            "--system-config",
            self.system_config,
            f"Section {sys_cfg_section} not found in Vela config file",
        )

    # read the memory mode
    mem_mode_section = "Memory_Mode." + self.memory_mode

    if self.vela_config is not None and self.vela_config.has_section(mem_mode_section):
        self.const_mem_area = MemPort[
            self._read_config(mem_mode_section, "const_mem_area", self.const_mem_area.name)
        ]
        self.arena_mem_area = MemPort[
            self._read_config(mem_mode_section, "arena_mem_area", self.arena_mem_area.name)
        ]
        self.cache_mem_area = MemPort[
            self._read_config(mem_mode_section, "cache_mem_area", self.cache_mem_area.name)
        ]
        # _read_config appends to found, the last entry tells whether the key was present in the file
        found = []
        self.arena_cache_size = int(
            self._read_config(mem_mode_section, "arena_cache_size", self.arena_cache_size, found)
        )
        if found[-1]:
            arena_cache_size_loc_text = "Configuration file"

    elif self.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
        self._set_default_mem_mode()

    elif vela_config_files is None:
        raise CliOptionError("--config", vela_config_files, "Vela config file not specified")

    else:
        raise CliOptionError(
            "--memory-mode",
            self.memory_mode,
            f"Section {mem_mode_section} not found in Vela config file",
        )

    # override sram to onchipflash
    if self._mem_port_mapping(self.const_mem_area) == MemArea.Sram:
        if self.const_mem_area == self.arena_mem_area == self.cache_mem_area:
            print(
                "Info: Changing const_mem_area from Sram to OnChipFlash. This will use the same characteristics as"
                " Sram."
            )
            # move the constant data to the other port and declare that port as OnChipFlash
            if self.const_mem_area == MemPort.Axi0:
                self.const_mem_area = MemPort.Axi1
                self.axi1_port = MemArea.OnChipFlash
            else:
                self.const_mem_area = MemPort.Axi0
                self.axi0_port = MemArea.OnChipFlash
            self.memory_clock_scales[MemArea.OnChipFlash] = self.memory_clock_scales[MemArea.Sram]
            self.memory_burst_length[MemArea.OnChipFlash] = self.memory_burst_length[MemArea.Sram]
            self.memory_latency[MemArea.OnChipFlash] = self.memory_latency[MemArea.Sram]

    # override sram usage
    if arena_cache_size_from_cli is not None:
        self.arena_cache_size = arena_cache_size_from_cli
        arena_cache_size_loc_text = "CLI option"

    # check configuration
    if self._mem_port_mapping(self.const_mem_area) not in (
        MemArea.Dram,
        MemArea.OnChipFlash,
        MemArea.OffChipFlash,
    ):
        raise ConfigOptionError(
            "const_mem_area",
            self._mem_port_mapping(self.const_mem_area).name,
            "Dram or OnChipFlash or OffChipFlash",
        )

    if self._mem_port_mapping(self.arena_mem_area) not in (MemArea.Sram, MemArea.Dram):
        raise ConfigOptionError("arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram or Dram")

    if self._mem_port_mapping(self.cache_mem_area) != MemArea.Sram:
        raise ConfigOptionError("cache_mem_area", self._mem_port_mapping(self.cache_mem_area).name, "Sram")

    if self.arena_cache_size < 0:
        raise ConfigOptionError("arena_cache_size", self.arena_cache_size, ">= 0")
    if self.arena_cache_size > self.max_address_offset:
        raise ConfigOptionError(
            "arena_cache_size",
            f"{self.arena_cache_size}. Size is out of bounds, maximum is: {self.max_address_offset}",
        )

    # assign existing memory areas
    self.permanent_storage_mem_area = self._mem_port_mapping(self.const_mem_area)
    self.feature_map_storage_mem_area = self._mem_port_mapping(self.arena_mem_area)
    self.fast_storage_mem_area = self._mem_port_mapping(self.cache_mem_area)

    # display the system configuration and memory mode
    if verbose_config:
        print("Configuration files:")
        print(f" {vela_config_files}")
        print(f"System Configuration ({self.system_config}):")
        print(f" core_clock = {self.core_clock}")
        print(f" axi0_port = {self.axi0_port.name}")
        print(f" axi1_port = {self.axi1_port.name}")
        for mem in (MemArea.Sram, MemArea.Dram, MemArea.OnChipFlash, MemArea.OffChipFlash):
            print(f" {mem.name}_clock_scales = {self.memory_clock_scales[mem]}")
            print(f" {mem.name}_burst_length = {self.memory_burst_length[mem]}")
            print(f" {mem.name}_read_latency = {self.memory_latency[mem][BandwidthDirection.Read]}")
            print(f" {mem.name}_write_latency = {self.memory_latency[mem][BandwidthDirection.Write]}")

        print(f"Memory Mode ({self.memory_mode}):")
        print(f" const_mem_area = {self.const_mem_area.name}")
        print(f" arena_mem_area = {self.arena_mem_area.name}")
        print(f" cache_mem_area = {self.cache_mem_area.name}")
        print(f" arena_cache_size = {self.arena_cache_size} from {arena_cache_size_loc_text}")

        print("Architecture Settings:")
        print(f" permanent_storage_mem_area = {self.permanent_storage_mem_area.name}")
        print(f" feature_map_storage_mem_area = {self.feature_map_storage_mem_area.name}")
        print(f" fast_storage_mem_area = {self.fast_storage_mem_area.name}")
Tim Hall1bd531d2020-11-01 20:59:36 +0000766
Tim Halld8339a72021-05-27 18:49:40 +0100767 def _read_config(self, section, key, current_value, found=None):
Tim Hall79d07d22020-04-27 18:20:16 +0100768 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000769 Reads a given key from a particular section in the Vela config file. If the section contains the 'inherit'
770 option then we recurse into the section specified. If inherited sections result in multiple keys for a
Tim Halld8339a72021-05-27 18:49:40 +0100771 particular option then the key from the parent section is used, regardless of the parsing order. if specified
772 found should be an empty list that this function will append a True or False to the end of the list indicating
773 whether the key was found or not.
Tim Hall79d07d22020-04-27 18:20:16 +0100774 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000775 if not self.vela_config.has_section(section):
Michael McGeagh7a6f8432020-12-02 15:29:22 +0000776 raise ConfigOptionError("section", f"{section}. The section was not found in the Vela config file(s)")
Tim Hall1bd531d2020-11-01 20:59:36 +0000777
Tim Halld8339a72021-05-27 18:49:40 +0100778 result = str(current_value) if current_value is not None else None
779 if found is not None:
780 found.append(False)
781
Tim Hall1bd531d2020-11-01 20:59:36 +0000782 if self.vela_config.has_option(section, "inherit"):
783 inheritance_section = self.vela_config.get(section, "inherit")
784 # check for recursion loop
785 if inheritance_section == section:
786 raise ConfigOptionError(
Jonas Ohlssond8575072022-03-30 10:30:25 +0200787 "inherit",
788 f"{inheritance_section}. This references its own section and recursion is not allowed",
Tim Hall1bd531d2020-11-01 20:59:36 +0000789 )
Tim Halld8339a72021-05-27 18:49:40 +0100790 result = self._read_config(inheritance_section, key, result, found)
Tim Hall1bd531d2020-11-01 20:59:36 +0000791
792 if self.vela_config.has_option(section, key):
793 result = self.vela_config.get(section, key)
Tim Halld8339a72021-05-27 18:49:40 +0100794 if found is not None:
795 found.append(True)
Tim Hall1bd531d2020-11-01 20:59:36 +0000796
Tim Hall79d07d22020-04-27 18:20:16 +0100797 return result
Louis Verhaard52078302020-11-18 13:35:06 +0100798
799
# Default arch instances are expensive to create, so each one is built once and kept here for reuse
default_arch_cache = {}
802
803
def create_default_arch(accelerator: Accelerator) -> ArchitectureFeatures:
    """Returns the architecture features object for the accelerator using default settings, cached after first use"""
    if accelerator in default_arch_cache:
        return default_arch_cache[accelerator]

    # first request for this accelerator: build it without a config file and with the default configurations
    arch = ArchitectureFeatures(
        vela_config_files=None,
        accelerator_config=accelerator.value,
        system_config=ArchitectureFeatures.DEFAULT_CONFIG,
        memory_mode=ArchitectureFeatures.DEFAULT_CONFIG,
        max_blockdep=ArchitectureFeatures.MAX_BLOCKDEP,
        verbose_config=False,
        arena_cache_size=None,
    )
    default_arch_cache[accelerator] = arch
    return arch