blob: 44b28c6d990ed212d688c30c9701a6f0ae242dc8 [file] [log] [blame]
Tim Hall1c8f92d2024-01-25 10:17:26 +00001# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
Tim Hall79d07d22020-04-27 18:20:16 +01002#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Rickard Bolinbc6ee582022-11-04 08:24:29 +000016#
Tim Hall79d07d22020-04-27 18:20:16 +010017# Description:
Tim Hallc8a73862020-10-27 12:43:14 +000018# Holds a container for Ethos-U and System architecture parameters.
Diego Russoea6111a2020-04-14 18:41:58 +010019import enum
Tim Hall79d07d22020-04-27 18:20:16 +010020from collections import namedtuple
21from configparser import ConfigParser
Diego Russoea6111a2020-04-14 18:41:58 +010022
Tim Hall79d07d22020-04-27 18:20:16 +010023import numpy as np
Diego Russoea6111a2020-04-14 18:41:58 +010024
Louis Verhaardaeae5672020-11-02 18:04:27 +010025from .api import NpuAccelerator
Tim Hall1bd531d2020-11-01 20:59:36 +000026from .errors import CliOptionError
27from .errors import ConfigOptionError
Dwight Lidmana9390f72020-05-13 12:00:08 +020028from .ethos_u55_regs.ethos_u55_regs import resampling_mode
Louis Verhaard69b31762020-11-17 09:45:20 +010029from .numeric_util import full_shape
Diego Russoe8a10452020-04-21 17:39:10 +010030from .numeric_util import round_up
31from .numeric_util import round_up_divide
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +020032from .numeric_util import round_up_to_int
Tim Hall4ed38bc2020-10-20 18:54:20 +010033from .operation import Kernel
Diego Russoea6111a2020-04-14 18:41:58 +010034from .operation import NpuBlockType
Tim Hall4ed38bc2020-10-20 18:54:20 +010035from .operation import PointXYZ
Diqing Zhongf842b692020-12-11 13:07:37 +010036from .tensor import BandwidthDirection
Diego Russoe8a10452020-04-21 17:39:10 +010037from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020038from .tensor import MemType
Diego Russoe8a10452020-04-21 17:39:10 +010039from .tensor import TensorFormat
40from .tensor import TensorPurpose
Jonas Ohlsson45e653d2021-07-26 16:13:12 +020041from .tflite_supported_operators import TFLiteSupportedOperators
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +020042from .tosa_supported_operators import TosaSupportedOperators
Tim Hall79d07d22020-04-27 18:20:16 +010043
Tim Hall79d07d22020-04-27 18:20:16 +010044
45class Block:
Tim Halld8339a72021-05-27 18:49:40 +010046 def __init__(self, w=0, h=0, d=0):
Tim Hall79d07d22020-04-27 18:20:16 +010047 self.width = w
48 self.height = h
49 self.depth = d
50
Tim Halld8339a72021-05-27 18:49:40 +010051 def elements(self):
52 return self.width * self.height * self.depth
53
54 def elements_wh(self):
55 return self.width * self.height
56
57 def clone(self):
58 return Block(self.width, self.height, self.depth)
59
60 def as_list(self):
61 return [self.height, self.width, self.depth]
62
Tim Hall79d07d22020-04-27 18:20:16 +010063 def __eq__(self, other):
64 if self.width == other.width and self.height == other.height and self.depth == other.depth:
65 return True
66 else:
67 return False
68
69 def __repr__(self):
70 return "<Block: {0},{1},{2}>".format(self.width, self.height, self.depth)
71
Tim Halld8339a72021-05-27 18:49:40 +010072 def to_hwc(self):
73 return [self.height, self.width, self.depth]
74
Tim Hall79d07d22020-04-27 18:20:16 +010075 @classmethod
76 def from_string(cls, s):
77 w, h, c = (int(v) for v in s.split("x"))
78 return cls(w, h, c)
79
Louis Verhaard69b31762020-11-17 09:45:20 +010080 @classmethod
81 def from_shape(cls, shape) -> "Block":
82 """Converts the shape to a Block"""
83 shp = full_shape(3, shape, 1)
84 # Note: index from end, as len(shp) may be > 3
85 return Block(shp[-2], shp[-3], shp[-1])
86
Tim Halld8339a72021-05-27 18:49:40 +010087 @classmethod
88 def min(cls, a, b):
89 return cls(min(a.width, b.width), min(a.height, b.height), min(a.depth, b.depth))
90
91 @classmethod
92 def max(cls, a, b):
93 return cls(max(a.width, b.width), max(a.height, b.height), max(a.depth, b.depth))
94
95 @classmethod
96 def round(cls, a, b):
97 return cls(round_up(a.width, b.width), round_up(a.height, b.height), round_up(a.depth, b.depth))
98
99 @classmethod
100 def div_round_up(cls, a, b):
101 return cls(
102 round_up_divide(a.width, b.width), round_up_divide(a.height, b.height), round_up_divide(a.depth, b.depth)
103 )
104
Tim Hall79d07d22020-04-27 18:20:16 +0100105
106class Rect:
107 def __init__(self, x, y, z, x2, y2, z2):
108 self.x = x
109 self.y = y
110 self.z = z
111 self.x2 = x2
112 self.y2 = y2
113 self.z2 = z2
114
115 def start(self):
116 return PointXYZ(self.x, self.y, self.z)
117
118 def end(self):
119 return PointXYZ(self.x2, self.y2, self.z2)
120
121 def size(self):
122 return Block(self.x2 - self.x + 1, self.y2 - self.y + 1, self.z2 - self.z + 1)
123
124 def __repr__(self):
125 return "<Rect: ({0},{1},{2}) ({3},{4},{5})>".format(self.x, self.y, self.z, self.x2, self.y2, self.z2)
126
127
Tim Hall79d07d22020-04-27 18:20:16 +0100128class SHRAMElements:
129 IFM8 = 0
130 IFM16 = 1
131 IFM8_Elementwise = 2
132 IFM16_Elementwise = 3
Fredrik Svedberg597fd3f2020-08-13 10:02:53 +0200133 IFM32 = 4
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200134 Acc16 = 5
135 Acc32 = 6
136 Acc40 = 7
Tim Hall79d07d22020-04-27 18:20:16 +0100137 Last = Acc40
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200138 BitSizes = np.array([8, 16, 8, 16, 32, 16, 32, 40], np.int32)
Louis Verhaardf98c6742020-05-12 14:22:38 +0200139 ByteSizes = BitSizes // 8
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200140 PostAlign = np.array([8, 8, 8, 8, 8, 1, 1, 1], np.int32)
141 PreAlign = np.array([1, 1, 1, 1, 1, 8, 8, 8], np.int32)
Tim Hall79d07d22020-04-27 18:20:16 +0100142
143
144class SHRAMBlockConfig:
145 def __init__(self, sizes, banks):
146 assert len(banks) == SHRAMElements.Last + 1
147 self.sizes = sizes
148 self.banks = banks
149
150
Tim Hallc8a73862020-10-27 12:43:14 +0000151# Area indices must match Ethos-U SHRAM layout spec
Tim Hall79d07d22020-04-27 18:20:16 +0100152class SharedBufferArea(enum.IntEnum):
153 OFM = 0
154 Weights = 1
155 IFM = 2
156 Accumulators = 3
157 Size = Accumulators + 1
158
159
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100160class Accelerator(enum.Enum):
161 Ethos_U55_32 = "ethos-u55-32"
162 Ethos_U55_64 = "ethos-u55-64"
163 Ethos_U55_128 = "ethos-u55-128"
164 Ethos_U55_256 = "ethos-u55-256"
Tim Hallc8a73862020-10-27 12:43:14 +0000165 Ethos_U65_256 = "ethos-u65-256"
166 Ethos_U65_512 = "ethos-u65-512"
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100167
168 @classmethod
169 def member_list(cls):
170 return [e.value for e in cls]
171
Louis Verhaardaeae5672020-11-02 18:04:27 +0100172 @classmethod
173 def from_npu_accelerator(cls, npu_accelerator: NpuAccelerator) -> "Accelerator":
174 """Converts the given public API object to Accelerator (used internally)"""
175 accelerator_map = {
176 NpuAccelerator.Ethos_U55_32: cls.Ethos_U55_32,
177 NpuAccelerator.Ethos_U55_64: cls.Ethos_U55_64,
178 NpuAccelerator.Ethos_U55_128: cls.Ethos_U55_128,
179 NpuAccelerator.Ethos_U55_256: cls.Ethos_U55_256,
180 NpuAccelerator.Ethos_U65_256: cls.Ethos_U65_256,
181 NpuAccelerator.Ethos_U65_512: cls.Ethos_U65_512,
182 }
183 assert npu_accelerator in accelerator_map, f"Unsupported accelerator {npu_accelerator}"
184 return accelerator_map[npu_accelerator]
185
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100186
Tim Hall1bd531d2020-11-01 20:59:36 +0000187@enum.unique
188class MemPort(enum.Enum):
189 Axi0 = enum.auto()
190 Axi1 = enum.auto()
191
192
Tim Halld8339a72021-05-27 18:49:40 +0100193SHRAMConfig = namedtuple(
194 "SHRAMConfig", ["reserved_output_banks", "bank_size_bytes", "total_banks", "reserved_end_banks"]
195)
196
197
Tim Hall79d07d22020-04-27 18:20:16 +0100198class ArchitectureFeatures:
Tim Hallc8a73862020-10-27 12:43:14 +0000199 """This class is a container for various parameters of the Ethos-U core
Diqing Zhonge8887a32020-09-24 09:53:48 +0200200 and system configuration that can be tuned, either by command line
Tim Hallc8a73862020-10-27 12:43:14 +0000201 parameters or by the Ethos-U architects. The class is often passed
Diqing Zhonge8887a32020-09-24 09:53:48 +0200202 around to passes that need to do architecture-dependent actions.
Tim Hall79d07d22020-04-27 18:20:16 +0100203
Diqing Zhonge8887a32020-09-24 09:53:48 +0200204 Note the difference between ArchitectureFeatures and CompilerOptions
Tim Hallc8a73862020-10-27 12:43:14 +0000205 - ArchitectureFeatures is for changing the Ethos-U and system architecture
Diqing Zhonge8887a32020-09-24 09:53:48 +0200206 - CompilerOptions is for changing the behaviour of the compiler
207 """
Tim Hall79d07d22020-04-27 18:20:16 +0100208
209 ArchitectureConfig = namedtuple(
210 "ArchitectureConfig", "macs cores ofm_ublock ifm_ublock shram_banks shram_granules elem_units"
211 )
212 accelerator_configs = {
Tim Hallc8a73862020-10-27 12:43:14 +0000213 Accelerator.Ethos_U65_512: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200214 256, 2, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100215 ),
Tim Hallc8a73862020-10-27 12:43:14 +0000216 Accelerator.Ethos_U65_256: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200217 256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100218 ),
219 Accelerator.Ethos_U55_256: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200220 256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100221 ),
222 Accelerator.Ethos_U55_128: ArchitectureConfig(
Dwight Lidman2f754572021-04-21 12:00:37 +0200223 128, 1, Block(2, 1, 8), Block(2, 1, 8), 24, [4, 4, 4, 4, 8, 4, 8, 12], 4
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100224 ),
225 Accelerator.Ethos_U55_64: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200226 64, 1, Block(1, 1, 8), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4, 8], 2
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100227 ),
228 Accelerator.Ethos_U55_32: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200229 32, 1, Block(1, 1, 4), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4, 4], 1
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100230 ),
Tim Hall79d07d22020-04-27 18:20:16 +0100231 }
232
233 OFMSplitDepth = 16
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100234 SubKernelMax = Block(8, 8, 65536)
Tim Hall79d07d22020-04-27 18:20:16 +0100235
Tim Hall1bd531d2020-11-01 20:59:36 +0000236 DEFAULT_CONFIG = "internal-default"
Louis Verhaard1e170182020-11-26 11:42:04 +0100237 MAX_BLOCKDEP = 3
Tim Hall1bd531d2020-11-01 20:59:36 +0000238
Tim Hall79d07d22020-04-27 18:20:16 +0100239 def __init__(
240 self,
Tim Hall1bd531d2020-11-01 20:59:36 +0000241 vela_config_files,
Tim Hall79d07d22020-04-27 18:20:16 +0100242 accelerator_config,
243 system_config,
Tim Hall1bd531d2020-11-01 20:59:36 +0000244 memory_mode,
Tim Hall79d07d22020-04-27 18:20:16 +0100245 max_blockdep,
Tim Hall1bd531d2020-11-01 20:59:36 +0000246 verbose_config,
Tim Halld8339a72021-05-27 18:49:40 +0100247 arena_cache_size,
Tim Hall79d07d22020-04-27 18:20:16 +0100248 ):
249 accelerator_config = accelerator_config.lower()
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100250 if accelerator_config not in Accelerator.member_list():
Tim Hall1bd531d2020-11-01 20:59:36 +0000251 raise CliOptionError("--accelerator-config", self.accelerator_config, "Unknown accelerator configuration")
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100252 self.accelerator_config = Accelerator(accelerator_config)
Tim Hall79d07d22020-04-27 18:20:16 +0100253 accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config]
254 self.config = accel_config
255
Tim Halld8339a72021-05-27 18:49:40 +0100256 self.accumulator_granules = {
257 SHRAMElements.Acc16: accel_config.shram_granules[SHRAMElements.Acc16],
258 SHRAMElements.Acc32: accel_config.shram_granules[SHRAMElements.Acc32],
259 SHRAMElements.Acc40: accel_config.shram_granules[SHRAMElements.Acc40],
260 }
261
262 self.ifm_bank_granules = {
263 8: accel_config.shram_granules[SHRAMElements.IFM8],
264 16: accel_config.shram_granules[SHRAMElements.IFM16],
265 32: accel_config.shram_granules[SHRAMElements.IFM32],
266 }
267
268 self.ifm_ew_bank_granules = {
269 8: accel_config.shram_granules[SHRAMElements.IFM8_Elementwise],
270 16: accel_config.shram_granules[SHRAMElements.IFM16_Elementwise],
271 32: accel_config.shram_granules[SHRAMElements.IFM32],
272 }
273
274 self.shram = SHRAMConfig(2, 1024, accel_config.shram_banks, 2 if accel_config.shram_banks > 16 else 0)
275
Tim Hall79d07d22020-04-27 18:20:16 +0100276 self.system_config = system_config
Tim Hall1bd531d2020-11-01 20:59:36 +0000277 self.memory_mode = memory_mode
Tim Hallc8a73862020-10-27 12:43:14 +0000278 self.is_ethos_u65_system = self.accelerator_config in (Accelerator.Ethos_U65_256, Accelerator.Ethos_U65_512)
Tim Hall79d07d22020-04-27 18:20:16 +0100279
Tim Hall53c62452021-08-06 13:51:34 +0100280 if self.is_ethos_u65_system:
281 self.max_outstanding_dma = 2
282 axi_port_address_width = 40
283 axi_port_data_width = 128
284 else:
285 self.max_outstanding_dma = 1
286 axi_port_address_width = 32
287 axi_port_data_width = 64
288
Rickard Bolin909923a2023-10-17 07:08:54 +0000289 self.max_outstanding_kernels = 2
Tim Hall289a41d2020-08-04 21:40:14 +0100290
Tim Hall79d07d22020-04-27 18:20:16 +0100291 self.ncores = accel_config.cores
292 self.ofm_ublock = accel_config.ofm_ublock
293 self.ifm_ublock = accel_config.ifm_ublock
Tim Hall79d07d22020-04-27 18:20:16 +0100294 self.ofm_block_max = Block(64, 32, 128)
Tim Hall79d07d22020-04-27 18:20:16 +0100295
Tim Hall79d07d22020-04-27 18:20:16 +0100296 self.max_blockdep = max_blockdep
297
298 dpu_min_height = accel_config.ofm_ublock.height
299 dpu_min_width = accel_config.ofm_ublock.width
300 dpu_dot_product_width = 8
301 dpu_min_ofm_channels = accel_config.ofm_ublock.depth
302
303 self.num_elem_wise_units = accel_config.elem_units
304 self.num_macs_per_cycle = dpu_min_height * dpu_min_width * dpu_dot_product_width * dpu_min_ofm_channels
Tim Hallc1be0872022-03-03 17:50:52 +0000305 assert self.num_macs_per_cycle == accel_config.macs, f"{self.num_macs_per_cycle} != {accel_config.macs}"
Louis Verhaarda208cf82021-03-30 16:07:24 +0200306 # Max value in address offsets
Tim Hall53c62452021-08-06 13:51:34 +0100307 self.max_address_offset = 1 << axi_port_address_width
Tim Hall79d07d22020-04-27 18:20:16 +0100308
Tim Hall1bd531d2020-11-01 20:59:36 +0000309 # Get system configuration and memory mode
Tim Halld8339a72021-05-27 18:49:40 +0100310 self._get_vela_config(vela_config_files, verbose_config, arena_cache_size)
Tim Hall79d07d22020-04-27 18:20:16 +0100311
Tim Hall53c62452021-08-06 13:51:34 +0100312 self.memory_bandwidths_per_cycle = axi_port_data_width * self.memory_clock_scales / 8
Tim Hall79d07d22020-04-27 18:20:16 +0100313
Tim Hall1bd531d2020-11-01 20:59:36 +0000314 self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.core_clock
Tim Hall79d07d22020-04-27 18:20:16 +0100315
Diqing Zhonge8887a32020-09-24 09:53:48 +0200316 # Get output/activation performance numbers
317 self._generate_output_perf_tables(self.accelerator_config)
318
Tim Hall79d07d22020-04-27 18:20:16 +0100319 # sizes as N x H x W x C. we need to round up to these when allocating storage
320 self.storage_rounding_quantums = {
321 TensorFormat.Unknown: (1, 1, 1, 1),
322 TensorFormat.WeightsCompressed: (1, 1, 1, 1),
323 TensorFormat.NHWC: (1, 1, 1, 1),
324 TensorFormat.NHCWB16: (1, 1, 1, 16),
325 }
326
327 # brick sizes as N x H x W x C. We have to fetch whole bricks at a time
328 self.brick_sizes = {
329 TensorFormat.Unknown: (1, 1, 1, 1),
330 TensorFormat.WeightsCompressed: (1, 1, 1, 1),
331 TensorFormat.NHWC: (1, 1, 1, 1),
332 TensorFormat.NHCWB16: (1, 1, 1, 16),
333 }
334
Tim Hall79d07d22020-04-27 18:20:16 +0100335 self.default_weight_format = TensorFormat.WeightsCompressed
336 self.default_feature_map_format = TensorFormat.NHWC
337
Tim Hall79d07d22020-04-27 18:20:16 +0100338 self.tensor_storage_mem_area = {
339 # permanent mem_area
Tim Hall465582c2020-05-26 09:33:14 +0100340 TensorPurpose.Unknown: MemArea.Unknown,
Tim Hall79d07d22020-04-27 18:20:16 +0100341 TensorPurpose.Weights: self.permanent_storage_mem_area,
342 TensorPurpose.FeatureMap: self.feature_map_storage_mem_area,
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200343 TensorPurpose.LUT: self.permanent_storage_mem_area,
Fredrik Svedberge22ba8c2021-01-27 16:53:41 +0100344 TensorPurpose.Scratch: self.feature_map_storage_mem_area,
345 TensorPurpose.ScratchFast: self.fast_storage_mem_area,
Tim Hall79d07d22020-04-27 18:20:16 +0100346 }
347
Patrik Gustavssoneca2e952020-05-27 09:15:11 +0200348 self.tensor_storage_mem_type = {
Dwight Lidman1a9d20e2020-08-11 12:10:36 +0200349 TensorPurpose.Unknown: MemType.Unknown,
Patrik Gustavssoneca2e952020-05-27 09:15:11 +0200350 TensorPurpose.Weights: MemType.Permanent_NPU,
351 TensorPurpose.FeatureMap: MemType.Scratch,
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200352 TensorPurpose.LUT: MemType.Scratch,
Fredrik Svedberge22ba8c2021-01-27 16:53:41 +0100353 TensorPurpose.Scratch: MemType.Scratch,
354 TensorPurpose.ScratchFast: MemType.Scratch_fast,
Patrik Gustavssoneca2e952020-05-27 09:15:11 +0200355 }
Tim Hall79d07d22020-04-27 18:20:16 +0100356
357 self.min_block_sizes = {
358 NpuBlockType.Default: (dpu_min_height, dpu_min_width),
359 NpuBlockType.VectorProduct: (1, 1),
360 NpuBlockType.ConvolutionMxN: (dpu_min_height, dpu_min_width),
361 NpuBlockType.Pooling: (dpu_min_height, dpu_min_width),
362 NpuBlockType.ConvolutionDepthWise: (dpu_min_height, dpu_min_width),
363 NpuBlockType.ElementWise: (1, 1),
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200364 NpuBlockType.ReduceSum: (dpu_min_height, dpu_min_width),
Tim Hall79d07d22020-04-27 18:20:16 +0100365 }
366
367 self.sub_kernel_limits = {
368 NpuBlockType.Default: (8, 8),
369 NpuBlockType.VectorProduct: (1, 1),
370 NpuBlockType.ConvolutionMxN: (8, 8),
371 NpuBlockType.Pooling: (8, 8),
372 NpuBlockType.ConvolutionDepthWise: (8, 8),
373 NpuBlockType.ElementWise: (1, 1),
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200374 NpuBlockType.ReduceSum: (8, 8),
Tim Hall79d07d22020-04-27 18:20:16 +0100375 }
376
377 # weights for scheduler search
378 from .npu_performance import make_bandwidth_array
379
380 self.bandwidth_weights = make_bandwidth_array()
381 self.bandwidth_weights[MemArea.Sram] = 1.0
382 self.bandwidth_weights[MemArea.Dram] = 10.0
383 self.bandwidth_weights[MemArea.OnChipFlash] = 2.0
384 self.bandwidth_weights[MemArea.OffChipFlash] = 20.0
385 self.cycles_weight = 40
386 self.max_sram_used_weight = 1000
387
Tim Hall1bd531d2020-11-01 20:59:36 +0000388 if self.is_spilling_enabled():
Patrik Gustavsson3ab94522020-06-29 17:36:55 +0200389 self.max_sram_used_weight = 0
Tim Hall79d07d22020-04-27 18:20:16 +0100390
391 # Shared Buffer Block allocations
392 self.shram_bank_size = 1024 # bytes
393 self.shram_size_bytes = accel_config.shram_banks * self.shram_bank_size
394 self.shram_reserved_output_banks = 2
395 self.shram_reserved_weight_banks = 0
396 self.shram_reserved_unused_banks = 2 if accel_config.shram_banks > 16 else 0
397 self.shram_total_banks = accel_config.shram_banks - self.shram_reserved_unused_banks
398 self.shram_bank_granules = np.array(accel_config.shram_granules, np.int32)
Louis Verhaard0b8268a2020-08-05 16:11:29 +0200399 self.shram_lut_size = 2048
Johan Alfven55d90dd2024-04-02 16:32:54 +0200400 self.shram_lut_slot_size = 256
Louis Verhaard0b8268a2020-08-05 16:11:29 +0200401 # SHRAM base address of the activation lookup table
402 self.shram_lut_address = self.shram_bank_size * self.available_shram_banks(True)
Tim Hall79d07d22020-04-27 18:20:16 +0100403
Tim Hall79d07d22020-04-27 18:20:16 +0100404 # Setup supported operators and restriction checkers class
Jonas Ohlsson45e653d2021-07-26 16:13:12 +0200405 self.tflite_supported_operators = TFLiteSupportedOperators()
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +0200406 self.tosa_supported_operators = TosaSupportedOperators()
Tim Hall79d07d22020-04-27 18:20:16 +0100407
Louis Verhaard0b8268a2020-08-05 16:11:29 +0200408 # Returns available number of SHRAM banks depending on activation lookup table
409 # being used or not
410 def available_shram_banks(self, uses_activation_lut):
411 banks = self.shram_total_banks
412 if uses_activation_lut and self.shram_reserved_unused_banks == 0:
413 banks -= 2
414 return banks
415
Tim Hall79d07d22020-04-27 18:20:16 +0100416 # Calculate block configuration for ALL known IFM operations and
417 # accumulator sizes. Consumers will need to select their preferred
418 # operation and bit-width at read-time.
419 def generate_block_config(self, width, height, depth):
Louis Verhaardf98c6742020-05-12 14:22:38 +0200420 # Number of bytes required for any SHRAM element for a FM of given dimensions.
421 # For IFM: size = H*W*Align(D*BYTE_WIDTH, 8)
422 # For ACC: size = H*W*Align(D,8)*BYTE_WIDTH
423 d1 = round_up(depth, SHRAMElements.PreAlign)
424 d2 = round_up(d1 * SHRAMElements.ByteSizes, SHRAMElements.PostAlign)
425 size_bytes = (height * width) * d2
426
Tim Hall79d07d22020-04-27 18:20:16 +0100427 # Convert byte size (rounded) to size in banks
428 size_banks = round_up_divide(size_bytes, self.shram_bank_size)
429 size_banks *= 2 # Double buffer the IFM/Acc (need twice as many banks)
430 # Round bank requirement to bank granularity
431 required_banks = round_up(size_banks, self.shram_bank_granules)
432 return SHRAMBlockConfig(size_bytes, required_banks)
433
Diqing Zhonge8887a32020-09-24 09:53:48 +0200434 def _generate_output_perf_tables(self, accel_config):
435 if accel_config == Accelerator.Ethos_U55_32:
436 self.output_cycles_per_elem = (2.0, 3.0, 3.0, 3.0, 4.0, 6.0, 1.0, 2.0)
437 self.activation_cycles_per_elem = (1.0, 1.0, 0.0)
438 elif accel_config == Accelerator.Ethos_U55_64:
439 self.output_cycles_per_elem = (1.0, 1.5, 1.5, 1.5, 2.0, 3.0, 0.5, 1.0)
440 self.activation_cycles_per_elem = (1.0, 1.0, 0.0)
441 elif accel_config == Accelerator.Ethos_U55_128:
442 self.output_cycles_per_elem = (0.75, 1.25, 0.75, 0.75, 1.0, 1.5, 0.25, 0.5)
443 self.activation_cycles_per_elem = (1.0, 0.5, 0.0)
Tim Hallc8a73862020-10-27 12:43:14 +0000444 elif accel_config in (Accelerator.Ethos_U55_256, Accelerator.Ethos_U65_256):
Diqing Zhonge8887a32020-09-24 09:53:48 +0200445 self.output_cycles_per_elem = (0.625, 1.125, 0.5, 0.375, 0.5, 0.75, 0.125, 0.25)
446 self.activation_cycles_per_elem = (1.0, 0.25, 0.0)
447 else:
Tim Hallc8a73862020-10-27 12:43:14 +0000448 assert accel_config == Accelerator.Ethos_U65_512
Diqing Zhonge8887a32020-09-24 09:53:48 +0200449 self.output_cycles_per_elem = (0.3125, 0.5625, 0.25, 0.1875, 0.25, 0.375, 0.0625, 0.125)
450 self.activation_cycles_per_elem = (0.5, 0.125, 0.0)
451
Tim Hall79d07d22020-04-27 18:20:16 +0100452 def calc_ifm_block_depth(self, ifm_depth, ifm_bits):
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200453 assert ifm_bits in (8, 16, 32)
Tim Hall79d07d22020-04-27 18:20:16 +0100454 assert ifm_depth > 0
455 ifm_depth = round_up(ifm_depth, self.ifm_ublock.depth)
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200456 max_block_depth = 8 * 32 // ifm_bits
Tim Hall79d07d22020-04-27 18:20:16 +0100457 return min(max_block_depth, ifm_depth)
458
459 # Calculate the size of the IFM block given a depth, target OFM block and a kernel
Tim Hallc30f4952020-06-15 20:47:35 +0100460 def get_ifm_block_size(
461 self,
462 ifm_block_depth,
463 ofm_block: Block,
464 kernel: Kernel,
465 subkernel: Block = Block(8, 8, 65536),
466 ifm_resampling_mode=resampling_mode.NONE,
467 ):
Dwight Lidmana9390f72020-05-13 12:00:08 +0200468 upscaling = 1 if ifm_resampling_mode == resampling_mode.NONE else 2
Tim Hall79d07d22020-04-27 18:20:16 +0100469
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +0200470 # Height
471 dilated_kernel_height = ((kernel.height - 1) * kernel.dilation.y) + 1
472 ifm_block_height = round_up_to_int(
473 ((ofm_block.height - 1) * kernel.stride.y + min(subkernel.height, dilated_kernel_height)) / upscaling
474 )
Tim Hall79d07d22020-04-27 18:20:16 +0100475
Dwight Lidman2f754572021-04-21 12:00:37 +0200476 ifm_block_height = round_up(ifm_block_height, self.ifm_ublock.height)
477
Tim Hall79d07d22020-04-27 18:20:16 +0100478 # Width
Tim Hall79d07d22020-04-27 18:20:16 +0100479 dilated_kernel_width = ((kernel.width - 1) * kernel.dilation.x) + 1
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +0200480 ifm_block_width = round_up_to_int(
481 ((ofm_block.width - 1) * kernel.stride.x + min(subkernel.width, dilated_kernel_width)) / upscaling
482 )
Tim Hall79d07d22020-04-27 18:20:16 +0100483
Dwight Lidman2f754572021-04-21 12:00:37 +0200484 ifm_block_width = round_up(ifm_block_width, self.ifm_ublock.width)
485
Tim Hall79d07d22020-04-27 18:20:16 +0100486 return Block(ifm_block_width, ifm_block_height, ifm_block_depth)
487
Tim Hall1bd531d2020-11-01 20:59:36 +0000488 def is_spilling_enabled(self):
Tim Hall79d07d22020-04-27 18:20:16 +0100489 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000490 Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data
Tim Hall79d07d22020-04-27 18:20:16 +0100491 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000492 return (
493 self._mem_port_mapping(self.cache_mem_area) == MemArea.Sram and self.cache_mem_area != self.arena_mem_area
494 )
Tim Hall79d07d22020-04-27 18:20:16 +0100495
Louis Verhaard024c3552021-03-17 14:26:34 +0100496 def mem_type_size(self, mem_type: MemType) -> int:
patrik.gustavsson6f23da62021-08-19 11:51:45 +0000497 """Returns size in bytes available for the given memory type. This is a hard limit."""
498 if mem_type == MemType.Scratch_fast and self.is_spilling_enabled():
499 # when accessing the scratch fast memory type with memory spilling enabled the arena_cache_size refers to
500 # the cache memory area which is a hard limit
Tim Halld8339a72021-05-27 18:49:40 +0100501 return self.arena_cache_size
Tim Hall53c62452021-08-06 13:51:34 +0100502 else:
patrik.gustavsson6f23da62021-08-19 11:51:45 +0000503 # for all other memory types and modes the hard limit is the maximum possible address offset
Tim Hall53c62452021-08-06 13:51:34 +0100504 return self.max_address_offset
Louis Verhaard024c3552021-03-17 14:26:34 +0100505
Tim Hall1bd531d2020-11-01 20:59:36 +0000506 def _mem_port_mapping(self, mem_port):
507 mem_port_mapping = {MemPort.Axi0: self.axi0_port, MemPort.Axi1: self.axi1_port}
508 return mem_port_mapping[mem_port]
Tim Hall79d07d22020-04-27 18:20:16 +0100509
Tim Hall1bd531d2020-11-01 20:59:36 +0000510 def _set_default_sys_config(self):
Tim Hall1bd531d2020-11-01 20:59:36 +0000511 # ArchitectureFeatures.DEFAULT_CONFIG values
512 if self.is_ethos_u65_system:
513 # Default Ethos-U65 system configuration
514 # Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s)
515 self.core_clock = 1e9
516 self.axi0_port = MemArea.Sram
517 self.axi1_port = MemArea.Dram
518 self.memory_clock_scales[MemArea.Sram] = 1.0
519 self.memory_clock_scales[MemArea.Dram] = 0.75 # 3 / 4
Diqing Zhongf842b692020-12-11 13:07:37 +0100520 self.memory_burst_length[MemArea.Sram] = 32
521 self.memory_burst_length[MemArea.Dram] = 128
522 self.memory_latency[MemArea.Sram][BandwidthDirection.Read] = 32
523 self.memory_latency[MemArea.Sram][BandwidthDirection.Write] = 32
524 self.memory_latency[MemArea.Dram][BandwidthDirection.Read] = 500
525 self.memory_latency[MemArea.Dram][BandwidthDirection.Write] = 250
Tim Hall79d07d22020-04-27 18:20:16 +0100526 else:
Tim Hall1bd531d2020-11-01 20:59:36 +0000527 # Default Ethos-U55 system configuration
528 # Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
529 self.core_clock = 500e6
530 self.axi0_port = MemArea.Sram
531 self.axi1_port = MemArea.OffChipFlash
532 self.memory_clock_scales[MemArea.Sram] = 1.0
533 self.memory_clock_scales[MemArea.OffChipFlash] = 0.125 # 1 / 8
Diqing Zhongf842b692020-12-11 13:07:37 +0100534 self.memory_burst_length[MemArea.Sram] = 32
535 self.memory_burst_length[MemArea.OffChipFlash] = 128
536 self.memory_latency[MemArea.Sram][BandwidthDirection.Read] = 32
537 self.memory_latency[MemArea.Sram][BandwidthDirection.Write] = 32
538 self.memory_latency[MemArea.OffChipFlash][BandwidthDirection.Read] = 64
539 self.memory_latency[MemArea.OffChipFlash][BandwidthDirection.Write] = 64
Tim Hall79d07d22020-04-27 18:20:16 +0100540
Tim Hall1bd531d2020-11-01 20:59:36 +0000541 def _set_default_mem_mode(self):
Tim Hall1bd531d2020-11-01 20:59:36 +0000542 # ArchitectureFeatures.DEFAULT_CONFIG values
543 if self.is_ethos_u65_system:
544 # Default Ethos-U65 memory mode
Tim Hall70b71a52020-12-22 11:47:54 +0000545 # Dedicated SRAM: the SRAM is only for use by the Ethos-U
546 # The non-SRAM memory is assumed to be read-writeable
Tim Hall1bd531d2020-11-01 20:59:36 +0000547 self.const_mem_area = MemPort.Axi1
548 self.arena_mem_area = MemPort.Axi1
549 self.cache_mem_area = MemPort.Axi0
Tim Halld8339a72021-05-27 18:49:40 +0100550 self.arena_cache_size = 384 * 1024
Tim Hall1bd531d2020-11-01 20:59:36 +0000551 else:
Tim Hall70b71a52020-12-22 11:47:54 +0000552 # Default Ethos-U55 memory mode
553 # Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
554 # The non-SRAM memory is assumed to be read-only
Tim Hall1bd531d2020-11-01 20:59:36 +0000555 self.const_mem_area = MemPort.Axi1
556 self.arena_mem_area = MemPort.Axi0
557 self.cache_mem_area = MemPort.Axi0
Tim Halld8339a72021-05-27 18:49:40 +0100558 self.arena_cache_size = self.max_address_offset
Tim Hall79d07d22020-04-27 18:20:16 +0100559
Tim Halld8339a72021-05-27 18:49:40 +0100560 def _get_vela_config(self, vela_config_files, verbose_config, arena_cache_size_from_cli):
Tim Hall1bd531d2020-11-01 20:59:36 +0000561 """
562 Gets the system configuration and memory modes from one or more Vela configuration file(s) or uses some
563 defaults.
564 """
Tim Hall79d07d22020-04-27 18:20:16 +0100565
Tim Hall1bd531d2020-11-01 20:59:36 +0000566 # all properties are optional and are initialised to a value of 1 (or the equivalent)
567 self.core_clock = 1
568 self.axi0_port = MemArea(1)
569 self.axi1_port = MemArea(1)
570 self.memory_clock_scales = np.ones(MemArea.Size)
Rickard Bolin5fdcf172022-12-19 12:56:17 +0000571 self.memory_burst_length = np.ones(MemArea.Size, int)
572 self.memory_latency = np.zeros((MemArea.Size, BandwidthDirection.Size), int)
Tim Hall1bd531d2020-11-01 20:59:36 +0000573 self.const_mem_area = MemPort(1)
574 self.arena_mem_area = MemPort(1)
575 self.cache_mem_area = MemPort(1)
Tim Halld8339a72021-05-27 18:49:40 +0100576 self.arena_cache_size = self.max_address_offset
577 arena_cache_size_loc_text = "Default"
Tim Hall79d07d22020-04-27 18:20:16 +0100578
Tim Hall1bd531d2020-11-01 20:59:36 +0000579 # read configuration file(s)
580 self.vela_config = None
581
582 if vela_config_files is not None:
583 self.vela_config = ConfigParser()
584 self.vela_config.read(vela_config_files)
585
586 # read system configuration
587 sys_cfg_section = "System_Config." + self.system_config
588
589 if self.vela_config is not None and self.vela_config.has_section(sys_cfg_section):
590 self.core_clock = float(self._read_config(sys_cfg_section, "core_clock", self.core_clock))
591 self.axi0_port = MemArea[self._read_config(sys_cfg_section, "axi0_port", self.axi0_port)]
592 self.axi1_port = MemArea[self._read_config(sys_cfg_section, "axi1_port", self.axi1_port)]
593
594 for mem_area in (self.axi0_port, self.axi1_port):
595 self.memory_clock_scales[mem_area] = float(
596 self._read_config(
597 sys_cfg_section, mem_area.name + "_clock_scale", self.memory_clock_scales[mem_area]
598 )
599 )
Diqing Zhongf842b692020-12-11 13:07:37 +0100600 self.memory_burst_length[mem_area] = int(
601 self._read_config(
602 sys_cfg_section, mem_area.name + "_burst_length", self.memory_burst_length[mem_area]
603 )
604 )
605 self.memory_latency[mem_area][BandwidthDirection.Read] = int(
606 self._read_config(
607 sys_cfg_section,
608 mem_area.name + "_read_latency",
609 self.memory_latency[mem_area][BandwidthDirection.Read],
610 )
611 )
612 self.memory_latency[mem_area][BandwidthDirection.Write] = int(
613 self._read_config(
614 sys_cfg_section,
615 mem_area.name + "_write_latency",
616 self.memory_latency[mem_area][BandwidthDirection.Write],
617 )
618 )
Tim Hall1bd531d2020-11-01 20:59:36 +0000619 elif self.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
620 self._set_default_sys_config()
621
622 elif vela_config_files is None:
Michael McGeagh7a6f8432020-12-02 15:29:22 +0000623 raise CliOptionError("--config", vela_config_files, "Vela config file not specified")
Tim Hall1bd531d2020-11-01 20:59:36 +0000624
625 else:
626 raise CliOptionError(
Jonas Ohlssond8575072022-03-30 10:30:25 +0200627 "--system-config",
628 self.system_config,
629 f"Section {sys_cfg_section} not found in Vela config file",
Tim Hall79d07d22020-04-27 18:20:16 +0100630 )
Tim Hall79d07d22020-04-27 18:20:16 +0100631
Tim Hall1bd531d2020-11-01 20:59:36 +0000632 # read the memory mode
633 mem_mode_section = "Memory_Mode." + self.memory_mode
Tim Hall79d07d22020-04-27 18:20:16 +0100634
Tim Hall1bd531d2020-11-01 20:59:36 +0000635 if self.vela_config is not None and self.vela_config.has_section(mem_mode_section):
636 self.const_mem_area = MemPort[
637 self._read_config(mem_mode_section, "const_mem_area", self.const_mem_area.name)
638 ]
639 self.arena_mem_area = MemPort[
640 self._read_config(mem_mode_section, "arena_mem_area", self.arena_mem_area.name)
641 ]
642 self.cache_mem_area = MemPort[
643 self._read_config(mem_mode_section, "cache_mem_area", self.cache_mem_area.name)
644 ]
Tim Halld8339a72021-05-27 18:49:40 +0100645 found = []
646 self.arena_cache_size = int(
647 self._read_config(mem_mode_section, "arena_cache_size", self.arena_cache_size, found)
648 )
649 if found[-1]:
650 arena_cache_size_loc_text = "Configuration file"
Patrik Gustavssoneca2e952020-05-27 09:15:11 +0200651
Tim Hall1bd531d2020-11-01 20:59:36 +0000652 elif self.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
653 self._set_default_mem_mode()
Patrik Gustavsson5f47c052020-06-25 12:56:04 +0200654
Tim Hall1bd531d2020-11-01 20:59:36 +0000655 elif vela_config_files is None:
Michael McGeagh7a6f8432020-12-02 15:29:22 +0000656 raise CliOptionError("--config", vela_config_files, "Vela config file not specified")
Patrik Gustavssoneca2e952020-05-27 09:15:11 +0200657
Tim Hall1bd531d2020-11-01 20:59:36 +0000658 else:
659 raise CliOptionError(
Jonas Ohlssond8575072022-03-30 10:30:25 +0200660 "--memory-mode",
661 self.memory_mode,
662 f"Section {mem_mode_section} not found in Vela config file",
Tim Hall1bd531d2020-11-01 20:59:36 +0000663 )
Tim Hall79d07d22020-04-27 18:20:16 +0100664
Tim Hall1bd531d2020-11-01 20:59:36 +0000665 # override sram to onchipflash
666 if self._mem_port_mapping(self.const_mem_area) == MemArea.Sram:
667 if self.const_mem_area == self.arena_mem_area == self.cache_mem_area:
668 print(
669 "Info: Changing const_mem_area from Sram to OnChipFlash. This will use the same characteristics as"
670 " Sram."
671 )
672 if self.const_mem_area == MemPort.Axi0:
673 self.const_mem_area = MemPort.Axi1
674 self.axi1_port = MemArea.OnChipFlash
675 else:
676 self.const_mem_area = MemPort.Axi0
677 self.axi0_port = MemArea.OnChipFlash
678 self.memory_clock_scales[MemArea.OnChipFlash] = self.memory_clock_scales[MemArea.Sram]
Diqing Zhongf842b692020-12-11 13:07:37 +0100679 self.memory_burst_length[MemArea.OnChipFlash] = self.memory_burst_length[MemArea.Sram]
680 self.memory_latency[MemArea.OnChipFlash] = self.memory_latency[MemArea.Sram]
Tim Hall1bd531d2020-11-01 20:59:36 +0000681
Tim Halld8339a72021-05-27 18:49:40 +0100682 # override sram usage
683 if arena_cache_size_from_cli is not None:
684 self.arena_cache_size = arena_cache_size_from_cli
685 arena_cache_size_loc_text = "CLI option"
686
Tim Hall1bd531d2020-11-01 20:59:36 +0000687 # check configuration
Tim Hall70b71a52020-12-22 11:47:54 +0000688 if self._mem_port_mapping(self.const_mem_area) not in (
689 MemArea.Dram,
690 MemArea.OnChipFlash,
691 MemArea.OffChipFlash,
692 ):
693 raise ConfigOptionError(
694 "const_mem_area",
695 self._mem_port_mapping(self.const_mem_area).name,
696 "Dram or OnChipFlash or OffChipFlash",
697 )
698
699 if self._mem_port_mapping(self.arena_mem_area) not in (MemArea.Sram, MemArea.Dram):
700 raise ConfigOptionError("arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram or Dram")
701
Tim Hall1bd531d2020-11-01 20:59:36 +0000702 if self._mem_port_mapping(self.cache_mem_area) != MemArea.Sram:
703 raise ConfigOptionError("cache_mem_area", self._mem_port_mapping(self.cache_mem_area).name, "Sram")
704
Tim Halld8339a72021-05-27 18:49:40 +0100705 if self.arena_cache_size < 0:
706 raise ConfigOptionError("arena_cache_size", self.arena_cache_size, ">= 0")
707 if self.arena_cache_size > self.max_address_offset:
708 raise ConfigOptionError(
709 "arena_cache_size",
710 f"{self.arena_cache_size}. Size is out of bounds, maximum is: {self.max_address_offset}",
711 )
712
Tim Hall1bd531d2020-11-01 20:59:36 +0000713 # assign existing memory areas
714 self.permanent_storage_mem_area = self._mem_port_mapping(self.const_mem_area)
715 self.feature_map_storage_mem_area = self._mem_port_mapping(self.arena_mem_area)
716 self.fast_storage_mem_area = self._mem_port_mapping(self.cache_mem_area)
717
Tim Hall1bd531d2020-11-01 20:59:36 +0000718 # display the system configuration and memory mode
719 if verbose_config:
Rickard Bolin7ce6b322022-06-02 09:30:33 +0000720 print("Configuration files:")
721 print(f" {vela_config_files}")
Tim Hall1bd531d2020-11-01 20:59:36 +0000722 print(f"System Configuration ({self.system_config}):")
723 print(f" core_clock = {self.core_clock}")
724 print(f" axi0_port = {self.axi0_port.name}")
725 print(f" axi1_port = {self.axi1_port.name}")
726 for mem in (MemArea.Sram, MemArea.Dram, MemArea.OnChipFlash, MemArea.OffChipFlash):
727 print(f" {mem.name}_clock_scales = {self.memory_clock_scales[mem]}")
Diqing Zhongf842b692020-12-11 13:07:37 +0100728 print(f" {mem.name}_burst_length = {self.memory_burst_length[mem]}")
729 print(f" {mem.name}_read_latency = {self.memory_latency[mem][BandwidthDirection.Read]}")
730 print(f" {mem.name}_write_latency = {self.memory_latency[mem][BandwidthDirection.Write]}")
Tim Hall1bd531d2020-11-01 20:59:36 +0000731
732 print(f"Memory Mode ({self.memory_mode}):")
733 print(f" const_mem_area = {self.const_mem_area.name}")
734 print(f" arena_mem_area = {self.arena_mem_area.name}")
735 print(f" cache_mem_area = {self.cache_mem_area.name}")
Tim Halld8339a72021-05-27 18:49:40 +0100736 print(f" arena_cache_size = {self.arena_cache_size} from {arena_cache_size_loc_text}")
Tim Hall1bd531d2020-11-01 20:59:36 +0000737
738 print("Architecture Settings:")
739 print(f" permanent_storage_mem_area = {self.permanent_storage_mem_area.name}")
740 print(f" feature_map_storage_mem_area = {self.feature_map_storage_mem_area.name}")
741 print(f" fast_storage_mem_area = {self.fast_storage_mem_area.name}")
Tim Hall1bd531d2020-11-01 20:59:36 +0000742
Tim Halld8339a72021-05-27 18:49:40 +0100743 def _read_config(self, section, key, current_value, found=None):
Tim Hall79d07d22020-04-27 18:20:16 +0100744 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000745 Reads a given key from a particular section in the Vela config file. If the section contains the 'inherit'
746 option then we recurse into the section specified. If inherited sections result in multiple keys for a
Tim Halld8339a72021-05-27 18:49:40 +0100747 particular option then the key from the parent section is used, regardless of the parsing order. if specified
748 found should be an empty list that this function will append a True or False to the end of the list indicating
749 whether the key was found or not.
Tim Hall79d07d22020-04-27 18:20:16 +0100750 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000751 if not self.vela_config.has_section(section):
Michael McGeagh7a6f8432020-12-02 15:29:22 +0000752 raise ConfigOptionError("section", f"{section}. The section was not found in the Vela config file(s)")
Tim Hall1bd531d2020-11-01 20:59:36 +0000753
Tim Halld8339a72021-05-27 18:49:40 +0100754 result = str(current_value) if current_value is not None else None
755 if found is not None:
756 found.append(False)
757
Tim Hall1bd531d2020-11-01 20:59:36 +0000758 if self.vela_config.has_option(section, "inherit"):
759 inheritance_section = self.vela_config.get(section, "inherit")
760 # check for recursion loop
761 if inheritance_section == section:
762 raise ConfigOptionError(
Jonas Ohlssond8575072022-03-30 10:30:25 +0200763 "inherit",
764 f"{inheritance_section}. This references its own section and recursion is not allowed",
Tim Hall1bd531d2020-11-01 20:59:36 +0000765 )
Tim Halld8339a72021-05-27 18:49:40 +0100766 result = self._read_config(inheritance_section, key, result, found)
Tim Hall1bd531d2020-11-01 20:59:36 +0000767
768 if self.vela_config.has_option(section, key):
769 result = self.vela_config.get(section, key)
Tim Halld8339a72021-05-27 18:49:40 +0100770 if found is not None:
771 found.append(True)
Tim Hall1bd531d2020-11-01 20:59:36 +0000772
Tim Hall79d07d22020-04-27 18:20:16 +0100773 return result
Louis Verhaard52078302020-11-18 13:35:06 +0100774
775
Louis Verhaard061eeb42020-11-27 08:24:03 +0100776# Cache for default arch instances, as these are expensive to create
777default_arch_cache = dict()
778
779
Louis Verhaard52078302020-11-18 13:35:06 +0100780def create_default_arch(accelerator: Accelerator) -> ArchitectureFeatures:
781 """Creates architecture features object using default settings"""
Louis Verhaard061eeb42020-11-27 08:24:03 +0100782 if accelerator not in default_arch_cache:
783 default_arch_cache[accelerator] = ArchitectureFeatures(
784 vela_config_files=None,
785 accelerator_config=accelerator.value,
786 system_config=ArchitectureFeatures.DEFAULT_CONFIG,
787 memory_mode=ArchitectureFeatures.DEFAULT_CONFIG,
Louis Verhaard061eeb42020-11-27 08:24:03 +0100788 max_blockdep=ArchitectureFeatures.MAX_BLOCKDEP,
Louis Verhaard061eeb42020-11-27 08:24:03 +0100789 verbose_config=False,
Tim Halld8339a72021-05-27 18:49:40 +0100790 arena_cache_size=None,
Louis Verhaard061eeb42020-11-27 08:24:03 +0100791 )
792 return default_arch_cache[accelerator]