blob: 5e26f1a17f266346b250ec7025a72fca368cf493 [file] [log] [blame]
erik.andersson@arm.com460c6892021-02-24 14:38:09 +01001# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
Tim Hall79d07d22020-04-27 18:20:16 +01002#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
Tim Hallc8a73862020-10-27 12:43:14 +000017# Holds a container for Ethos-U and System architecture parameters.
Diego Russoea6111a2020-04-14 18:41:58 +010018import enum
Tim Hall79d07d22020-04-27 18:20:16 +010019from collections import namedtuple
20from configparser import ConfigParser
Diego Russoea6111a2020-04-14 18:41:58 +010021
Tim Hall79d07d22020-04-27 18:20:16 +010022import numpy as np
Diego Russoea6111a2020-04-14 18:41:58 +010023
Louis Verhaardaeae5672020-11-02 18:04:27 +010024from .api import NpuAccelerator
Tim Hall1bd531d2020-11-01 20:59:36 +000025from .errors import CliOptionError
26from .errors import ConfigOptionError
Dwight Lidmana9390f72020-05-13 12:00:08 +020027from .ethos_u55_regs.ethos_u55_regs import resampling_mode
Louis Verhaard69b31762020-11-17 09:45:20 +010028from .numeric_util import full_shape
Diego Russoe8a10452020-04-21 17:39:10 +010029from .numeric_util import round_up
30from .numeric_util import round_up_divide
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +020031from .numeric_util import round_up_to_int
Tim Hall4ed38bc2020-10-20 18:54:20 +010032from .operation import Kernel
Diego Russoea6111a2020-04-14 18:41:58 +010033from .operation import NpuBlockType
Tim Hall4ed38bc2020-10-20 18:54:20 +010034from .operation import PointXYZ
Diqing Zhongf842b692020-12-11 13:07:37 +010035from .tensor import BandwidthDirection
Diego Russoe8a10452020-04-21 17:39:10 +010036from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020037from .tensor import MemType
Diego Russoe8a10452020-04-21 17:39:10 +010038from .tensor import TensorFormat
39from .tensor import TensorPurpose
Jonas Ohlsson45e653d2021-07-26 16:13:12 +020040from .tflite_supported_operators import TFLiteSupportedOperators
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +020041from .tosa_supported_operators import TosaSupportedOperators
Tim Hall79d07d22020-04-27 18:20:16 +010042
Tim Hall79d07d22020-04-27 18:20:16 +010043
class Block:
    """A 3-D extent described by width, height and depth.

    Instances are mutable and define ``__eq__`` without ``__hash__``, so they
    are not hashable.
    """

    def __init__(self, w=0, h=0, d=0):
        self.width = w
        self.height = h
        self.depth = d

    def elements(self):
        """Return width * height * depth."""
        return self.width * self.height * self.depth

    def elements_wh(self):
        """Return width * height."""
        return self.width * self.height

    def clone(self):
        """Return a new Block with the same three dimensions."""
        return Block(self.width, self.height, self.depth)

    def as_list(self):
        """Return the dimensions as ``[height, width, depth]``."""
        return [self.height, self.width, self.depth]

    def __eq__(self, other):
        # Stay duck-typed (anything exposing width/height/depth compares), but
        # let Python fall back to its default handling for unrelated objects
        # such as None instead of raising AttributeError.
        try:
            return self.width == other.width and self.height == other.height and self.depth == other.depth
        except AttributeError:
            return NotImplemented

    def __repr__(self):
        return "<Block: {0},{1},{2}>".format(self.width, self.height, self.depth)

    def to_hwc(self):
        """Return the dimensions as ``[height, width, depth]``."""
        return [self.height, self.width, self.depth]

    @classmethod
    def from_string(cls, s):
        """Build a block from a ``"WxHxC"`` string of three integers."""
        w, h, c = (int(v) for v in s.split("x"))
        return cls(w, h, c)

    @classmethod
    def from_shape(cls, shape) -> "Block":
        """Converts the shape to a Block"""
        shp = full_shape(3, shape, 1)
        # Note: index from end, as len(shp) may be > 3
        return cls(shp[-2], shp[-3], shp[-1])

    @classmethod
    def min(cls, a, b):
        """Return the per-dimension minimum of blocks a and b."""
        return cls(min(a.width, b.width), min(a.height, b.height), min(a.depth, b.depth))

    @classmethod
    def max(cls, a, b):
        """Return the per-dimension maximum of blocks a and b."""
        return cls(max(a.width, b.width), max(a.height, b.height), max(a.depth, b.depth))

    @classmethod
    def round(cls, a, b):
        """Return a with each dimension passed through round_up against the matching dimension of b."""
        return cls(round_up(a.width, b.width), round_up(a.height, b.height), round_up(a.depth, b.depth))

    @classmethod
    def div_round_up(cls, a, b):
        """Return a with each dimension passed through round_up_divide against the matching dimension of b."""
        return cls(
            round_up_divide(a.width, b.width), round_up_divide(a.height, b.height), round_up_divide(a.depth, b.depth)
        )
103
Tim Hall79d07d22020-04-27 18:20:16 +0100104
class Rect:
    """A 3-D box stored as a start corner (x, y, z) and an end corner (x2, y2, z2)."""

    def __init__(self, x, y, z, x2, y2, z2):
        self.x, self.y, self.z = x, y, z
        self.x2, self.y2, self.z2 = x2, y2, z2

    def start(self):
        """Return the start corner as a PointXYZ."""
        return PointXYZ(self.x, self.y, self.z)

    def end(self):
        """Return the end corner as a PointXYZ."""
        return PointXYZ(self.x2, self.y2, self.z2)

    def size(self):
        """Return the extent as a Block; each axis spans end - start + 1."""
        span_x = self.x2 - self.x + 1
        span_y = self.y2 - self.y + 1
        span_z = self.z2 - self.z + 1
        return Block(span_x, span_y, span_z)

    def __repr__(self):
        return f"<Rect: ({self.x},{self.y},{self.z}) ({self.x2},{self.y2},{self.z2})>"
125
126
class SHRAMElements:
    """Indices of the SHRAM element kinds, plus per-kind lookup tables indexed by them."""

    # Element kind indices (also the positions used in the arrays below)
    IFM8, IFM16, IFM8_Elementwise, IFM16_Elementwise, IFM32, Acc16, Acc32, Acc40 = range(8)
    Last = Acc40

    # Element width in bits, and the same width in whole bytes
    BitSizes = np.array([8, 16, 8, 16, 32, 16, 32, 40], dtype=np.int32)
    ByteSizes = BitSizes // 8

    # Alignment applied after (PostAlign) and before (PreAlign) scaling a depth by ByteSizes
    PostAlign = np.array([8, 8, 8, 8, 8, 1, 1, 1], dtype=np.int32)
    PreAlign = np.array([1, 1, 1, 1, 1, 8, 8, 8], dtype=np.int32)
Tim Hall79d07d22020-04-27 18:20:16 +0100141
142
class SHRAMBlockConfig:
    """Holds the per-element-kind sizes and bank counts computed for one block shape."""

    def __init__(self, sizes, banks):
        # One bank entry is expected for every SHRAMElements index (0..Last)
        expected_entries = SHRAMElements.Last + 1
        assert len(banks) == expected_entries
        self.banks = banks
        self.sizes = sizes
148
149
# Area indices must match Ethos-U SHRAM layout spec
class SharedBufferArea(enum.IntEnum):
    """Index of each area of the shared buffer; Size is the number of areas."""

    OFM = 0
    Weights = 1
    IFM = 2
    Accumulators = 3
    # One past the last real area index
    Size = 4
157
158
class Accelerator(enum.Enum):
    """Accelerator configurations known to the compiler, keyed by their lowercase string form."""

    Ethos_U55_32 = "ethos-u55-32"
    Ethos_U55_64 = "ethos-u55-64"
    Ethos_U55_128 = "ethos-u55-128"
    Ethos_U55_256 = "ethos-u55-256"
    Ethos_U65_256 = "ethos-u65-256"
    Ethos_U65_512 = "ethos-u65-512"

    @classmethod
    def member_list(cls):
        """Return the string value of every member, in definition order."""
        return [member.value for member in cls]

    @classmethod
    def from_npu_accelerator(cls, npu_accelerator: NpuAccelerator) -> "Accelerator":
        """Converts the given public API object to Accelerator (used internally)"""
        public_to_internal = dict(
            (
                (NpuAccelerator.Ethos_U55_32, cls.Ethos_U55_32),
                (NpuAccelerator.Ethos_U55_64, cls.Ethos_U55_64),
                (NpuAccelerator.Ethos_U55_128, cls.Ethos_U55_128),
                (NpuAccelerator.Ethos_U55_256, cls.Ethos_U55_256),
                (NpuAccelerator.Ethos_U65_256, cls.Ethos_U65_256),
                (NpuAccelerator.Ethos_U65_512, cls.Ethos_U65_512),
            )
        )
        assert npu_accelerator in public_to_internal, f"Unsupported accelerator {npu_accelerator}"
        return public_to_internal[npu_accelerator]
184
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100185
@enum.unique
class MemPort(enum.Enum):
    """The two AXI ports a memory area can be attached to."""

    # Explicit values equal to what enum.auto() assigns (counting from 1)
    Axi0 = 1
    Axi1 = 2
190
191
# Immutable record of the SHRAM bank layout: reserved output banks, bytes per bank,
# total bank count and the number of banks reserved at the end
SHRAMConfig = namedtuple(
    "SHRAMConfig",
    "reserved_output_banks bank_size_bytes total_banks reserved_end_banks",
)
195
196
Tim Hall79d07d22020-04-27 18:20:16 +0100197class ArchitectureFeatures:
Tim Hallc8a73862020-10-27 12:43:14 +0000198 """This class is a container for various parameters of the Ethos-U core
Diqing Zhonge8887a32020-09-24 09:53:48 +0200199 and system configuration that can be tuned, either by command line
Tim Hallc8a73862020-10-27 12:43:14 +0000200 parameters or by the Ethos-U architects. The class is often passed
Diqing Zhonge8887a32020-09-24 09:53:48 +0200201 around to passes that need to do architecture-dependent actions.
Tim Hall79d07d22020-04-27 18:20:16 +0100202
Diqing Zhonge8887a32020-09-24 09:53:48 +0200203 Note the difference between ArchitectureFeatures and CompilerOptions
Tim Hallc8a73862020-10-27 12:43:14 +0000204 - ArchitectureFeatures is for changing the Ethos-U and system architecture
Diqing Zhonge8887a32020-09-24 09:53:48 +0200205 - CompilerOptions is for changing the behaviour of the compiler
206 """
Tim Hall79d07d22020-04-27 18:20:16 +0100207
208 ArchitectureConfig = namedtuple(
209 "ArchitectureConfig", "macs cores ofm_ublock ifm_ublock shram_banks shram_granules elem_units"
210 )
211 accelerator_configs = {
Tim Hallc8a73862020-10-27 12:43:14 +0000212 Accelerator.Ethos_U65_512: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200213 256, 2, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100214 ),
Tim Hallc8a73862020-10-27 12:43:14 +0000215 Accelerator.Ethos_U65_256: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200216 256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100217 ),
218 Accelerator.Ethos_U55_256: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200219 256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100220 ),
221 Accelerator.Ethos_U55_128: ArchitectureConfig(
Dwight Lidman2f754572021-04-21 12:00:37 +0200222 128, 1, Block(2, 1, 8), Block(2, 1, 8), 24, [4, 4, 4, 4, 8, 4, 8, 12], 4
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100223 ),
224 Accelerator.Ethos_U55_64: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200225 64, 1, Block(1, 1, 8), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4, 8], 2
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100226 ),
227 Accelerator.Ethos_U55_32: ArchitectureConfig(
Fredrik Svedberga0c36242020-06-03 15:43:31 +0200228 32, 1, Block(1, 1, 4), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4, 4], 1
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100229 ),
Tim Hall79d07d22020-04-27 18:20:16 +0100230 }
231
232 OFMSplitDepth = 16
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100233 SubKernelMax = Block(8, 8, 65536)
Tim Hall79d07d22020-04-27 18:20:16 +0100234
Tim Hall1bd531d2020-11-01 20:59:36 +0000235 DEFAULT_CONFIG = "internal-default"
Louis Verhaard1e170182020-11-26 11:42:04 +0100236 MAX_BLOCKDEP = 3
Tim Hall1bd531d2020-11-01 20:59:36 +0000237
def __init__(
    self,
    vela_config_files,
    accelerator_config,
    system_config,
    memory_mode,
    max_blockdep,
    verbose_config,
    arena_cache_size,
):
    """Populate all architecture parameters for the selected accelerator.

    Args:
        vela_config_files: forwarded to _get_vela_config (may be None).
        accelerator_config: accelerator name; lower-cased and matched against
            Accelerator.member_list().
        system_config: name of the system configuration, stored on the instance.
        memory_mode: name of the memory mode, stored on the instance.
        max_blockdep: stored on the instance as max_blockdep.
        verbose_config: forwarded to _get_vela_config.
        arena_cache_size: forwarded to _get_vela_config as the CLI-supplied value.

    Raises:
        CliOptionError: if accelerator_config is not a known accelerator name.
    """
    accelerator_config = accelerator_config.lower()
    if accelerator_config not in Accelerator.member_list():
        # Report the rejected string itself: self.accelerator_config is not assigned yet at this point
        raise CliOptionError("--accelerator-config", accelerator_config, "Unknown accelerator configuration")
    self.accelerator_config = Accelerator(accelerator_config)
    accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config]
    self.config = accel_config

    self.accumulator_granules = {
        SHRAMElements.Acc16: accel_config.shram_granules[SHRAMElements.Acc16],
        SHRAMElements.Acc32: accel_config.shram_granules[SHRAMElements.Acc32],
        SHRAMElements.Acc40: accel_config.shram_granules[SHRAMElements.Acc40],
    }

    # Keyed by IFM bit width
    self.ifm_bank_granules = {
        8: accel_config.shram_granules[SHRAMElements.IFM8],
        16: accel_config.shram_granules[SHRAMElements.IFM16],
        32: accel_config.shram_granules[SHRAMElements.IFM32],
    }

    # Keyed by IFM bit width; 32-bit shares the IFM32 granule
    self.ifm_ew_bank_granules = {
        8: accel_config.shram_granules[SHRAMElements.IFM8_Elementwise],
        16: accel_config.shram_granules[SHRAMElements.IFM16_Elementwise],
        32: accel_config.shram_granules[SHRAMElements.IFM32],
    }

    self.shram = SHRAMConfig(2, 1024, accel_config.shram_banks, 2 if accel_config.shram_banks > 16 else 0)

    self.system_config = system_config
    self.memory_mode = memory_mode
    self.is_ethos_u65_system = self.accelerator_config in (Accelerator.Ethos_U65_256, Accelerator.Ethos_U65_512)

    if self.is_ethos_u65_system:
        self.max_outstanding_dma = 2
        axi_port_address_width = 40
        axi_port_data_width = 128
    else:
        self.max_outstanding_dma = 1
        axi_port_address_width = 32
        axi_port_data_width = 64

    self.max_outstanding_kernels = 3

    self.ncores = accel_config.cores
    self.ofm_ublock = accel_config.ofm_ublock
    self.ifm_ublock = accel_config.ifm_ublock
    self.ofm_block_max = Block(64, 32, 128)

    self.max_blockdep = max_blockdep

    dpu_min_height = accel_config.ofm_ublock.height
    dpu_min_width = accel_config.ofm_ublock.width
    dpu_dot_product_width = 8
    dpu_min_ofm_channels = accel_config.ofm_ublock.depth

    self.num_elem_wise_units = accel_config.elem_units
    self.num_macs_per_cycle = dpu_min_height * dpu_min_width * dpu_dot_product_width * dpu_min_ofm_channels
    # Max value in address offsets
    self.max_address_offset = 1 << axi_port_address_width

    # Get system configuration and memory mode
    # (must run after max_address_offset is set, which _get_vela_config reads)
    self._get_vela_config(vela_config_files, verbose_config, arena_cache_size)

    self.memory_bandwidths_per_cycle = axi_port_data_width * self.memory_clock_scales / 8

    self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.core_clock

    # Get output/activation performance numbers
    self._generate_output_perf_tables(self.accelerator_config)

    # sizes as N x H x W x C. we need to round up to these when allocating storage
    self.storage_rounding_quantums = {
        TensorFormat.Unknown: (1, 1, 1, 1),
        TensorFormat.WeightsCompressed: (1, 1, 1, 1),
        TensorFormat.NHWC: (1, 1, 1, 1),
        TensorFormat.NHCWB16: (1, 1, 1, 16),
    }

    # brick sizes as N x H x W x C. We have to fetch whole bricks at a time
    self.brick_sizes = {
        TensorFormat.Unknown: (1, 1, 1, 1),
        TensorFormat.WeightsCompressed: (1, 1, 1, 1),
        TensorFormat.NHWC: (1, 1, 1, 1),
        TensorFormat.NHCWB16: (1, 1, 1, 16),
    }

    self.default_weight_format = TensorFormat.WeightsCompressed
    self.default_feature_map_format = TensorFormat.NHWC

    # NOTE(review): the *_storage_mem_area attributes read below are not assigned in this
    # method; presumably _get_vela_config sets them - confirm.
    self.tensor_storage_mem_area = {
        # permanent mem_area
        TensorPurpose.Unknown: MemArea.Unknown,
        TensorPurpose.Weights: self.permanent_storage_mem_area,
        TensorPurpose.FeatureMap: self.feature_map_storage_mem_area,
        TensorPurpose.LUT: self.permanent_storage_mem_area,
        TensorPurpose.Scratch: self.feature_map_storage_mem_area,
        TensorPurpose.ScratchFast: self.fast_storage_mem_area,
    }

    self.tensor_storage_mem_type = {
        TensorPurpose.Unknown: MemType.Unknown,
        TensorPurpose.Weights: MemType.Permanent_NPU,
        TensorPurpose.FeatureMap: MemType.Scratch,
        TensorPurpose.LUT: MemType.Scratch,
        TensorPurpose.Scratch: MemType.Scratch,
        TensorPurpose.ScratchFast: MemType.Scratch_fast,
    }

    self.min_block_sizes = {
        NpuBlockType.Default: (dpu_min_height, dpu_min_width),
        NpuBlockType.VectorProduct: (1, 1),
        NpuBlockType.ConvolutionMxN: (dpu_min_height, dpu_min_width),
        NpuBlockType.Pooling: (dpu_min_height, dpu_min_width),
        NpuBlockType.ConvolutionDepthWise: (dpu_min_height, dpu_min_width),
        NpuBlockType.ElementWise: (1, 1),
        NpuBlockType.ReduceSum: (dpu_min_height, dpu_min_width),
    }

    self.sub_kernel_limits = {
        NpuBlockType.Default: (8, 8),
        NpuBlockType.VectorProduct: (1, 1),
        NpuBlockType.ConvolutionMxN: (8, 8),
        NpuBlockType.Pooling: (8, 8),
        NpuBlockType.ConvolutionDepthWise: (8, 8),
        NpuBlockType.ElementWise: (1, 1),
        NpuBlockType.ReduceSum: (8, 8),
    }

    # weights for scheduler search
    from .npu_performance import make_bandwidth_array

    self.bandwidth_weights = make_bandwidth_array()
    self.bandwidth_weights[MemArea.Sram] = 1.0
    self.bandwidth_weights[MemArea.Dram] = 10.0
    self.bandwidth_weights[MemArea.OnChipFlash] = 2.0
    self.bandwidth_weights[MemArea.OffChipFlash] = 20.0
    self.cycles_weight = 40
    self.max_sram_used_weight = 1000

    if self.is_spilling_enabled():
        self.max_sram_used_weight = 0

    # Shared Buffer Block allocations
    self.shram_bank_size = 1024  # bytes
    self.shram_size_bytes = accel_config.shram_banks * self.shram_bank_size
    self.shram_reserved_output_banks = 2
    self.shram_reserved_weight_banks = 0
    self.shram_reserved_unused_banks = 2 if accel_config.shram_banks > 16 else 0
    self.shram_total_banks = accel_config.shram_banks - self.shram_reserved_unused_banks
    self.shram_bank_granules = np.array(accel_config.shram_granules, np.int32)
    self.shram_lut_size = 2048
    # SHRAM base address of the activation lookup table
    self.shram_lut_address = self.shram_bank_size * self.available_shram_banks(True)

    # Build a map of acceptable IFM/OFM block configurations up to the maximum
    # IFM/OFM block size.
    ifm_block_max = self.get_ifm_block_size(32, self.ofm_block_max, Kernel(8, 8))
    self.block_config_map = dict()
    self.generate_block_config_map(Block(ifm_block_max.width * 2, ifm_block_max.height, 128))

    # Setup supported operators and restriction checkers class
    self.tflite_supported_operators = TFLiteSupportedOperators()
    self.tosa_supported_operators = TosaSupportedOperators()
Tim Hall79d07d22020-04-27 18:20:16 +0100410
Louis Verhaard0b8268a2020-08-05 16:11:29 +0200411 # Returns available number of SHRAM banks depending on activation lookup table
412 # being used or not
def available_shram_banks(self, uses_activation_lut):
    """Return the number of usable SHRAM banks.

    Two banks are subtracted when an activation lookup table is in use and
    no unused banks have been reserved (shram_reserved_unused_banks == 0).
    """
    total = self.shram_total_banks
    lut_takes_banks = uses_activation_lut and self.shram_reserved_unused_banks == 0
    return total - 2 if lut_takes_banks else total
418
Tim Hall79d07d22020-04-27 18:20:16 +0100419 # Calculate block configuration for ALL known IFM operations and
420 # accumulator sizes. Consumers will need to select their preferred
421 # operation and bit-width at read-time.
def generate_block_config(self, width, height, depth):
    """Return a SHRAMBlockConfig for a block of the given dimensions.

    The sizes and bank counts are computed for every SHRAMElements kind at
    once, using the per-kind arrays on SHRAMElements:
      IFM kinds: size = H*W*Align(D*BYTE_WIDTH, 8)
      ACC kinds: size = H*W*Align(D, 8)*BYTE_WIDTH
    """
    # Align the depth first (accumulators), scale to bytes, then align again (IFMs)
    pre_aligned_depth = round_up(depth, SHRAMElements.PreAlign)
    bytes_per_position = round_up(pre_aligned_depth * SHRAMElements.ByteSizes, SHRAMElements.PostAlign)
    size_bytes = (height * width) * bytes_per_position

    # Whole banks needed, doubled because the IFM/Acc is double buffered
    size_banks = round_up_divide(size_bytes, self.shram_bank_size) * 2
    # Round bank requirement to bank granularity
    return SHRAMBlockConfig(size_bytes, round_up(size_banks, self.shram_bank_granules))
436
@staticmethod
def make_block_config_key(width, height, depth):
    """Return the lookup key for a block: an int tuple ordered (height, width, depth)."""
    return tuple(int(dim) for dim in (height, width, depth))
440
def get_block_config(self, width, height, depth):
    """Look up the precomputed block configuration for the given dimensions.

    Returns None when block_config_map has no entry for them.
    """
    assert depth <= self.ofm_block_max.depth
    lookup_key = ArchitectureFeatures.make_block_config_key(width, height, depth)
    return self.block_config_map.get(lookup_key)
446
447 # Generate a key:value map of possible block configurations, where the
448 # key is compounded from the block dimensions: 0x00HHWWCC
def generate_block_config_map(self, block: Block):
    """Fill block_config_map for every block size up to the given maximum.

    Every height in 1..block.height and width in 1..block.width is combined
    with a fixed list of depths. Keys come from make_block_config_key and
    values from generate_block_config.
    """
    # All possible IFM/OFM depth values
    depth_values = (4, 8, 12, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128)
    make_key = ArchitectureFeatures.make_block_config_key
    for height in range(1, block.height + 1):
        for width in range(1, block.width + 1):
            self.block_config_map.update(
                (make_key(width, height, depth), self.generate_block_config(width, height, depth))
                for depth in depth_values
            )
456
def _generate_output_perf_tables(self, accel_config):
    """Set output_cycles_per_elem and activation_cycles_per_elem for the given Accelerator member."""
    if accel_config == Accelerator.Ethos_U55_32:
        output_cycles = (2.0, 3.0, 3.0, 3.0, 4.0, 6.0, 1.0, 2.0)
        activation_cycles = (1.0, 1.0, 0.0)
    elif accel_config == Accelerator.Ethos_U55_64:
        output_cycles = (1.0, 1.5, 1.5, 1.5, 2.0, 3.0, 0.5, 1.0)
        activation_cycles = (1.0, 1.0, 0.0)
    elif accel_config == Accelerator.Ethos_U55_128:
        output_cycles = (0.75, 1.25, 0.75, 0.75, 1.0, 1.5, 0.25, 0.5)
        activation_cycles = (1.0, 0.5, 0.0)
    elif accel_config in (Accelerator.Ethos_U55_256, Accelerator.Ethos_U65_256):
        output_cycles = (0.625, 1.125, 0.5, 0.375, 0.5, 0.75, 0.125, 0.25)
        activation_cycles = (1.0, 0.25, 0.0)
    else:
        # The only remaining accelerator is the U65-512
        assert accel_config == Accelerator.Ethos_U65_512
        output_cycles = (0.3125, 0.5625, 0.25, 0.1875, 0.25, 0.375, 0.0625, 0.125)
        activation_cycles = (0.5, 0.125, 0.0)

    self.output_cycles_per_elem = output_cycles
    self.activation_cycles_per_elem = activation_cycles
474
def calc_ifm_block_depth(self, ifm_depth, ifm_bits):
    """Return the IFM block depth for the given IFM depth and bit width.

    The depth is rounded up to the IFM micro-block depth and then capped at
    8 * 32 // ifm_bits.
    """
    assert ifm_bits in (8, 16, 32)
    assert ifm_depth > 0
    rounded_depth = round_up(ifm_depth, self.ifm_ublock.depth)
    depth_cap = 8 * 32 // ifm_bits
    return min(depth_cap, rounded_depth)
481
482 # Calculate the size of the IFM block given a depth, target OFM block and a kernel
def get_ifm_block_size(
    self,
    ifm_block_depth,
    ofm_block: Block,
    kernel: Kernel,
    subkernel: Block = Block(8, 8, 65536),
    ifm_resampling_mode=resampling_mode.NONE,
):
    """Calculate the size of the IFM block given a depth, target OFM block and a kernel.

    Width and height are derived the same way: the span covered by the OFM
    block through the (dilated, sub-kernel limited) kernel, divided by the
    upscaling factor, then rounded up to the IFM micro-block size. The depth
    is passed through unchanged.
    """
    # Any resampling mode other than NONE halves the IFM extent that is needed
    upscaling = 2
    if ifm_resampling_mode == resampling_mode.NONE:
        upscaling = 1

    def _ifm_extent(ofm_extent, stride, kernel_extent, dilation, subkernel_extent, ublock_extent):
        dilated_kernel_extent = ((kernel_extent - 1) * dilation) + 1
        needed = round_up_to_int(
            ((ofm_extent - 1) * stride + min(subkernel_extent, dilated_kernel_extent)) / upscaling
        )
        return round_up(needed, ublock_extent)

    # Height
    ifm_block_height = _ifm_extent(
        ofm_block.height, kernel.stride.y, kernel.height, kernel.dilation.y, subkernel.height, self.ifm_ublock.height
    )

    # Width
    ifm_block_width = _ifm_extent(
        ofm_block.width, kernel.stride.x, kernel.width, kernel.dilation.x, subkernel.width, self.ifm_ublock.width
    )

    return Block(ifm_block_width, ifm_block_height, ifm_block_depth)
510
def is_spilling_enabled(self):
    """
    Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data

    It is reported as enabled when the cache memory area maps to SRAM and is a different port from the arena
    memory area.
    """
    if self._mem_port_mapping(self.cache_mem_area) != MemArea.Sram:
        return False
    return self.cache_mem_area != self.arena_mem_area
Tim Hall79d07d22020-04-27 18:20:16 +0100518
def mem_type_size(self, mem_type: MemType) -> int:
    """Returns size in bytes available for the given memory type"""
    # the arena cache memory area always contains the scratch fast memory type. it also contains the scratch
    # memory type when memory spilling is not being used
    uses_arena_cache = mem_type == MemType.Scratch_fast
    if not uses_arena_cache and mem_type == MemType.Scratch:
        uses_arena_cache = not self.is_spilling_enabled()

    if uses_arena_cache:
        return self.arena_cache_size

    # the compiler is not aware of the memory limits for these memory types and so all it can do is return the
    # maximum address size
    return self.max_address_offset
Louis Verhaard024c3552021-03-17 14:26:34 +0100529
def _mem_port_mapping(self, mem_port):
    """Return the memory area attached to the given MemPort (axi0_port or axi1_port)."""
    return {MemPort.Axi0: self.axi0_port, MemPort.Axi1: self.axi1_port}[mem_port]
Tim Hall79d07d22020-04-27 18:20:16 +0100533
def _set_default_sys_config(self):
    """Apply the ArchitectureFeatures.DEFAULT_CONFIG system configuration.

    AXI0 is always SRAM; only the core clock and the memory behind AXI1
    differ between the Ethos-U65 and Ethos-U55 defaults.
    """
    if self.is_ethos_u65_system:
        # Default Ethos-U65 system configuration
        # Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s)
        core_clock = 1e9
        axi1_area = MemArea.Dram
        axi1_clock_scale = 0.75  # 3 / 4
        axi1_read_latency, axi1_write_latency = 500, 250
    else:
        # Default Ethos-U55 system configuration
        # Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
        core_clock = 500e6
        axi1_area = MemArea.OffChipFlash
        axi1_clock_scale = 0.125  # 1 / 8
        axi1_read_latency, axi1_write_latency = 64, 64

    self.core_clock = core_clock
    self.axi0_port = MemArea.Sram
    self.axi1_port = axi1_area

    self.memory_clock_scales[MemArea.Sram] = 1.0
    self.memory_clock_scales[axi1_area] = axi1_clock_scale

    self.memory_burst_length[MemArea.Sram] = 32
    self.memory_burst_length[axi1_area] = 128

    self.memory_latency[MemArea.Sram][BandwidthDirection.Read] = 32
    self.memory_latency[MemArea.Sram][BandwidthDirection.Write] = 32
    self.memory_latency[axi1_area][BandwidthDirection.Read] = axi1_read_latency
    self.memory_latency[axi1_area][BandwidthDirection.Write] = axi1_write_latency
Tim Hall79d07d22020-04-27 18:20:16 +0100564
def _set_default_mem_mode(self):
    """Apply the ArchitectureFeatures.DEFAULT_CONFIG memory mode.

    Constants always live behind AXI1 and the cache behind AXI0; the arena
    port and the arena cache size depend on the accelerator family.
    """
    is_u65 = self.is_ethos_u65_system

    self.const_mem_area = MemPort.Axi1
    if is_u65:
        # Default Ethos-U65 memory mode
        # Dedicated SRAM: the SRAM is only for use by the Ethos-U
        # The non-SRAM memory is assumed to be read-writeable
        self.arena_mem_area = MemPort.Axi1
    else:
        # Default Ethos-U55 memory mode
        # Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
        # The non-SRAM memory is assumed to be read-only
        self.arena_mem_area = MemPort.Axi0
    self.cache_mem_area = MemPort.Axi0
    self.arena_cache_size = 384 * 1024 if is_u65 else self.max_address_offset
Tim Hall79d07d22020-04-27 18:20:16 +0100583
def _get_vela_config(self, vela_config_files, verbose_config, arena_cache_size_from_cli):
    """
    Gets the system configuration and memory mode from one or more Vela configuration file(s) or uses some
    defaults.

    Every property is first initialised to a neutral value and is then overwritten from the "System_Config."
    and "Memory_Mode." sections that are selected by self.system_config and self.memory_mode. If a section is
    not present in the configuration file(s) then the built-in defaults are used when the DEFAULT_CONFIG name
    was requested, otherwise an error is raised. Finally the settings are checked and the permanent storage,
    feature map storage and fast storage memory areas are assigned.

    vela_config_files: file name(s) passed to ConfigParser.read(), or None if no file was specified
    verbose_config: if true then the resulting system configuration and memory mode are printed
    arena_cache_size_from_cli: if not None then this overrides the arena cache size from any other source

    Raises CliOptionError if the requested (non-default) system configuration or memory mode is not available.
    Raises ConfigOptionError if the resulting memory areas or arena cache size are not valid.
    """

    # all properties are optional and are initialised to a value of 1 (or the equivalent)
    self.core_clock = 1
    self.axi0_port = MemArea(1)
    self.axi1_port = MemArea(1)
    self.memory_clock_scales = np.ones(MemArea.Size)
    # the builtin int is used as the dtype because the np.int alias has been removed from recent NumPy releases
    self.memory_burst_length = np.ones(MemArea.Size, int)
    self.memory_latency = np.zeros((MemArea.Size, BandwidthDirection.Size), int)
    self.const_mem_area = MemPort(1)
    self.arena_mem_area = MemPort(1)
    self.cache_mem_area = MemPort(1)
    self.arena_cache_size = self.max_address_offset
    # records where the arena cache size came from, this is only used by the verbose output
    arena_cache_size_loc_text = "Default"

    # read configuration file(s)
    self.vela_config = None

    if vela_config_files is not None:
        self.vela_config = ConfigParser()
        self.vela_config.read(vela_config_files)

    # read system configuration
    sys_cfg_section = "System_Config." + self.system_config

    if self.vela_config is not None and self.vela_config.has_section(sys_cfg_section):
        self.core_clock = float(self._read_config(sys_cfg_section, "core_clock", self.core_clock))
        # NOTE(review): if the option is missing then the lookup key is str() of the current MemArea, this
        # presumably relies on MemArea converting to its member name - confirm against the MemArea definition
        self.axi0_port = MemArea[self._read_config(sys_cfg_section, "axi0_port", self.axi0_port)]
        self.axi1_port = MemArea[self._read_config(sys_cfg_section, "axi1_port", self.axi1_port)]

        # the per-memory options are named after the memory area that is attached to each AXI port
        for mem_area in (self.axi0_port, self.axi1_port):
            self.memory_clock_scales[mem_area] = float(
                self._read_config(
                    sys_cfg_section, mem_area.name + "_clock_scale", self.memory_clock_scales[mem_area]
                )
            )
            self.memory_burst_length[mem_area] = int(
                self._read_config(
                    sys_cfg_section, mem_area.name + "_burst_length", self.memory_burst_length[mem_area]
                )
            )
            self.memory_latency[mem_area][BandwidthDirection.Read] = int(
                self._read_config(
                    sys_cfg_section,
                    mem_area.name + "_read_latency",
                    self.memory_latency[mem_area][BandwidthDirection.Read],
                )
            )
            self.memory_latency[mem_area][BandwidthDirection.Write] = int(
                self._read_config(
                    sys_cfg_section,
                    mem_area.name + "_write_latency",
                    self.memory_latency[mem_area][BandwidthDirection.Write],
                )
            )
    elif self.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
        self._set_default_sys_config()

    elif vela_config_files is None:
        raise CliOptionError("--config", vela_config_files, "Vela config file not specified")

    else:
        raise CliOptionError(
            "--system-config", self.system_config, f"Section {sys_cfg_section} not found in Vela config file",
        )

    # read the memory mode
    mem_mode_section = "Memory_Mode." + self.memory_mode

    if self.vela_config is not None and self.vela_config.has_section(mem_mode_section):
        self.const_mem_area = MemPort[
            self._read_config(mem_mode_section, "const_mem_area", self.const_mem_area.name)
        ]
        self.arena_mem_area = MemPort[
            self._read_config(mem_mode_section, "arena_mem_area", self.arena_mem_area.name)
        ]
        self.cache_mem_area = MemPort[
            self._read_config(mem_mode_section, "cache_mem_area", self.cache_mem_area.name)
        ]
        # _read_config() appends to this list, the last entry tells whether the option was actually present
        found = []
        self.arena_cache_size = int(
            self._read_config(mem_mode_section, "arena_cache_size", self.arena_cache_size, found)
        )
        if found[-1]:
            arena_cache_size_loc_text = "Configuration file"

    elif self.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
        self._set_default_mem_mode()

    elif vela_config_files is None:
        raise CliOptionError("--config", vela_config_files, "Vela config file not specified")

    else:
        raise CliOptionError(
            "--memory-mode", self.memory_mode, f"Section {mem_mode_section} not found in Vela config file",
        )

    # override sram to onchipflash
    if self._mem_port_mapping(self.const_mem_area) == MemArea.Sram:
        if self.const_mem_area == self.arena_mem_area == self.cache_mem_area:
            print(
                "Info: Changing const_mem_area from Sram to OnChipFlash. This will use the same characteristics as"
                " Sram."
            )
            # move the constant data onto the other AXI port and make that port an OnChipFlash
            if self.const_mem_area == MemPort.Axi0:
                self.const_mem_area = MemPort.Axi1
                self.axi1_port = MemArea.OnChipFlash
            else:
                self.const_mem_area = MemPort.Axi0
                self.axi0_port = MemArea.OnChipFlash
            # the new OnChipFlash takes its performance characteristics from the Sram
            self.memory_clock_scales[MemArea.OnChipFlash] = self.memory_clock_scales[MemArea.Sram]
            self.memory_burst_length[MemArea.OnChipFlash] = self.memory_burst_length[MemArea.Sram]
            self.memory_latency[MemArea.OnChipFlash] = self.memory_latency[MemArea.Sram]

    # override sram usage
    if arena_cache_size_from_cli is not None:
        self.arena_cache_size = arena_cache_size_from_cli
        arena_cache_size_loc_text = "CLI option"

    # check configuration
    if self._mem_port_mapping(self.const_mem_area) not in (
        MemArea.Dram,
        MemArea.OnChipFlash,
        MemArea.OffChipFlash,
    ):
        raise ConfigOptionError(
            "const_mem_area",
            self._mem_port_mapping(self.const_mem_area).name,
            "Dram or OnChipFlash or OffChipFlash",
        )

    if self._mem_port_mapping(self.arena_mem_area) not in (MemArea.Sram, MemArea.Dram):
        raise ConfigOptionError("arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram or Dram")

    if self._mem_port_mapping(self.cache_mem_area) != MemArea.Sram:
        raise ConfigOptionError("cache_mem_area", self._mem_port_mapping(self.cache_mem_area).name, "Sram")

    if self.arena_cache_size < 0:
        raise ConfigOptionError("arena_cache_size", self.arena_cache_size, ">= 0")
    if self.arena_cache_size > self.max_address_offset:
        raise ConfigOptionError(
            "arena_cache_size",
            f"{self.arena_cache_size}. Size is out of bounds, maximum is: {self.max_address_offset}",
        )

    # assign existing memory areas
    self.permanent_storage_mem_area = self._mem_port_mapping(self.const_mem_area)
    self.feature_map_storage_mem_area = self._mem_port_mapping(self.arena_mem_area)
    self.fast_storage_mem_area = self._mem_port_mapping(self.cache_mem_area)

    # display the system configuration and memory mode
    if verbose_config:
        print(f"System Configuration ({self.system_config}):")
        print(f" core_clock = {self.core_clock}")
        print(f" axi0_port = {self.axi0_port.name}")
        print(f" axi1_port = {self.axi1_port.name}")
        for mem in (MemArea.Sram, MemArea.Dram, MemArea.OnChipFlash, MemArea.OffChipFlash):
            print(f" {mem.name}_clock_scales = {self.memory_clock_scales[mem]}")
            print(f" {mem.name}_burst_length = {self.memory_burst_length[mem]}")
            print(f" {mem.name}_read_latency = {self.memory_latency[mem][BandwidthDirection.Read]}")
            print(f" {mem.name}_write_latency = {self.memory_latency[mem][BandwidthDirection.Write]}")

        print(f"Memory Mode ({self.memory_mode}):")
        print(f" const_mem_area = {self.const_mem_area.name}")
        print(f" arena_mem_area = {self.arena_mem_area.name}")
        print(f" cache_mem_area = {self.cache_mem_area.name}")
        print(f" arena_cache_size = {self.arena_cache_size} from {arena_cache_size_loc_text}")

        print("Architecture Settings:")
        print(f" permanent_storage_mem_area = {self.permanent_storage_mem_area.name}")
        print(f" feature_map_storage_mem_area = {self.feature_map_storage_mem_area.name}")
        print(f" fast_storage_mem_area = {self.fast_storage_mem_area.name}")
Tim Hall1bd531d2020-11-01 20:59:36 +0000760
def _read_config(self, section, key, current_value, found=None):
    """
    Reads a given key from a particular section in the Vela config file. If the section contains the 'inherit'
    option then the section that it names is searched as well, and so on along the chain of inherited sections.
    If more than one section in the chain contains the key then the value from the section that is nearest to
    the requested one is used, regardless of the parsing order. If the key is not found in any of the sections
    then current_value is returned, converted to a string unless it is None.

    If specified, found should be an empty list. A False is appended for every section that is searched and
    then a True is appended each time the key is found, therefore the last entry of the list indicates whether
    the key was found or not.

    Raises ConfigOptionError if a section does not exist or if the 'inherit' options form a loop.
    """
    result = str(current_value) if current_value is not None else None

    # follow the 'inherit' options from the requested section up to the top-most inherited section. This is done
    # iteratively so that any loop is reported as a configuration error rather than exhausting the recursion limit
    chain = []
    current_section = section
    while True:
        if not self.vela_config.has_section(current_section):
            raise ConfigOptionError(
                "section", f"{current_section}. The section was not found in the Vela config file(s)"
            )
        chain.append(current_section)
        if found is not None:
            found.append(False)

        if not self.vela_config.has_option(current_section, "inherit"):
            break

        inheritance_section = self.vela_config.get(current_section, "inherit")
        # check for recursion loop
        if inheritance_section == current_section:
            raise ConfigOptionError(
                "inherit", f"{inheritance_section}. This references its own section and recursion is not allowed",
            )
        if inheritance_section in chain:
            raise ConfigOptionError(
                "inherit",
                f"{inheritance_section}. This section is inherited in a loop and recursion is not allowed",
            )
        current_section = inheritance_section

    # apply the sections from the top-most inherited one down to the requested one, so that the value from the
    # section nearest to the requested one is the one that remains
    for name in reversed(chain):
        if self.vela_config.has_option(name, key):
            result = self.vela_config.get(name, key)
            if found is not None:
                found.append(True)

    return result
Louis Verhaard52078302020-11-18 13:35:06 +0100791
792
# Default architecture instances are expensive to construct, so each one is built once and then reused
default_arch_cache = {}


def create_default_arch(accelerator: Accelerator) -> ArchitectureFeatures:
    """Returns the architecture features object that uses the default settings for the given accelerator"""
    arch = default_arch_cache.get(accelerator)
    if arch is None:
        # first request for this accelerator: build it without a config file and remember it for later calls
        arch = ArchitectureFeatures(
            vela_config_files=None,
            accelerator_config=accelerator.value,
            system_config=ArchitectureFeatures.DEFAULT_CONFIG,
            memory_mode=ArchitectureFeatures.DEFAULT_CONFIG,
            max_blockdep=ArchitectureFeatures.MAX_BLOCKDEP,
            verbose_config=False,
            arena_cache_size=None,
        )
        default_arch_cache[accelerator] = arch
    return arch