blob: aaf1ae45cf7579b4d9e784bf0a05ef8a334844b3 [file] [log] [blame]
# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Holds a container for Ethos-U and System architecture parameters.
Diego Russoea6111a2020-04-14 18:41:58 +010018import enum
Tim Hall79d07d22020-04-27 18:20:16 +010019from collections import namedtuple
20from configparser import ConfigParser
Diego Russoea6111a2020-04-14 18:41:58 +010021
Tim Hall79d07d22020-04-27 18:20:16 +010022import numpy as np
Diego Russoea6111a2020-04-14 18:41:58 +010023
Louis Verhaardaeae5672020-11-02 18:04:27 +010024from .api import NpuAccelerator
Tim Hall1bd531d2020-11-01 20:59:36 +000025from .errors import CliOptionError
26from .errors import ConfigOptionError
Dwight Lidmana9390f72020-05-13 12:00:08 +020027from .ethos_u55_regs.ethos_u55_regs import resampling_mode
Louis Verhaard69b31762020-11-17 09:45:20 +010028from .numeric_util import full_shape
Diego Russoe8a10452020-04-21 17:39:10 +010029from .numeric_util import round_up
30from .numeric_util import round_up_divide
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +020031from .numeric_util import round_up_to_int
Tim Hall4ed38bc2020-10-20 18:54:20 +010032from .operation import Kernel
Diego Russoea6111a2020-04-14 18:41:58 +010033from .operation import NpuBlockType
Tim Hall4ed38bc2020-10-20 18:54:20 +010034from .operation import PointXYZ
Diqing Zhongf842b692020-12-11 13:07:37 +010035from .tensor import BandwidthDirection
Diego Russoe8a10452020-04-21 17:39:10 +010036from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020037from .tensor import MemType
Diego Russoe8a10452020-04-21 17:39:10 +010038from .tensor import TensorFormat
39from .tensor import TensorPurpose
Jonas Ohlsson45e653d2021-07-26 16:13:12 +020040from .tflite_supported_operators import TFLiteSupportedOperators
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +020041from .tosa_supported_operators import TosaSupportedOperators
Tim Hall79d07d22020-04-27 18:20:16 +010042
Tim Hall79d07d22020-04-27 18:20:16 +010043
class Block:
    """A mutable (width, height, depth) triple.

    Instances compare by value. Because ``__eq__`` is defined without a
    matching ``__hash__``, instances are unhashable.
    """

    def __init__(self, w=0, h=0, d=0):
        self.width = w
        self.height = h
        self.depth = d

    def elements(self):
        """Return width * height * depth."""
        return self.width * self.height * self.depth

    def elements_wh(self):
        """Return width * height (depth is ignored)."""
        return self.width * self.height

    def clone(self):
        """Return a new Block with the same dimensions."""
        return Block(self.width, self.height, self.depth)

    def as_list(self):
        """Return ``[height, width, depth]`` (same ordering as to_hwc())."""
        return self.to_hwc()

    def __eq__(self, other):
        """Compare dimensions with any object exposing width/height/depth.

        Returns NotImplemented (so Python falls back to its default handling,
        i.e. ``block == None`` is False) when ``other`` lacks those
        attributes, instead of raising AttributeError.
        """
        try:
            other_width, other_height, other_depth = other.width, other.height, other.depth
        except AttributeError:
            return NotImplemented
        return bool(self.width == other_width and self.height == other_height and self.depth == other_depth)

    def __repr__(self):
        return "<Block: {0},{1},{2}>".format(self.width, self.height, self.depth)

    def to_hwc(self):
        """Return the dimensions as a new ``[height, width, depth]`` list."""
        return [self.height, self.width, self.depth]

    @classmethod
    def from_string(cls, s):
        """Parse a ``"WxHxC"`` string of three integers into a Block.

        Raises ValueError if there are not exactly three ``x``-separated
        fields or a field is not an integer.
        """
        w, h, c = (int(v) for v in s.split("x"))
        return cls(w, h, c)

    @classmethod
    def from_shape(cls, shape) -> "Block":
        """Converts the shape to a Block"""
        shp = full_shape(3, shape, 1)
        # Note: index from end, as len(shp) may be > 3
        return cls(shp[-2], shp[-3], shp[-1])

    @classmethod
    def min(cls, a, b):
        """Return the per-dimension minimum of two blocks."""
        return cls(min(a.width, b.width), min(a.height, b.height), min(a.depth, b.depth))

    @classmethod
    def max(cls, a, b):
        """Return the per-dimension maximum of two blocks."""
        return cls(max(a.width, b.width), max(a.height, b.height), max(a.depth, b.depth))

    @classmethod
    def round(cls, a, b):
        """Apply round_up() to each dimension of ``a`` with the matching dimension of ``b``."""
        return cls(round_up(a.width, b.width), round_up(a.height, b.height), round_up(a.depth, b.depth))

    @classmethod
    def div_round_up(cls, a, b):
        """Apply round_up_divide() to each dimension of ``a`` with the matching dimension of ``b``."""
        return cls(
            round_up_divide(a.width, b.width), round_up_divide(a.height, b.height), round_up_divide(a.depth, b.depth)
        )
103
Tim Hall79d07d22020-04-27 18:20:16 +0100104
class Rect:
    """A 3D box described by a start corner (x, y, z) and an end corner (x2, y2, z2).

    size() adds one to each coordinate difference, i.e. it treats both corners
    as part of the box.
    """

    def __init__(self, x, y, z, x2, y2, z2):
        self.x, self.y, self.z = x, y, z
        self.x2, self.y2, self.z2 = x2, y2, z2

    def start(self):
        """Return the start corner as a PointXYZ."""
        return PointXYZ(self.x, self.y, self.z)

    def end(self):
        """Return the end corner as a PointXYZ."""
        return PointXYZ(self.x2, self.y2, self.z2)

    def size(self):
        """Return the extent of the box as a Block(width, height, depth)."""
        extent_x = self.x2 - self.x + 1
        extent_y = self.y2 - self.y + 1
        extent_z = self.z2 - self.z + 1
        return Block(extent_x, extent_y, extent_z)

    def __repr__(self):
        corners = (self.x, self.y, self.z, self.x2, self.y2, self.z2)
        return "<Rect: ({0},{1},{2}) ({3},{4},{5})>".format(*corners)
125
126
class SHRAMElements:
    """Index constants for the SHRAM element kinds and per-kind lookup tables.

    The integer constants are used as indices into the arrays below (and into
    the per-accelerator ``shram_granules`` lists), so every table has
    ``Last + 1`` entries in the same order as the constants.
    """

    IFM8 = 0
    IFM16 = 1
    IFM8_Elementwise = 2
    IFM16_Elementwise = 3
    IFM32 = 4
    Acc16 = 5
    Acc32 = 6
    Acc40 = 7
    Last = Acc40

    # Element width in bits for each kind, and the same width in whole bytes
    BitSizes = np.array([8, 16, 8, 16, 32, 16, 32, 40], dtype=np.int32)
    ByteSizes = BitSizes // 8
    # Alignment applied after (PostAlign) and before (PreAlign) the depth is
    # scaled by the element byte size; see generate_block_config()
    PostAlign = np.array([8, 8, 8, 8, 8, 1, 1, 1], dtype=np.int32)
    PreAlign = np.array([1, 1, 1, 1, 1, 8, 8, 8], dtype=np.int32)
Tim Hall79d07d22020-04-27 18:20:16 +0100141
142
class SHRAMBlockConfig:
    """Pairs per-element-kind byte sizes with the banks each kind requires."""

    def __init__(self, sizes, banks):
        # One bank entry is expected per SHRAMElements kind
        expected_entries = SHRAMElements.Last + 1
        assert len(banks) == expected_entries
        self.sizes = sizes
        self.banks = banks
148
149
Tim Hallc8a73862020-10-27 12:43:14 +0000150# Area indices must match Ethos-U SHRAM layout spec
class SharedBufferArea(enum.IntEnum):
    """Indices of the shared buffer areas; ``Size`` is the number of areas."""

    OFM = 0
    Weights = 1
    IFM = 2
    Accumulators = 3
    # One past the last real area, i.e. the area count
    Size = Accumulators + 1
157
158
class Accelerator(enum.Enum):
    """Accelerator configurations; each member's value is its configuration name string."""

    Ethos_U55_32 = "ethos-u55-32"
    Ethos_U55_64 = "ethos-u55-64"
    Ethos_U55_128 = "ethos-u55-128"
    Ethos_U55_256 = "ethos-u55-256"
    Ethos_U65_256 = "ethos-u65-256"
    Ethos_U65_512 = "ethos-u65-512"

    @classmethod
    def member_list(cls):
        """Return the string values of all members in definition order."""
        return [member.value for member in cls]

    @classmethod
    def from_npu_accelerator(cls, npu_accelerator: NpuAccelerator) -> "Accelerator":
        """Converts the given public API object to Accelerator (used internally)"""
        public_to_internal = {
            NpuAccelerator.Ethos_U55_32: cls.Ethos_U55_32,
            NpuAccelerator.Ethos_U55_64: cls.Ethos_U55_64,
            NpuAccelerator.Ethos_U55_128: cls.Ethos_U55_128,
            NpuAccelerator.Ethos_U55_256: cls.Ethos_U55_256,
            NpuAccelerator.Ethos_U65_256: cls.Ethos_U65_256,
            NpuAccelerator.Ethos_U65_512: cls.Ethos_U65_512,
        }
        assert npu_accelerator in public_to_internal, f"Unsupported accelerator {npu_accelerator}"
        return public_to_internal[npu_accelerator]
184
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100185
@enum.unique
class MemPort(enum.Enum):
    """The two memory ports that memory areas can be mapped to."""

    # Explicit values equal what enum.auto() assigns (counting from 1)
    Axi0 = 1
    Axi1 = 2
190
191
# Immutable summary of the SHRAM bank layout (bank counts and bank size in bytes)
SHRAMConfig = namedtuple("SHRAMConfig", "reserved_output_banks bank_size_bytes total_banks reserved_end_banks")
195
196
Tim Hall79d07d22020-04-27 18:20:16 +0100197class ArchitectureFeatures:
Tim Hallc8a73862020-10-27 12:43:14 +0000198 """This class is a container for various parameters of the Ethos-U core
Diqing Zhonge8887a32020-09-24 09:53:48 +0200199 and system configuration that can be tuned, either by command line
Tim Hallc8a73862020-10-27 12:43:14 +0000200 parameters or by the Ethos-U architects. The class is often passed
Diqing Zhonge8887a32020-09-24 09:53:48 +0200201 around to passes that need to do architecture-dependent actions.
Tim Hall79d07d22020-04-27 18:20:16 +0100202
Diqing Zhonge8887a32020-09-24 09:53:48 +0200203 Note the difference between ArchitectureFeatures and CompilerOptions
Tim Hallc8a73862020-10-27 12:43:14 +0000204 - ArchitectureFeatures is for changing the Ethos-U and system architecture
Diqing Zhonge8887a32020-09-24 09:53:48 +0200205 - CompilerOptions is for changing the behaviour of the compiler
206 """
Tim Hall79d07d22020-04-27 18:20:16 +0100207
# Fixed per-accelerator parameters. shram_granules is indexed by SHRAMElements.
ArchitectureConfig = namedtuple(
    "ArchitectureConfig", "macs cores ofm_ublock ifm_ublock shram_banks shram_granules elem_units"
)
accelerator_configs = {
    Accelerator.Ethos_U65_512: ArchitectureConfig(
        macs=256,
        cores=2,
        ofm_ublock=Block(2, 2, 8),
        ifm_ublock=Block(2, 2, 8),
        shram_banks=48,
        shram_granules=[8, 8, 8, 8, 16, 8, 16, 20],
        elem_units=8,
    ),
    Accelerator.Ethos_U65_256: ArchitectureConfig(
        macs=256,
        cores=1,
        ofm_ublock=Block(2, 2, 8),
        ifm_ublock=Block(2, 2, 8),
        shram_banks=48,
        shram_granules=[8, 8, 8, 8, 16, 8, 16, 20],
        elem_units=8,
    ),
    Accelerator.Ethos_U55_256: ArchitectureConfig(
        macs=256,
        cores=1,
        ofm_ublock=Block(2, 2, 8),
        ifm_ublock=Block(2, 2, 8),
        shram_banks=48,
        shram_granules=[8, 8, 8, 8, 16, 8, 16, 20],
        elem_units=8,
    ),
    Accelerator.Ethos_U55_128: ArchitectureConfig(
        macs=128,
        cores=1,
        ofm_ublock=Block(2, 1, 8),
        ifm_ublock=Block(2, 1, 8),
        shram_banks=24,
        shram_granules=[4, 4, 4, 4, 8, 4, 8, 12],
        elem_units=4,
    ),
    Accelerator.Ethos_U55_64: ArchitectureConfig(
        macs=64,
        cores=1,
        ofm_ublock=Block(1, 1, 8),
        ifm_ublock=Block(1, 1, 8),
        shram_banks=16,
        shram_granules=[2, 2, 2, 2, 4, 4, 4, 8],
        elem_units=2,
    ),
    Accelerator.Ethos_U55_32: ArchitectureConfig(
        macs=32,
        cores=1,
        ofm_ublock=Block(1, 1, 4),
        ifm_ublock=Block(1, 1, 8),
        shram_banks=16,
        shram_granules=[2, 2, 2, 2, 4, 4, 4, 4],
        elem_units=1,
    ),
}

OFMSplitDepth = 16
SubKernelMax = Block(8, 8, 65536)

# Name of the built-in system configuration / memory mode that needs no config file
DEFAULT_CONFIG = "internal-default"
MAX_BLOCKDEP = 3
Tim Hall1bd531d2020-11-01 20:59:36 +0000237
def __init__(
    self,
    vela_config_files,
    accelerator_config,
    system_config,
    memory_mode,
    max_blockdep,
    verbose_config,
    arena_cache_size,
):
    """Build the architecture description for one accelerator and system setup.

    Args:
        vela_config_files: Vela configuration file(s), forwarded to
            _get_vela_config(); None means no file is read.
        accelerator_config: Accelerator name, matched case-insensitively
            against Accelerator.member_list().
        system_config: Name selecting the "System_Config.<name>" section.
        memory_mode: Name selecting the "Memory_Mode.<name>" section.
        max_blockdep: Stored unchanged as self.max_blockdep.
        verbose_config: Forwarded to _get_vela_config().
        arena_cache_size: Forwarded to _get_vela_config() as the value given
            on the command line.

    Raises:
        CliOptionError: If accelerator_config is not a known accelerator
            (_get_vela_config() can also raise it).
    """
    accelerator_config = accelerator_config.lower()
    if accelerator_config not in Accelerator.member_list():
        # Report the rejected argument itself: self.accelerator_config is not
        # assigned until after this check, so reading it here would raise
        # AttributeError and hide the real error.
        raise CliOptionError("--accelerator-config", accelerator_config, "Unknown accelerator configuration")
    self.accelerator_config = Accelerator(accelerator_config)
    accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config]
    self.config = accel_config

    # Bank granules per accumulator kind, keyed by SHRAMElements index
    self.accumulator_granules = {
        SHRAMElements.Acc16: accel_config.shram_granules[SHRAMElements.Acc16],
        SHRAMElements.Acc32: accel_config.shram_granules[SHRAMElements.Acc32],
        SHRAMElements.Acc40: accel_config.shram_granules[SHRAMElements.Acc40],
    }

    # Bank granules per IFM kind, keyed by IFM bit width
    self.ifm_bank_granules = {
        8: accel_config.shram_granules[SHRAMElements.IFM8],
        16: accel_config.shram_granules[SHRAMElements.IFM16],
        32: accel_config.shram_granules[SHRAMElements.IFM32],
    }

    # Same, for elementwise operations (32-bit shares the IFM32 entry)
    self.ifm_ew_bank_granules = {
        8: accel_config.shram_granules[SHRAMElements.IFM8_Elementwise],
        16: accel_config.shram_granules[SHRAMElements.IFM16_Elementwise],
        32: accel_config.shram_granules[SHRAMElements.IFM32],
    }

    self.shram = SHRAMConfig(2, 1024, accel_config.shram_banks, 2 if accel_config.shram_banks > 16 else 0)

    self.system_config = system_config
    self.memory_mode = memory_mode
    self.is_ethos_u65_system = self.accelerator_config in (Accelerator.Ethos_U65_256, Accelerator.Ethos_U65_512)

    self.max_outstanding_dma = 2 if self.is_ethos_u65_system else 1
    self.max_outstanding_kernels = 3

    self.ncores = accel_config.cores
    self.ofm_ublock = accel_config.ofm_ublock
    self.ifm_ublock = accel_config.ifm_ublock
    self.ofm_block_max = Block(64, 32, 128)

    self.max_blockdep = max_blockdep

    dpu_min_height = accel_config.ofm_ublock.height
    dpu_min_width = accel_config.ofm_ublock.width
    dpu_dot_product_width = 8
    dpu_min_ofm_channels = accel_config.ofm_ublock.depth

    self.num_elem_wise_units = accel_config.elem_units
    self.num_macs_per_cycle = dpu_min_height * dpu_min_width * dpu_dot_product_width * dpu_min_ofm_channels
    # Max value in address offsets
    self.max_address_offset = 1 << 48 if self.is_ethos_u65_system else 1 << 32

    # Get system configuration and memory mode
    self._get_vela_config(vela_config_files, verbose_config, arena_cache_size)

    self.axi_port_width = 128 if self.is_ethos_u65_system else 64
    self.memory_bandwidths_per_cycle = self.axi_port_width * self.memory_clock_scales / 8

    self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.core_clock

    # Get output/activation performance numbers
    self._generate_output_perf_tables(self.accelerator_config)

    # sizes as N x H x W x C. we need to round up to these when allocating storage
    self.storage_rounding_quantums = {
        TensorFormat.Unknown: (1, 1, 1, 1),
        TensorFormat.WeightsCompressed: (1, 1, 1, 1),
        TensorFormat.NHWC: (1, 1, 1, 1),
        TensorFormat.NHCWB16: (1, 1, 1, 16),
    }

    # brick sizes as N x H x W x C. We have to fetch whole bricks at a time
    self.brick_sizes = {
        TensorFormat.Unknown: (1, 1, 1, 1),
        TensorFormat.WeightsCompressed: (1, 1, 1, 1),
        TensorFormat.NHWC: (1, 1, 1, 1),
        TensorFormat.NHCWB16: (1, 1, 1, 16),
    }

    self.default_weight_format = TensorFormat.WeightsCompressed
    self.default_feature_map_format = TensorFormat.NHWC

    self.tensor_storage_mem_area = {
        # permanent mem_area
        TensorPurpose.Unknown: MemArea.Unknown,
        TensorPurpose.Weights: self.permanent_storage_mem_area,
        TensorPurpose.FeatureMap: self.feature_map_storage_mem_area,
        TensorPurpose.LUT: self.permanent_storage_mem_area,
        TensorPurpose.Scratch: self.feature_map_storage_mem_area,
        TensorPurpose.ScratchFast: self.fast_storage_mem_area,
    }

    self.tensor_storage_mem_type = {
        TensorPurpose.Unknown: MemType.Unknown,
        TensorPurpose.Weights: MemType.Permanent_NPU,
        TensorPurpose.FeatureMap: MemType.Scratch,
        TensorPurpose.LUT: MemType.Scratch,
        TensorPurpose.Scratch: MemType.Scratch,
        TensorPurpose.ScratchFast: MemType.Scratch_fast,
    }

    # Minimum (height, width) block per NPU block type
    self.min_block_sizes = {
        NpuBlockType.Default: (dpu_min_height, dpu_min_width),
        NpuBlockType.VectorProduct: (1, 1),
        NpuBlockType.ConvolutionMxN: (dpu_min_height, dpu_min_width),
        NpuBlockType.Pooling: (dpu_min_height, dpu_min_width),
        NpuBlockType.ConvolutionDepthWise: (dpu_min_height, dpu_min_width),
        NpuBlockType.ElementWise: (1, 1),
        NpuBlockType.ReduceSum: (dpu_min_height, dpu_min_width),
    }

    self.sub_kernel_limits = {
        NpuBlockType.Default: (8, 8),
        NpuBlockType.VectorProduct: (1, 1),
        NpuBlockType.ConvolutionMxN: (8, 8),
        NpuBlockType.Pooling: (8, 8),
        NpuBlockType.ConvolutionDepthWise: (8, 8),
        NpuBlockType.ElementWise: (1, 1),
        NpuBlockType.ReduceSum: (8, 8),
    }

    # weights for scheduler search
    # NOTE(review): imported locally, presumably to avoid a circular import - confirm
    from .npu_performance import make_bandwidth_array

    self.bandwidth_weights = make_bandwidth_array()
    self.bandwidth_weights[MemArea.Sram] = 1.0
    self.bandwidth_weights[MemArea.Dram] = 10.0
    self.bandwidth_weights[MemArea.OnChipFlash] = 2.0
    self.bandwidth_weights[MemArea.OffChipFlash] = 20.0
    self.cycles_weight = 40
    self.max_sram_used_weight = 1000

    if self.is_spilling_enabled():
        self.max_sram_used_weight = 0

    # Shared Buffer Block allocations
    self.shram_bank_size = 1024  # bytes
    self.shram_size_bytes = accel_config.shram_banks * self.shram_bank_size
    self.shram_reserved_output_banks = 2
    self.shram_reserved_weight_banks = 0
    self.shram_reserved_unused_banks = 2 if accel_config.shram_banks > 16 else 0
    self.shram_total_banks = accel_config.shram_banks - self.shram_reserved_unused_banks
    self.shram_bank_granules = np.array(accel_config.shram_granules, np.int32)
    self.shram_lut_size = 2048
    # SHRAM base address of the activation lookup table
    self.shram_lut_address = self.shram_bank_size * self.available_shram_banks(True)

    # Build a map of acceptable IFM/OFM block configurations up to the maximum
    # IFM/OFM block size.
    ifm_block_max = self.get_ifm_block_size(32, self.ofm_block_max, Kernel(8, 8))
    self.block_config_map = dict()
    self.generate_block_config_map(Block(ifm_block_max.width * 2, ifm_block_max.height, 128))

    # Setup supported operators and restriction checkers class
    self.tflite_supported_operators = TFLiteSupportedOperators()
    self.tosa_supported_operators = TosaSupportedOperators()
Tim Hall79d07d22020-04-27 18:20:16 +0100403
Louis Verhaard0b8268a2020-08-05 16:11:29 +0200404 # Returns available number of SHRAM banks depending on activation lookup table
405 # being used or not
def available_shram_banks(self, uses_activation_lut):
    """Return the number of usable SHRAM banks.

    Two banks are subtracted when an activation lookup table is used and the
    configuration has no reserved unused banks.
    """
    lut_needs_banks = uses_activation_lut and self.shram_reserved_unused_banks == 0
    if lut_needs_banks:
        return self.shram_total_banks - 2
    return self.shram_total_banks
411
Tim Hall79d07d22020-04-27 18:20:16 +0100412 # Calculate block configuration for ALL known IFM operations and
413 # accumulator sizes. Consumers will need to select their preferred
414 # operation and bit-width at read-time.
def generate_block_config(self, width, height, depth):
    """Compute byte sizes and bank requirements for every SHRAM element kind.

    The SHRAMElements tables are arrays with one entry per element kind, so
    the results hold one value per kind for a feature map block of the given
    width, height and depth.

    Returns:
        SHRAMBlockConfig holding the byte sizes and the required banks.
    """
    # Number of bytes required for any SHRAM element for a FM of given dimensions.
    # For IFM: size = H*W*Align(D*BYTE_WIDTH, 8)
    # For ACC: size = H*W*Align(D,8)*BYTE_WIDTH
    pre_aligned_depth = round_up(depth, SHRAMElements.PreAlign)
    depth_bytes = round_up(pre_aligned_depth * SHRAMElements.ByteSizes, SHRAMElements.PostAlign)
    size_bytes = (height * width) * depth_bytes

    # Convert byte size (rounded) to size in banks, then double it because
    # the IFM/Acc are double buffered (need twice as many banks)
    double_buffered_banks = round_up_divide(size_bytes, self.shram_bank_size) * 2
    # Round bank requirement to bank granularity
    required_banks = round_up(double_buffered_banks, self.shram_bank_granules)
    return SHRAMBlockConfig(size_bytes, required_banks)
429
430 @staticmethod
431 def make_block_config_key(width, height, depth):
432 return (int(height), int(width), int(depth))
433
def get_block_config(self, width, height, depth):
    """Look up the precomputed block configuration for the given dimensions.

    Returns None when no entry exists in self.block_config_map.
    """
    assert depth <= self.ofm_block_max.depth
    lookup_key = ArchitectureFeatures.make_block_config_key(width, height, depth)
    return self.block_config_map.get(lookup_key)
439
# Generate a key:value map of possible block configurations, where the
# key is the (height, width, depth) tuple built by make_block_config_key
def generate_block_config_map(self, block: Block):
    """Fill self.block_config_map for every size up to ``block``.

    Every height in 1..block.height and width in 1..block.width is combined
    with each depth in the fixed candidate list below.
    """
    # All possible IFM/OFM depth values
    candidate_depths = [4, 8, 12, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128]
    for height in range(1, block.height + 1):
        for width in range(1, block.width + 1):
            for depth in candidate_depths:
                map_key = ArchitectureFeatures.make_block_config_key(width, height, depth)
                self.block_config_map[map_key] = self.generate_block_config(width, height, depth)
449
def _generate_output_perf_tables(self, accel_config):
    """Set the output/activation cycles-per-element tables for the accelerator.

    Assigns self.output_cycles_per_elem and self.activation_cycles_per_elem.
    Any accelerator not listed in the table must be Ethos_U65_512 (asserted).
    """
    # (accelerators sharing the numbers, output cycles, activation cycles)
    perf_tables = (
        ((Accelerator.Ethos_U55_32,), (2.0, 3.0, 3.0, 3.0, 4.0, 6.0, 1.0, 2.0), (1.0, 1.0, 0.0)),
        ((Accelerator.Ethos_U55_64,), (1.0, 1.5, 1.5, 1.5, 2.0, 3.0, 0.5, 1.0), (1.0, 1.0, 0.0)),
        ((Accelerator.Ethos_U55_128,), (0.75, 1.25, 0.75, 0.75, 1.0, 1.5, 0.25, 0.5), (1.0, 0.5, 0.0)),
        (
            (Accelerator.Ethos_U55_256, Accelerator.Ethos_U65_256),
            (0.625, 1.125, 0.5, 0.375, 0.5, 0.75, 0.125, 0.25),
            (1.0, 0.25, 0.0),
        ),
    )
    for accelerators, output_cycles, activation_cycles in perf_tables:
        if accel_config in accelerators:
            break
    else:
        assert accel_config == Accelerator.Ethos_U65_512
        output_cycles = (0.3125, 0.5625, 0.25, 0.1875, 0.25, 0.375, 0.0625, 0.125)
        activation_cycles = (0.5, 0.125, 0.0)

    self.output_cycles_per_elem = output_cycles
    self.activation_cycles_per_elem = activation_cycles
467
def calc_ifm_block_depth(self, ifm_depth, ifm_bits):
    """Return the IFM block depth for the given IFM depth and bit width.

    The depth is rounded up to the IFM ublock depth and capped at
    8 * 32 // ifm_bits.
    """
    assert ifm_bits in (8, 16, 32)
    assert ifm_depth > 0
    rounded_depth = round_up(ifm_depth, self.ifm_ublock.depth)
    depth_limit = 8 * 32 // ifm_bits
    return min(depth_limit, rounded_depth)
474
475 # Calculate the size of the IFM block given a depth, target OFM block and a kernel
def get_ifm_block_size(
    self,
    ifm_block_depth,
    ofm_block: Block,
    kernel: Kernel,
    subkernel: Block = Block(8, 8, 65536),
    ifm_resampling_mode=resampling_mode.NONE,
):
    """Return the IFM Block needed to produce ``ofm_block`` with ``kernel``.

    Height and width are derived from the OFM block, the kernel stride and
    the smaller of the subkernel and the dilated kernel, halved when
    resampling is active, then rounded up to the IFM ublock. The depth is
    passed through unchanged.

    The default ``subkernel`` is a single shared Block; it is only read here.
    """
    upscaling = 2
    if ifm_resampling_mode == resampling_mode.NONE:
        upscaling = 1

    def ifm_extent(ofm_extent, stride, kernel_extent, dilation, subkernel_extent, ublock_extent):
        # Same formula for both axes
        dilated_kernel_extent = ((kernel_extent - 1) * dilation) + 1
        needed = round_up_to_int(
            ((ofm_extent - 1) * stride + min(subkernel_extent, dilated_kernel_extent)) / upscaling
        )
        return round_up(needed, ublock_extent)

    # Height
    ifm_block_height = ifm_extent(
        ofm_block.height, kernel.stride.y, kernel.height, kernel.dilation.y, subkernel.height, self.ifm_ublock.height
    )
    # Width
    ifm_block_width = ifm_extent(
        ofm_block.width, kernel.stride.x, kernel.width, kernel.dilation.x, subkernel.width, self.ifm_ublock.width
    )

    return Block(ifm_block_width, ifm_block_height, ifm_block_depth)
503
def is_spilling_enabled(self):
    """
    Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data

    It is reported as enabled when the cache port maps to SRAM and the cache
    and arena use different ports.
    """
    cache_is_sram = self._mem_port_mapping(self.cache_mem_area) == MemArea.Sram
    return cache_is_sram and self.cache_mem_area != self.arena_mem_area
Tim Hall79d07d22020-04-27 18:20:16 +0100511
def mem_type_size(self, mem_type: MemType) -> int:
    """Returns size in bytes available for the given memory type"""
    is_fast_scratch = mem_type == MemType.Scratch_fast
    if is_fast_scratch and self.is_spilling_enabled():
        # Fast scratch is limited to the arena cache when spilling
        return self.arena_cache_size
    # Size is unknown, return max possible address offset
    return self.max_address_offset
518
def _mem_port_mapping(self, mem_port):
    """Return the memory area attached to the given MemPort (KeyError if unknown)."""
    area_by_port = {MemPort.Axi0: self.axi0_port, MemPort.Axi1: self.axi1_port}
    return area_by_port[mem_port]
Tim Hall79d07d22020-04-27 18:20:16 +0100522
def _set_default_sys_config(self):
    """Apply the ArchitectureFeatures.DEFAULT_CONFIG system configuration.

    SRAM is always on AXI0 with the same settings; only the core clock and
    the memory behind AXI1 differ between Ethos-U65 and Ethos-U55.
    """
    read, write = BandwidthDirection.Read, BandwidthDirection.Write

    if self.is_ethos_u65_system:
        # Default Ethos-U65 system configuration
        # Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s)
        core_clock = 1e9
        axi1_area = MemArea.Dram
        axi1_clock_scale = 0.75  # 3 / 4
        axi1_read_latency, axi1_write_latency = 500, 250
    else:
        # Default Ethos-U55 system configuration
        # Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
        core_clock = 500e6
        axi1_area = MemArea.OffChipFlash
        axi1_clock_scale = 0.125  # 1 / 8
        axi1_read_latency, axi1_write_latency = 64, 64

    self.core_clock = core_clock
    self.axi0_port = MemArea.Sram
    self.axi1_port = axi1_area
    self.memory_clock_scales[MemArea.Sram] = 1.0
    self.memory_clock_scales[axi1_area] = axi1_clock_scale
    self.memory_burst_length[MemArea.Sram] = 32
    self.memory_burst_length[axi1_area] = 128
    self.memory_latency[MemArea.Sram][read] = 32
    self.memory_latency[MemArea.Sram][write] = 32
    self.memory_latency[axi1_area][read] = axi1_read_latency
    self.memory_latency[axi1_area][write] = axi1_write_latency
Tim Hall79d07d22020-04-27 18:20:16 +0100553
def _set_default_mem_mode(self):
    """Apply the ArchitectureFeatures.DEFAULT_CONFIG memory mode.

    Constants go to AXI1 and the cache to AXI0 in both modes; the arena port
    and the arena cache size depend on the accelerator family.
    """
    if self.is_ethos_u65_system:
        # Default Ethos-U65 memory mode
        # Dedicated SRAM: the SRAM is only for use by the Ethos-U
        # The non-SRAM memory is assumed to be read-writeable
        arena_port = MemPort.Axi1
        cache_size = 384 * 1024
    else:
        # Default Ethos-U55 memory mode
        # Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
        # The non-SRAM memory is assumed to be read-only
        arena_port = MemPort.Axi0
        cache_size = self.max_address_offset

    self.const_mem_area = MemPort.Axi1
    self.arena_mem_area = arena_port
    self.cache_mem_area = MemPort.Axi0
    self.arena_cache_size = cache_size
Tim Hall79d07d22020-04-27 18:20:16 +0100572
def _get_vela_config(self, vela_config_files, verbose_config, arena_cache_size_from_cli):
    """
    Gets the system configuration and memory modes from one or more Vela configuration file(s) or uses some
    defaults.

    vela_config_files is handed to ConfigParser.read(), or is None when no configuration file is used.
    verbose_config enables printing of the resolved settings. arena_cache_size_from_cli, when not None, replaces
    the arena cache size obtained from the defaults or from the configuration file(s).

    Raises CliOptionError when the selected system configuration or memory mode is neither a section of the
    configuration file(s) nor the default one, and ConfigOptionError when the resolved memory areas or the arena
    cache size fail the checks performed below.
    """

    # all properties are optional and are initialised to a value of 1 (or the equivalent)
    self.core_clock = 1
    self.axi0_port = MemArea(1)
    self.axi1_port = MemArea(1)
    self.memory_clock_scales = np.ones(MemArea.Size)
    # np.int was only an alias of the builtin int and is no longer provided by NumPy, so the builtin is used
    self.memory_burst_length = np.ones(MemArea.Size, int)
    self.memory_latency = np.zeros((MemArea.Size, BandwidthDirection.Size), int)
    self.const_mem_area = MemPort(1)
    self.arena_mem_area = MemPort(1)
    self.cache_mem_area = MemPort(1)
    self.arena_cache_size = self.max_address_offset
    # remembers where the arena cache size came from, purely for the verbose report
    arena_cache_size_loc_text = "Default"

    # read configuration file(s)
    self.vela_config = None

    if vela_config_files is not None:
        self.vela_config = ConfigParser()
        self.vela_config.read(vela_config_files)

    # read system configuration
    sys_cfg_section = "System_Config." + self.system_config

    if self.vela_config is not None and self.vela_config.has_section(sys_cfg_section):
        self.core_clock = float(self._read_config(sys_cfg_section, "core_clock", self.core_clock))
        # the member name is passed as the fallback so that it can be looked up again by MemArea[...], which is
        # the same pattern as used for the memory mode below
        self.axi0_port = MemArea[self._read_config(sys_cfg_section, "axi0_port", self.axi0_port.name)]
        self.axi1_port = MemArea[self._read_config(sys_cfg_section, "axi1_port", self.axi1_port.name)]

        # only the memories attached to the two AXI ports get their characteristics read from the file
        for mem_area in (self.axi0_port, self.axi1_port):
            self.memory_clock_scales[mem_area] = float(
                self._read_config(
                    sys_cfg_section, mem_area.name + "_clock_scale", self.memory_clock_scales[mem_area]
                )
            )
            self.memory_burst_length[mem_area] = int(
                self._read_config(
                    sys_cfg_section, mem_area.name + "_burst_length", self.memory_burst_length[mem_area]
                )
            )
            self.memory_latency[mem_area][BandwidthDirection.Read] = int(
                self._read_config(
                    sys_cfg_section,
                    mem_area.name + "_read_latency",
                    self.memory_latency[mem_area][BandwidthDirection.Read],
                )
            )
            self.memory_latency[mem_area][BandwidthDirection.Write] = int(
                self._read_config(
                    sys_cfg_section,
                    mem_area.name + "_write_latency",
                    self.memory_latency[mem_area][BandwidthDirection.Write],
                )
            )
    elif self.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
        self._set_default_sys_config()

    elif vela_config_files is None:
        raise CliOptionError("--config", vela_config_files, "Vela config file not specified")

    else:
        raise CliOptionError(
            "--system-config", self.system_config, f"Section {sys_cfg_section} not found in Vela config file",
        )

    # read the memory mode
    mem_mode_section = "Memory_Mode." + self.memory_mode

    if self.vela_config is not None and self.vela_config.has_section(mem_mode_section):
        self.const_mem_area = MemPort[
            self._read_config(mem_mode_section, "const_mem_area", self.const_mem_area.name)
        ]
        self.arena_mem_area = MemPort[
            self._read_config(mem_mode_section, "arena_mem_area", self.arena_mem_area.name)
        ]
        self.cache_mem_area = MemPort[
            self._read_config(mem_mode_section, "cache_mem_area", self.cache_mem_area.name)
        ]
        # _read_config records in this list whether the key was present; the last entry is True if it was
        found = []
        self.arena_cache_size = int(
            self._read_config(mem_mode_section, "arena_cache_size", self.arena_cache_size, found)
        )
        if found[-1]:
            arena_cache_size_loc_text = "Configuration file"

    elif self.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
        self._set_default_mem_mode()

    elif vela_config_files is None:
        raise CliOptionError("--config", vela_config_files, "Vela config file not specified")

    else:
        raise CliOptionError(
            "--memory-mode", self.memory_mode, f"Section {mem_mode_section} not found in Vela config file",
        )

    # override sram to onchipflash
    # NOTE(review): the remap is taken to apply only when all three areas share one port, so that the other
    # port is free to be re-purposed - confirm the nesting against the original file
    if self._mem_port_mapping(self.const_mem_area) == MemArea.Sram:
        if self.const_mem_area == self.arena_mem_area == self.cache_mem_area:
            print(
                "Info: Changing const_mem_area from Sram to OnChipFlash. This will use the same characteristics as"
                " Sram."
            )
            # move the constant data to whichever port is not in use and attach OnChipFlash to that port
            if self.const_mem_area == MemPort.Axi0:
                self.const_mem_area = MemPort.Axi1
                self.axi1_port = MemArea.OnChipFlash
            else:
                self.const_mem_area = MemPort.Axi0
                self.axi0_port = MemArea.OnChipFlash
            self.memory_clock_scales[MemArea.OnChipFlash] = self.memory_clock_scales[MemArea.Sram]
            self.memory_burst_length[MemArea.OnChipFlash] = self.memory_burst_length[MemArea.Sram]
            self.memory_latency[MemArea.OnChipFlash] = self.memory_latency[MemArea.Sram]

    # override sram usage
    if arena_cache_size_from_cli is not None:
        self.arena_cache_size = arena_cache_size_from_cli
        arena_cache_size_loc_text = "CLI option"

    # check configuration
    if self._mem_port_mapping(self.const_mem_area) not in (
        MemArea.Dram,
        MemArea.OnChipFlash,
        MemArea.OffChipFlash,
    ):
        raise ConfigOptionError(
            "const_mem_area",
            self._mem_port_mapping(self.const_mem_area).name,
            "Dram or OnChipFlash or OffChipFlash",
        )

    if self._mem_port_mapping(self.arena_mem_area) not in (MemArea.Sram, MemArea.Dram):
        raise ConfigOptionError("arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram or Dram")

    if self._mem_port_mapping(self.cache_mem_area) != MemArea.Sram:
        raise ConfigOptionError("cache_mem_area", self._mem_port_mapping(self.cache_mem_area).name, "Sram")

    if self.arena_cache_size < 0:
        raise ConfigOptionError("arena_cache_size", self.arena_cache_size, ">= 0")
    if self.arena_cache_size > self.max_address_offset:
        raise ConfigOptionError(
            "arena_cache_size",
            f"{self.arena_cache_size}. Size is out of bounds, maximum is: {self.max_address_offset}",
        )

    # assign existing memory areas
    self.permanent_storage_mem_area = self._mem_port_mapping(self.const_mem_area)
    self.feature_map_storage_mem_area = self._mem_port_mapping(self.arena_mem_area)
    self.fast_storage_mem_area = self._mem_port_mapping(self.cache_mem_area)

    # display the system configuration and memory mode
    if verbose_config:
        print(f"System Configuration ({self.system_config}):")
        print(f"   core_clock = {self.core_clock}")
        print(f"   axi0_port = {self.axi0_port.name}")
        print(f"   axi1_port = {self.axi1_port.name}")
        for mem in (MemArea.Sram, MemArea.Dram, MemArea.OnChipFlash, MemArea.OffChipFlash):
            print(f"   {mem.name}_clock_scales = {self.memory_clock_scales[mem]}")
            print(f"   {mem.name}_burst_length = {self.memory_burst_length[mem]}")
            print(f"   {mem.name}_read_latency = {self.memory_latency[mem][BandwidthDirection.Read]}")
            print(f"   {mem.name}_write_latency = {self.memory_latency[mem][BandwidthDirection.Write]}")

        print(f"Memory Mode ({self.memory_mode}):")
        print(f"   const_mem_area = {self.const_mem_area.name}")
        print(f"   arena_mem_area = {self.arena_mem_area.name}")
        print(f"   cache_mem_area = {self.cache_mem_area.name}")
        print(f"   arena_cache_size = {self.arena_cache_size} from {arena_cache_size_loc_text}")

        print("Architecture Settings:")
        print(f"   permanent_storage_mem_area = {self.permanent_storage_mem_area.name}")
        print(f"   feature_map_storage_mem_area = {self.feature_map_storage_mem_area.name}")
        print(f"   fast_storage_mem_area = {self.fast_storage_mem_area.name}")
Tim Hall1bd531d2020-11-01 20:59:36 +0000749
def _read_config(self, section, key, current_value, found=None):
    """
    Reads a given key from a particular section in the Vela config file. If the section contains the 'inherit'
    option then the named section is consulted as well, and so on along the whole chain of inherited sections.
    A value defined in the requesting section takes precedence over one obtained through inheritance. When no
    section in the chain defines the key, current_value is returned as a string (or None if it was None).

    If specified, found should be an empty list. False is appended to it for every section that is visited and
    then True is appended for every visited section that defines the key, so found[-1] indicates whether the key
    was found or not.

    Raises ConfigOptionError if a section in the chain does not exist, or if the 'inherit' options form a loop
    (either a section naming itself or a longer cycle through several sections).
    """
    result = str(current_value) if current_value is not None else None

    # walk down the inheritance chain, recording the sections in the order they are reached
    chain = []
    visited = set()
    current_section = section
    while True:
        if not self.vela_config.has_section(current_section):
            raise ConfigOptionError(
                "section", f"{current_section}. The section was not found in the Vela config file(s)"
            )

        chain.append(current_section)
        visited.add(current_section)
        if found is not None:
            found.append(False)

        if not self.vela_config.has_option(current_section, "inherit"):
            break

        inheritance_section = self.vela_config.get(current_section, "inherit")
        # check for recursion loop
        if inheritance_section == current_section:
            raise ConfigOptionError(
                "inherit", f"{inheritance_section}. This references its own section and recursion is not allowed",
            )
        # a section seen earlier in the chain would otherwise be followed forever
        if inheritance_section in visited:
            raise ConfigOptionError(
                "inherit",
                f"{inheritance_section}. This section is already part of the inheritance chain and recursion is"
                " not allowed",
            )
        current_section = inheritance_section

    # apply the most distant section first so that sections nearer to the requested one override it
    for name in reversed(chain):
        if self.vela_config.has_option(name, key):
            result = self.vela_config.get(name, key)
            if found is not None:
                found.append(True)

    return result
Louis Verhaard52078302020-11-18 13:35:06 +0100780
781
# Default arch instances are expensive to create, so they are kept here (keyed by accelerator) for reuse
default_arch_cache = {}
784
785
def create_default_arch(accelerator: Accelerator) -> ArchitectureFeatures:
    """Returns the default-settings architecture features object for the accelerator, creating it on first use"""
    arch = default_arch_cache.get(accelerator)
    if arch is None:
        # nothing cached for this accelerator yet: build the instance once and keep it for later requests
        arch = ArchitectureFeatures(
            vela_config_files=None,
            accelerator_config=accelerator.value,
            system_config=ArchitectureFeatures.DEFAULT_CONFIG,
            memory_mode=ArchitectureFeatures.DEFAULT_CONFIG,
            max_blockdep=ArchitectureFeatures.MAX_BLOCKDEP,
            verbose_config=False,
            arena_cache_size=None,
        )
        default_arch_cache[accelerator] = arch
    return arch