blob: 19133f51e8654aa89ab6450c368052507cbc0cb6 [file] [log] [blame]
erik.andersson@arm.com460c6892021-02-24 14:38:09 +01001# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
Tim Hall79d07d22020-04-27 18:20:16 +01002#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
Tim Hallc8a73862020-10-27 12:43:14 +000017# Holds a container for Ethos-U and System architecture parameters.
Diego Russoea6111a2020-04-14 18:41:58 +010018import enum
Tim Hall79d07d22020-04-27 18:20:16 +010019from collections import namedtuple
20from configparser import ConfigParser
Diego Russoea6111a2020-04-14 18:41:58 +010021
Tim Hall79d07d22020-04-27 18:20:16 +010022import numpy as np
Diego Russoea6111a2020-04-14 18:41:58 +010023
Louis Verhaardaeae5672020-11-02 18:04:27 +010024from .api import NpuAccelerator
Tim Hall1bd531d2020-11-01 20:59:36 +000025from .errors import CliOptionError
26from .errors import ConfigOptionError
Dwight Lidmana9390f72020-05-13 12:00:08 +020027from .ethos_u55_regs.ethos_u55_regs import resampling_mode
Louis Verhaard69b31762020-11-17 09:45:20 +010028from .numeric_util import full_shape
Diego Russoe8a10452020-04-21 17:39:10 +010029from .numeric_util import round_up
30from .numeric_util import round_up_divide
erik.andersson@arm.com1d6d5c42021-04-14 13:31:05 +020031from .numeric_util import round_up_to_int
Tim Hall4ed38bc2020-10-20 18:54:20 +010032from .operation import Kernel
Diego Russoea6111a2020-04-14 18:41:58 +010033from .operation import NpuBlockType
Tim Hall4ed38bc2020-10-20 18:54:20 +010034from .operation import PointXYZ
Diego Russoea6111a2020-04-14 18:41:58 +010035from .supported_operators import SupportedOperators
Diqing Zhongf842b692020-12-11 13:07:37 +010036from .tensor import BandwidthDirection
Diego Russoe8a10452020-04-21 17:39:10 +010037from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020038from .tensor import MemType
Diego Russoe8a10452020-04-21 17:39:10 +010039from .tensor import TensorFormat
40from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010041
Tim Hall79d07d22020-04-27 18:20:16 +010042
class Block:
    """A three-dimensional block size described by width, height and depth.

    Instances are mutable and define ``__eq__`` without ``__hash__``, so they
    are not hashable.
    """

    def __init__(self, w=0, h=0, d=0):
        self.width = w
        self.height = h
        self.depth = d

    def elements(self):
        """Return width * height * depth."""
        return self.width * self.height * self.depth

    def elements_wh(self):
        """Return width * height (depth is ignored)."""
        return self.width * self.height

    def clone(self):
        """Return a new Block with the same dimensions."""
        return Block(self.width, self.height, self.depth)

    def as_list(self):
        """Return the dimensions as ``[height, width, depth]``."""
        return [self.height, self.width, self.depth]

    def __eq__(self, other):
        # Comparing against an object without width/height/depth (e.g. None)
        # must not raise; returning NotImplemented lets Python fall back to its
        # default handling, which yields False for == and True for !=.
        try:
            return bool(
                self.width == other.width and self.height == other.height and self.depth == other.depth
            )
        except AttributeError:
            return NotImplemented

    def __repr__(self):
        return "<Block: {0},{1},{2}>".format(self.width, self.height, self.depth)

    def to_hwc(self):
        """Return the dimensions as ``[height, width, depth]``."""
        return [self.height, self.width, self.depth]

    @classmethod
    def from_string(cls, s):
        """Build a block from a ``"WxHxC"`` string of integers."""
        w, h, c = (int(v) for v in s.split("x"))
        return cls(w, h, c)

    @classmethod
    def from_shape(cls, shape) -> "Block":
        """Converts the shape to a Block"""
        shp = full_shape(3, shape, 1)
        # Note: index from end, as len(shp) may be > 3
        return cls(shp[-2], shp[-3], shp[-1])

    @classmethod
    def min(cls, a, b):
        """Return the per-dimension minimum of two blocks."""
        return cls(min(a.width, b.width), min(a.height, b.height), min(a.depth, b.depth))

    @classmethod
    def max(cls, a, b):
        """Return the per-dimension maximum of two blocks."""
        return cls(max(a.width, b.width), max(a.height, b.height), max(a.depth, b.depth))

    @classmethod
    def round(cls, a, b):
        """Apply ``round_up`` to each dimension of ``a`` using the matching dimension of ``b``."""
        return cls(round_up(a.width, b.width), round_up(a.height, b.height), round_up(a.depth, b.depth))

    @classmethod
    def div_round_up(cls, a, b):
        """Apply ``round_up_divide`` to each dimension of ``a`` using the matching dimension of ``b``."""
        return cls(
            round_up_divide(a.width, b.width), round_up_divide(a.height, b.height), round_up_divide(a.depth, b.depth)
        )
102
Tim Hall79d07d22020-04-27 18:20:16 +0100103
class Rect:
    """A 3D box described by a start corner (x, y, z) and an end corner (x2, y2, z2)."""

    def __init__(self, x, y, z, x2, y2, z2):
        self.x, self.y, self.z = x, y, z
        self.x2, self.y2, self.z2 = x2, y2, z2

    def start(self):
        """Return the start corner as a PointXYZ."""
        return PointXYZ(self.x, self.y, self.z)

    def end(self):
        """Return the end corner as a PointXYZ."""
        return PointXYZ(self.x2, self.y2, self.z2)

    def size(self):
        """Return the extent as a Block; the +1 makes the end corner inclusive."""
        extent_x = self.x2 - self.x + 1
        extent_y = self.y2 - self.y + 1
        extent_z = self.z2 - self.z + 1
        return Block(extent_x, extent_y, extent_z)

    def __repr__(self):
        return f"<Rect: ({self.x},{self.y},{self.z}) ({self.x2},{self.y2},{self.z2})>"
124
125
class SHRAMElements:
    """Index constants for SHRAM element kinds plus per-kind lookup tables.

    Each array below has one entry per element kind and is indexed by the
    integer constants defined here.
    """

    IFM8 = 0
    IFM16 = 1
    IFM8_Elementwise = 2
    IFM16_Elementwise = 3
    IFM32 = 4
    Acc16 = 5
    Acc32 = 6
    Acc40 = 7
    # Highest valid index; the tables below hold Last + 1 entries
    Last = Acc40

    # Element width in bits, and the same width in whole bytes
    BitSizes = np.array([8, 16, 8, 16, 32, 16, 32, 40], dtype=np.int32)
    ByteSizes = BitSizes // 8
    # Alignment applied after (PostAlign) and before (PreAlign) scaling the depth by the byte size
    PostAlign = np.array([8, 8, 8, 8, 8, 1, 1, 1], dtype=np.int32)
    PreAlign = np.array([1, 1, 1, 1, 1, 8, 8, 8], dtype=np.int32)
Tim Hall79d07d22020-04-27 18:20:16 +0100140
141
class SHRAMBlockConfig:
    """Pairs the byte sizes and bank counts computed for one block shape."""

    def __init__(self, sizes, banks):
        # One bank count is expected per SHRAMElements kind
        expected_entries = SHRAMElements.Last + 1
        assert len(banks) == expected_entries
        self.sizes, self.banks = sizes, banks
147
148
# Area indices must match Ethos-U SHRAM layout spec
class SharedBufferArea(enum.IntEnum):
    """Integer indices of the shared buffer areas."""

    OFM = 0
    Weights = 1
    IFM = 2
    Accumulators = 3
    # Number of real areas: one past the last area index
    Size = Accumulators + 1
156
157
class Accelerator(enum.Enum):
    """Accelerator configurations; each value is the configuration's name string."""

    Ethos_U55_32 = "ethos-u55-32"
    Ethos_U55_64 = "ethos-u55-64"
    Ethos_U55_128 = "ethos-u55-128"
    Ethos_U55_256 = "ethos-u55-256"
    Ethos_U65_256 = "ethos-u65-256"
    Ethos_U65_512 = "ethos-u65-512"

    @classmethod
    def member_list(cls):
        """Return the value strings of all members, in definition order."""
        return [member.value for member in cls]

    @classmethod
    def from_npu_accelerator(cls, npu_accelerator: "NpuAccelerator") -> "Accelerator":
        """Converts the given public API object to Accelerator (used internally)"""
        accelerator_map = {
            NpuAccelerator.Ethos_U55_32: cls.Ethos_U55_32,
            NpuAccelerator.Ethos_U55_64: cls.Ethos_U55_64,
            NpuAccelerator.Ethos_U55_128: cls.Ethos_U55_128,
            NpuAccelerator.Ethos_U55_256: cls.Ethos_U55_256,
            NpuAccelerator.Ethos_U65_256: cls.Ethos_U65_256,
            NpuAccelerator.Ethos_U65_512: cls.Ethos_U65_512,
        }
        assert npu_accelerator in accelerator_map, f"Unsupported accelerator {npu_accelerator}"
        internal_accelerator = accelerator_map[npu_accelerator]
        return internal_accelerator
183
Manupa Karunaratned83d2e12020-07-20 12:05:32 +0100184
@enum.unique
class MemPort(enum.Enum):
    """The two AXI memory ports; values are auto-assigned starting from 1."""

    Axi0 = enum.auto()
    Axi1 = enum.auto()
189
190
# Immutable record of the SHRAM bank layout parameters
SHRAMConfig = namedtuple(
    "SHRAMConfig",
    "reserved_output_banks bank_size_bytes total_banks reserved_end_banks",
)
194
195
Tim Hall79d07d22020-04-27 18:20:16 +0100196class ArchitectureFeatures:
Tim Hallc8a73862020-10-27 12:43:14 +0000197 """This class is a container for various parameters of the Ethos-U core
Diqing Zhonge8887a32020-09-24 09:53:48 +0200198 and system configuration that can be tuned, either by command line
Tim Hallc8a73862020-10-27 12:43:14 +0000199 parameters or by the Ethos-U architects. The class is often passed
Diqing Zhonge8887a32020-09-24 09:53:48 +0200200 around to passes that need to do architecture-dependent actions.
Tim Hall79d07d22020-04-27 18:20:16 +0100201
Diqing Zhonge8887a32020-09-24 09:53:48 +0200202 Note the difference between ArchitectureFeatures and CompilerOptions
Tim Hallc8a73862020-10-27 12:43:14 +0000203 - ArchitectureFeatures is for changing the Ethos-U and system architecture
Diqing Zhonge8887a32020-09-24 09:53:48 +0200204 - CompilerOptions is for changing the behaviour of the compiler
205 """
Tim Hall79d07d22020-04-27 18:20:16 +0100206
# Per-accelerator parameter record. shram_granules holds one entry per
# SHRAMElements kind (it is indexed with those constants in __init__).
ArchitectureConfig = namedtuple(
    "ArchitectureConfig",
    ["macs", "cores", "ofm_ublock", "ifm_ublock", "shram_banks", "shram_granules", "elem_units"],
)
accelerator_configs = {
    Accelerator.Ethos_U65_512: ArchitectureConfig(
        256, 2, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
    ),
    Accelerator.Ethos_U65_256: ArchitectureConfig(
        256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
    ),
    Accelerator.Ethos_U55_256: ArchitectureConfig(
        256, 1, Block(2, 2, 8), Block(2, 2, 8), 48, [8, 8, 8, 8, 16, 8, 16, 20], 8
    ),
    Accelerator.Ethos_U55_128: ArchitectureConfig(
        128, 1, Block(2, 1, 8), Block(2, 1, 8), 24, [4, 4, 4, 4, 8, 4, 8, 12], 4
    ),
    Accelerator.Ethos_U55_64: ArchitectureConfig(
        64, 1, Block(1, 1, 8), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4, 8], 2
    ),
    Accelerator.Ethos_U55_32: ArchitectureConfig(
        32, 1, Block(1, 1, 4), Block(1, 1, 8), 16, [2, 2, 2, 2, 4, 4, 4, 4], 1
    ),
}

OFMSplitDepth = 16
SubKernelMax = Block(8, 8, 65536)

# Name of the built-in system configuration / memory mode
DEFAULT_CONFIG = "internal-default"
MAX_BLOCKDEP = 3
Tim Hall1bd531d2020-11-01 20:59:36 +0000236
def __init__(
    self,
    vela_config_files,
    accelerator_config,
    system_config,
    memory_mode,
    max_blockdep,
    verbose_config,
    arena_cache_size,
):
    """Populate all architecture parameters for the chosen accelerator.

    Args:
        vela_config_files: Vela configuration file(s), or None; forwarded to
            _get_vela_config.
        accelerator_config: accelerator name (matched case-insensitively
            against Accelerator.member_list()).
        system_config: name of the system configuration to use.
        memory_mode: name of the memory mode to use.
        max_blockdep: stored as self.max_blockdep.
        verbose_config: forwarded to _get_vela_config.
        arena_cache_size: forwarded to _get_vela_config.

    Raises:
        CliOptionError: if accelerator_config is not a known accelerator.
    """
    accelerator_config = accelerator_config.lower()
    if accelerator_config not in Accelerator.member_list():
        # self.accelerator_config has not been assigned yet, so report the
        # (lower-cased) argument that failed validation
        raise CliOptionError("--accelerator-config", accelerator_config, "Unknown accelerator configuration")
    self.accelerator_config = Accelerator(accelerator_config)
    accel_config = ArchitectureFeatures.accelerator_configs[self.accelerator_config]
    self.config = accel_config

    self.accumulator_granules = {
        SHRAMElements.Acc16: accel_config.shram_granules[SHRAMElements.Acc16],
        SHRAMElements.Acc32: accel_config.shram_granules[SHRAMElements.Acc32],
        SHRAMElements.Acc40: accel_config.shram_granules[SHRAMElements.Acc40],
    }

    # Keyed by IFM bit width
    self.ifm_bank_granules = {
        8: accel_config.shram_granules[SHRAMElements.IFM8],
        16: accel_config.shram_granules[SHRAMElements.IFM16],
        32: accel_config.shram_granules[SHRAMElements.IFM32],
    }

    # Keyed by IFM bit width (elementwise variants for 8 and 16 bit)
    self.ifm_ew_bank_granules = {
        8: accel_config.shram_granules[SHRAMElements.IFM8_Elementwise],
        16: accel_config.shram_granules[SHRAMElements.IFM16_Elementwise],
        32: accel_config.shram_granules[SHRAMElements.IFM32],
    }

    self.shram = SHRAMConfig(2, 1024, accel_config.shram_banks, 2 if accel_config.shram_banks > 16 else 0)

    self.system_config = system_config
    self.memory_mode = memory_mode
    self.is_ethos_u65_system = self.accelerator_config in (Accelerator.Ethos_U65_256, Accelerator.Ethos_U65_512)

    self.max_outstanding_dma = 2 if self.is_ethos_u65_system else 1
    self.max_outstanding_kernels = 3

    self.ncores = accel_config.cores
    self.ofm_ublock = accel_config.ofm_ublock
    self.ifm_ublock = accel_config.ifm_ublock
    self.ofm_block_max = Block(64, 32, 128)

    self.max_blockdep = max_blockdep

    dpu_min_height = accel_config.ofm_ublock.height
    dpu_min_width = accel_config.ofm_ublock.width
    dpu_dot_product_width = 8
    dpu_min_ofm_channels = accel_config.ofm_ublock.depth

    self.num_elem_wise_units = accel_config.elem_units
    self.num_macs_per_cycle = dpu_min_height * dpu_min_width * dpu_dot_product_width * dpu_min_ofm_channels
    # Max value in address offsets
    self.max_address_offset = 1 << 48 if self.is_ethos_u65_system else 1 << 32

    # Get system configuration and memory mode
    self._get_vela_config(vela_config_files, verbose_config, arena_cache_size)

    self.axi_port_width = 128 if self.is_ethos_u65_system else 64
    self.memory_bandwidths_per_cycle = self.axi_port_width * self.memory_clock_scales / 8

    self.memory_bandwidths_per_second = self.memory_bandwidths_per_cycle * self.core_clock

    # Get output/activation performance numbers
    self._generate_output_perf_tables(self.accelerator_config)

    # sizes as N x H x W x C. we need to round up to these when allocating storage
    self.storage_rounding_quantums = {
        TensorFormat.Unknown: (1, 1, 1, 1),
        TensorFormat.WeightsCompressed: (1, 1, 1, 1),
        TensorFormat.NHWC: (1, 1, 1, 1),
        TensorFormat.NHCWB16: (1, 1, 1, 16),
    }

    # brick sizes as N x H x W x C. We have to fetch whole bricks at a time
    self.brick_sizes = {
        TensorFormat.Unknown: (1, 1, 1, 1),
        TensorFormat.WeightsCompressed: (1, 1, 1, 1),
        TensorFormat.NHWC: (1, 1, 1, 1),
        TensorFormat.NHCWB16: (1, 1, 1, 16),
    }

    self.default_weight_format = TensorFormat.WeightsCompressed
    self.default_feature_map_format = TensorFormat.NHWC

    self.tensor_storage_mem_area = {
        # permanent mem_area
        TensorPurpose.Unknown: MemArea.Unknown,
        TensorPurpose.Weights: self.permanent_storage_mem_area,
        TensorPurpose.FeatureMap: self.feature_map_storage_mem_area,
        TensorPurpose.LUT: self.permanent_storage_mem_area,
        TensorPurpose.Scratch: self.feature_map_storage_mem_area,
        TensorPurpose.ScratchFast: self.fast_storage_mem_area,
    }

    self.tensor_storage_mem_type = {
        TensorPurpose.Unknown: MemType.Unknown,
        TensorPurpose.Weights: MemType.Permanent_NPU,
        TensorPurpose.FeatureMap: MemType.Scratch,
        TensorPurpose.LUT: MemType.Scratch,
        TensorPurpose.Scratch: MemType.Scratch,
        TensorPurpose.ScratchFast: MemType.Scratch_fast,
    }

    self.min_block_sizes = {
        NpuBlockType.Default: (dpu_min_height, dpu_min_width),
        NpuBlockType.VectorProduct: (1, 1),
        NpuBlockType.ConvolutionMxN: (dpu_min_height, dpu_min_width),
        NpuBlockType.Pooling: (dpu_min_height, dpu_min_width),
        NpuBlockType.ConvolutionDepthWise: (dpu_min_height, dpu_min_width),
        NpuBlockType.ElementWise: (1, 1),
        NpuBlockType.ReduceSum: (dpu_min_height, dpu_min_width),
    }

    self.sub_kernel_limits = {
        NpuBlockType.Default: (8, 8),
        NpuBlockType.VectorProduct: (1, 1),
        NpuBlockType.ConvolutionMxN: (8, 8),
        NpuBlockType.Pooling: (8, 8),
        NpuBlockType.ConvolutionDepthWise: (8, 8),
        NpuBlockType.ElementWise: (1, 1),
        NpuBlockType.ReduceSum: (8, 8),
    }

    # weights for scheduler search
    from .npu_performance import make_bandwidth_array

    self.bandwidth_weights = make_bandwidth_array()
    self.bandwidth_weights[MemArea.Sram] = 1.0
    self.bandwidth_weights[MemArea.Dram] = 10.0
    self.bandwidth_weights[MemArea.OnChipFlash] = 2.0
    self.bandwidth_weights[MemArea.OffChipFlash] = 20.0
    self.cycles_weight = 40
    self.max_sram_used_weight = 1000

    if self.is_spilling_enabled():
        self.max_sram_used_weight = 0

    # Shared Buffer Block allocations
    self.shram_bank_size = 1024  # bytes
    self.shram_size_bytes = accel_config.shram_banks * self.shram_bank_size
    self.shram_reserved_output_banks = 2
    self.shram_reserved_weight_banks = 0
    self.shram_reserved_unused_banks = 2 if accel_config.shram_banks > 16 else 0
    self.shram_total_banks = accel_config.shram_banks - self.shram_reserved_unused_banks
    self.shram_bank_granules = np.array(accel_config.shram_granules, np.int32)
    self.shram_lut_size = 2048
    # SHRAM base address of the activation lookup table
    self.shram_lut_address = self.shram_bank_size * self.available_shram_banks(True)

    # Build a map of acceptable IFM/OFM block configurations up to the maximum
    # IFM/OFM block size.
    ifm_block_max = self.get_ifm_block_size(32, self.ofm_block_max, Kernel(8, 8))
    self.block_config_map = dict()
    self.generate_block_config_map(Block(ifm_block_max.width * 2, ifm_block_max.height, 128))

    # Setup supported operators and restriction checkers class
    self.supported_operators = SupportedOperators()
Tim Hall79d07d22020-04-27 18:20:16 +0100401
def available_shram_banks(self, uses_activation_lut):
    """Return the number of usable SHRAM banks.

    Two banks are subtracted when the activation lookup table is in use and
    the configuration has no reserved unused banks.
    """
    lut_takes_banks = uses_activation_lut and self.shram_reserved_unused_banks == 0
    if lut_takes_banks:
        return self.shram_total_banks - 2
    return self.shram_total_banks
409
def generate_block_config(self, width, height, depth):
    """Calculate the block configuration for ALL known IFM operations and accumulator sizes.

    Consumers will need to select their preferred operation and bit-width at
    read-time. The result holds one size and one bank count per
    SHRAMElements kind.
    """
    # Number of bytes required for any SHRAM element for a FM of given dimensions.
    # For IFM: size = H*W*Align(D*BYTE_WIDTH, 8)
    # For ACC: size = H*W*Align(D,8)*BYTE_WIDTH
    pre_aligned_depth = round_up(depth, SHRAMElements.PreAlign)
    depth_bytes = round_up(pre_aligned_depth * SHRAMElements.ByteSizes, SHRAMElements.PostAlign)
    size_bytes = (height * width) * depth_bytes

    # Convert byte size (rounded) to size in banks
    size_banks = round_up_divide(size_bytes, self.shram_bank_size)
    # Double buffer the IFM/Acc (need twice as many banks)
    size_banks *= 2
    # Round bank requirement to bank granularity
    required_banks = round_up(size_banks, self.shram_bank_granules)
    return SHRAMBlockConfig(size_bytes, required_banks)
427
428 @staticmethod
429 def make_block_config_key(width, height, depth):
430 return (int(height), int(width), int(depth))
431
def get_block_config(self, width, height, depth):
    """Look up the pre-generated block configuration for a shape.

    Returns None when no configuration was generated for the shape.
    """
    assert depth <= self.ofm_block_max.depth
    lookup_key = ArchitectureFeatures.make_block_config_key(width, height, depth)
    return self.block_config_map.get(lookup_key)
437
def generate_block_config_map(self, block: Block):
    """Fill self.block_config_map with every block configuration up to ``block``.

    Keys come from make_block_config_key, i.e. (height, width, depth) tuples.
    Heights and widths run from 1 to the given block's height and width.
    """
    # All possible IFM/OFM depth values
    depth_values = [4, 8, 12, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128]
    for height in range(1, block.height + 1):
        for width in range(1, block.width + 1):
            for depth in depth_values:
                config_key = ArchitectureFeatures.make_block_config_key(width, height, depth)
                self.block_config_map[config_key] = self.generate_block_config(width, height, depth)
447
def _generate_output_perf_tables(self, accel_config):
    """Set the output and activation cycles-per-element tables for the given accelerator."""
    if accel_config == Accelerator.Ethos_U55_32:
        output_cycles = (2.0, 3.0, 3.0, 3.0, 4.0, 6.0, 1.0, 2.0)
        activation_cycles = (1.0, 1.0, 0.0)
    elif accel_config == Accelerator.Ethos_U55_64:
        output_cycles = (1.0, 1.5, 1.5, 1.5, 2.0, 3.0, 0.5, 1.0)
        activation_cycles = (1.0, 1.0, 0.0)
    elif accel_config == Accelerator.Ethos_U55_128:
        output_cycles = (0.75, 1.25, 0.75, 0.75, 1.0, 1.5, 0.25, 0.5)
        activation_cycles = (1.0, 0.5, 0.0)
    elif accel_config in (Accelerator.Ethos_U55_256, Accelerator.Ethos_U65_256):
        output_cycles = (0.625, 1.125, 0.5, 0.375, 0.5, 0.75, 0.125, 0.25)
        activation_cycles = (1.0, 0.25, 0.0)
    else:
        # Only the dual-core U65 configuration remains
        assert accel_config == Accelerator.Ethos_U65_512
        output_cycles = (0.3125, 0.5625, 0.25, 0.1875, 0.25, 0.375, 0.0625, 0.125)
        activation_cycles = (0.5, 0.125, 0.0)

    self.output_cycles_per_elem = output_cycles
    self.activation_cycles_per_elem = activation_cycles
465
def calc_ifm_block_depth(self, ifm_depth, ifm_bits):
    """Return the IFM block depth for the given IFM depth and bit width.

    The depth is rounded up to the IFM micro-block depth and capped at
    ``8 * 32 // ifm_bits``.
    """
    assert ifm_bits in (8, 16, 32)
    assert ifm_depth > 0
    rounded_depth = round_up(ifm_depth, self.ifm_ublock.depth)
    max_block_depth = 8 * 32 // ifm_bits
    return min(max_block_depth, rounded_depth)
472
def get_ifm_block_size(
    self,
    ifm_block_depth,
    ofm_block: Block,
    kernel: Kernel,
    subkernel: Block = Block(8, 8, 65536),
    ifm_resampling_mode=resampling_mode.NONE,
):
    """Calculate the size of the IFM block given a depth, target OFM block and a kernel.

    Returns a Block whose width and height are rounded up to the IFM
    micro-block and whose depth is ``ifm_block_depth`` unchanged.
    """
    # Any resampling mode other than NONE halves the required input extent
    upscaling = 1 if ifm_resampling_mode == resampling_mode.NONE else 2

    def ifm_extent(ofm_size, stride, kernel_size, dilation, subkernel_size, ublock_size):
        dilated_kernel_size = ((kernel_size - 1) * dilation) + 1
        required = round_up_to_int(
            ((ofm_size - 1) * stride + min(subkernel_size, dilated_kernel_size)) / upscaling
        )
        return round_up(required, ublock_size)

    # Height
    ifm_block_height = ifm_extent(
        ofm_block.height,
        kernel.stride.y,
        kernel.height,
        kernel.dilation.y,
        subkernel.height,
        self.ifm_ublock.height,
    )

    # Width
    ifm_block_width = ifm_extent(
        ofm_block.width,
        kernel.stride.x,
        kernel.width,
        kernel.dilation.x,
        subkernel.width,
        self.ifm_ublock.width,
    )

    return Block(ifm_block_width, ifm_block_height, ifm_block_depth)
501
def is_spilling_enabled(self):
    """
    Spilling is a feature that allows the Ethos-U to use a dedicated SRAM as a cache for various types of data

    It is reported as enabled when the cache port maps to SRAM and the cache
    and arena use different ports.
    """
    cache_is_sram = self._mem_port_mapping(self.cache_mem_area) == MemArea.Sram
    if not cache_is_sram:
        return cache_is_sram
    return self.cache_mem_area != self.arena_mem_area
Tim Hall79d07d22020-04-27 18:20:16 +0100509
def mem_type_size(self, mem_type: MemType) -> int:
    """Returns size in bytes available for the given memory type"""
    is_sized_cache = mem_type == MemType.Scratch_fast and self.is_spilling_enabled()
    if not is_sized_cache:
        # Size is unknown, return max possible address offset
        return self.max_address_offset
    return self.arena_cache_size
516
def _mem_port_mapping(self, mem_port):
    """Return the memory area attached to the given MemPort."""
    return {MemPort.Axi0: self.axi0_port, MemPort.Axi1: self.axi1_port}[mem_port]
Tim Hall79d07d22020-04-27 18:20:16 +0100520
def _set_default_sys_config(self):
    """Apply the ArchitectureFeatures.DEFAULT_CONFIG system configuration values."""
    if self.is_ethos_u65_system:
        # Default Ethos-U65 system configuration
        # Ethos-U65 Client-Server: SRAM (16 GB/s) and DRAM (12 GB/s)
        core_clock = 1e9
        axi1_mem = MemArea.Dram
        axi1_clock_scale = 0.75  # 3 / 4
        axi1_read_latency = 500
        axi1_write_latency = 250
    else:
        # Default Ethos-U55 system configuration
        # Ethos-U55 High-End Embedded: SRAM (4 GB/s) and Flash (0.5 GB/s)
        core_clock = 500e6
        axi1_mem = MemArea.OffChipFlash
        axi1_clock_scale = 0.125  # 1 / 8
        axi1_read_latency = 64
        axi1_write_latency = 64

    # SRAM sits on AXI0 with identical settings in both defaults
    self.core_clock = core_clock
    self.axi0_port = MemArea.Sram
    self.axi1_port = axi1_mem
    self.memory_clock_scales[MemArea.Sram] = 1.0
    self.memory_clock_scales[axi1_mem] = axi1_clock_scale
    self.memory_burst_length[MemArea.Sram] = 32
    self.memory_burst_length[axi1_mem] = 128
    self.memory_latency[MemArea.Sram][BandwidthDirection.Read] = 32
    self.memory_latency[MemArea.Sram][BandwidthDirection.Write] = 32
    self.memory_latency[axi1_mem][BandwidthDirection.Read] = axi1_read_latency
    self.memory_latency[axi1_mem][BandwidthDirection.Write] = axi1_write_latency
Tim Hall79d07d22020-04-27 18:20:16 +0100551
def _set_default_mem_mode(self):
    """Apply the ArchitectureFeatures.DEFAULT_CONFIG memory mode values."""
    # Constants use AXI1 and the cache uses AXI0 in both defaults
    self.const_mem_area = MemPort.Axi1
    self.cache_mem_area = MemPort.Axi0
    if self.is_ethos_u65_system:
        # Default Ethos-U65 memory mode
        # Dedicated SRAM: the SRAM is only for use by the Ethos-U
        # The non-SRAM memory is assumed to be read-writeable
        self.arena_mem_area = MemPort.Axi1
        self.arena_cache_size = 384 * 1024
    else:
        # Default Ethos-U55 memory mode
        # Shared SRAM: the SRAM is shared between the Ethos-U and the Cortex-M software
        # The non-SRAM memory is assumed to be read-only
        self.arena_mem_area = MemPort.Axi0
        self.arena_cache_size = self.max_address_offset
Tim Hall79d07d22020-04-27 18:20:16 +0100570
def _get_vela_config(self, vela_config_files, verbose_config, arena_cache_size_from_cli):
    """
    Gets the system configuration and memory modes from one or more Vela configuration file(s) or uses some
    defaults.

    Parameters:
        vela_config_files: value handed to ConfigParser.read(), or None when no configuration file is used.
        verbose_config: when true the resulting system configuration, memory mode and architecture settings are
            printed.
        arena_cache_size_from_cli: when not None this replaces the arena_cache_size taken from the defaults or
            from the configuration file.

    Raises:
        CliOptionError: the selected system configuration or memory mode is neither the default one nor a
            section of the configuration file(s), or no configuration file was specified for it.
        ConfigOptionError: the resulting memory areas or arena_cache_size are not valid.
    """

    # all properties are optional and are initialised to a value of 1 (or the equivalent)
    self.core_clock = 1
    self.axi0_port = MemArea(1)
    self.axi1_port = MemArea(1)
    self.memory_clock_scales = np.ones(MemArea.Size)
    # the builtin int is used as dtype because the np.int alias has been removed from NumPy
    self.memory_burst_length = np.ones(MemArea.Size, int)
    self.memory_latency = np.zeros((MemArea.Size, BandwidthDirection.Size), int)
    self.const_mem_area = MemPort(1)
    self.arena_mem_area = MemPort(1)
    self.cache_mem_area = MemPort(1)
    self.arena_cache_size = self.max_address_offset
    arena_cache_size_loc_text = "Default"

    # read configuration file(s)
    self.vela_config = None

    if vela_config_files is not None:
        self.vela_config = ConfigParser()
        self.vela_config.read(vela_config_files)

    # read system configuration
    sys_cfg_section = "System_Config." + self.system_config

    if self.vela_config is not None and self.vela_config.has_section(sys_cfg_section):
        self.core_clock = float(self._read_config(sys_cfg_section, "core_clock", self.core_clock))
        # the fallback is given as the member name, because the value read back is used as a MemArea[...] key
        self.axi0_port = MemArea[self._read_config(sys_cfg_section, "axi0_port", self.axi0_port.name)]
        self.axi1_port = MemArea[self._read_config(sys_cfg_section, "axi1_port", self.axi1_port.name)]

        # the per-memory options are only read for the memory areas attached to the two AXI ports
        for mem_area in (self.axi0_port, self.axi1_port):
            self.memory_clock_scales[mem_area] = float(
                self._read_config(
                    sys_cfg_section, mem_area.name + "_clock_scale", self.memory_clock_scales[mem_area]
                )
            )
            self.memory_burst_length[mem_area] = int(
                self._read_config(
                    sys_cfg_section, mem_area.name + "_burst_length", self.memory_burst_length[mem_area]
                )
            )
            self.memory_latency[mem_area][BandwidthDirection.Read] = int(
                self._read_config(
                    sys_cfg_section,
                    mem_area.name + "_read_latency",
                    self.memory_latency[mem_area][BandwidthDirection.Read],
                )
            )
            self.memory_latency[mem_area][BandwidthDirection.Write] = int(
                self._read_config(
                    sys_cfg_section,
                    mem_area.name + "_write_latency",
                    self.memory_latency[mem_area][BandwidthDirection.Write],
                )
            )
    elif self.system_config == ArchitectureFeatures.DEFAULT_CONFIG:
        self._set_default_sys_config()

    elif vela_config_files is None:
        raise CliOptionError("--config", vela_config_files, "Vela config file not specified")

    else:
        raise CliOptionError(
            "--system-config", self.system_config, f"Section {sys_cfg_section} not found in Vela config file",
        )

    # read the memory mode
    mem_mode_section = "Memory_Mode." + self.memory_mode

    if self.vela_config is not None and self.vela_config.has_section(mem_mode_section):
        self.const_mem_area = MemPort[
            self._read_config(mem_mode_section, "const_mem_area", self.const_mem_area.name)
        ]
        self.arena_mem_area = MemPort[
            self._read_config(mem_mode_section, "arena_mem_area", self.arena_mem_area.name)
        ]
        self.cache_mem_area = MemPort[
            self._read_config(mem_mode_section, "cache_mem_area", self.cache_mem_area.name)
        ]
        # _read_config records in this list whether the key was present in the section or one it inherits
        found = []
        self.arena_cache_size = int(
            self._read_config(mem_mode_section, "arena_cache_size", self.arena_cache_size, found)
        )
        if any(found):
            arena_cache_size_loc_text = "Configuration file"

    elif self.memory_mode == ArchitectureFeatures.DEFAULT_CONFIG:
        self._set_default_mem_mode()

    elif vela_config_files is None:
        raise CliOptionError("--config", vela_config_files, "Vela config file not specified")

    else:
        raise CliOptionError(
            "--memory-mode", self.memory_mode, f"Section {mem_mode_section} not found in Vela config file",
        )

    # override sram to onchipflash
    if self._mem_port_mapping(self.const_mem_area) == MemArea.Sram:
        if self.const_mem_area == self.arena_mem_area == self.cache_mem_area:
            print(
                "Info: Changing const_mem_area from Sram to OnChipFlash. This will use the same characteristics as"
                " Sram."
            )
            # the constants are moved onto the other AXI port, which is then declared to be OnChipFlash
            if self.const_mem_area == MemPort.Axi0:
                self.const_mem_area = MemPort.Axi1
                self.axi1_port = MemArea.OnChipFlash
            else:
                self.const_mem_area = MemPort.Axi0
                self.axi0_port = MemArea.OnChipFlash
            self.memory_clock_scales[MemArea.OnChipFlash] = self.memory_clock_scales[MemArea.Sram]
            self.memory_burst_length[MemArea.OnChipFlash] = self.memory_burst_length[MemArea.Sram]
            self.memory_latency[MemArea.OnChipFlash] = self.memory_latency[MemArea.Sram]

    # override sram usage
    if arena_cache_size_from_cli is not None:
        self.arena_cache_size = arena_cache_size_from_cli
        arena_cache_size_loc_text = "CLI option"

    # check configuration
    if self._mem_port_mapping(self.const_mem_area) not in (
        MemArea.Dram,
        MemArea.OnChipFlash,
        MemArea.OffChipFlash,
    ):
        raise ConfigOptionError(
            "const_mem_area",
            self._mem_port_mapping(self.const_mem_area).name,
            "Dram or OnChipFlash or OffChipFlash",
        )

    if self._mem_port_mapping(self.arena_mem_area) not in (MemArea.Sram, MemArea.Dram):
        raise ConfigOptionError("arena_mem_area", self._mem_port_mapping(self.arena_mem_area).name, "Sram or Dram")

    if self._mem_port_mapping(self.cache_mem_area) != MemArea.Sram:
        raise ConfigOptionError("cache_mem_area", self._mem_port_mapping(self.cache_mem_area).name, "Sram")

    if self.arena_cache_size < 0:
        raise ConfigOptionError("arena_cache_size", self.arena_cache_size, ">= 0")
    if self.arena_cache_size > self.max_address_offset:
        raise ConfigOptionError(
            "arena_cache_size",
            f"{self.arena_cache_size}. Size is out of bounds, maximum is: {self.max_address_offset}",
        )

    # assign existing memory areas
    self.permanent_storage_mem_area = self._mem_port_mapping(self.const_mem_area)
    self.feature_map_storage_mem_area = self._mem_port_mapping(self.arena_mem_area)
    self.fast_storage_mem_area = self._mem_port_mapping(self.cache_mem_area)

    # display the system configuration and memory mode
    if verbose_config:
        print(f"System Configuration ({self.system_config}):")
        print(f" core_clock = {self.core_clock}")
        print(f" axi0_port = {self.axi0_port.name}")
        print(f" axi1_port = {self.axi1_port.name}")
        for mem in (MemArea.Sram, MemArea.Dram, MemArea.OnChipFlash, MemArea.OffChipFlash):
            print(f" {mem.name}_clock_scales = {self.memory_clock_scales[mem]}")
            print(f" {mem.name}_burst_length = {self.memory_burst_length[mem]}")
            print(f" {mem.name}_read_latency = {self.memory_latency[mem][BandwidthDirection.Read]}")
            print(f" {mem.name}_write_latency = {self.memory_latency[mem][BandwidthDirection.Write]}")

        print(f"Memory Mode ({self.memory_mode}):")
        print(f" const_mem_area = {self.const_mem_area.name}")
        print(f" arena_mem_area = {self.arena_mem_area.name}")
        print(f" cache_mem_area = {self.cache_mem_area.name}")
        print(f" arena_cache_size = {self.arena_cache_size} from {arena_cache_size_loc_text}")

        print("Architecture Settings:")
        print(f" permanent_storage_mem_area = {self.permanent_storage_mem_area.name}")
        print(f" feature_map_storage_mem_area = {self.feature_map_storage_mem_area.name}")
        print(f" fast_storage_mem_area = {self.fast_storage_mem_area.name}")
Tim Hall1bd531d2020-11-01 20:59:36 +0000747
Tim Halld8339a72021-05-27 18:49:40 +0100748 def _read_config(self, section, key, current_value, found=None):
Tim Hall79d07d22020-04-27 18:20:16 +0100749 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000750 Reads a given key from a particular section in the Vela config file. If the section contains the 'inherit'
751 option then we recurse into the section specified. If inherited sections result in multiple keys for a
Tim Halld8339a72021-05-27 18:49:40 +0100752 particular option then the key from the parent section is used, regardless of the parsing order. if specified
753 found should be an empty list that this function will append a True or False to the end of the list indicating
754 whether the key was found or not.
Tim Hall79d07d22020-04-27 18:20:16 +0100755 """
Tim Hall1bd531d2020-11-01 20:59:36 +0000756 if not self.vela_config.has_section(section):
Michael McGeagh7a6f8432020-12-02 15:29:22 +0000757 raise ConfigOptionError("section", f"{section}. The section was not found in the Vela config file(s)")
Tim Hall1bd531d2020-11-01 20:59:36 +0000758
Tim Halld8339a72021-05-27 18:49:40 +0100759 result = str(current_value) if current_value is not None else None
760 if found is not None:
761 found.append(False)
762
Tim Hall1bd531d2020-11-01 20:59:36 +0000763 if self.vela_config.has_option(section, "inherit"):
764 inheritance_section = self.vela_config.get(section, "inherit")
765 # check for recursion loop
766 if inheritance_section == section:
767 raise ConfigOptionError(
Michael McGeagh7a6f8432020-12-02 15:29:22 +0000768 "inherit", f"{inheritance_section}. This references its own section and recursion is not allowed",
Tim Hall1bd531d2020-11-01 20:59:36 +0000769 )
Tim Halld8339a72021-05-27 18:49:40 +0100770 result = self._read_config(inheritance_section, key, result, found)
Tim Hall1bd531d2020-11-01 20:59:36 +0000771
772 if self.vela_config.has_option(section, key):
773 result = self.vela_config.get(section, key)
Tim Halld8339a72021-05-27 18:49:40 +0100774 if found is not None:
775 found.append(True)
Tim Hall1bd531d2020-11-01 20:59:36 +0000776
Tim Hall79d07d22020-04-27 18:20:16 +0100777 return result
Louis Verhaard52078302020-11-18 13:35:06 +0100778
779
# Default architecture instances are expensive to create, so they are kept here and reused.
# Entries are keyed by the accelerator passed to create_default_arch.
default_arch_cache = {}
782
783
def create_default_arch(accelerator: Accelerator) -> ArchitectureFeatures:
    """Returns the architecture features object for the accelerator, built with default settings.

    The object is created on the first request for a given accelerator and stored in default_arch_cache;
    later requests for the same accelerator return the stored object.
    """
    try:
        return default_arch_cache[accelerator]
    except KeyError:
        pass

    # first request for this accelerator: build it without any Vela configuration file
    arch = ArchitectureFeatures(
        vela_config_files=None,
        accelerator_config=accelerator.value,
        system_config=ArchitectureFeatures.DEFAULT_CONFIG,
        memory_mode=ArchitectureFeatures.DEFAULT_CONFIG,
        max_blockdep=ArchitectureFeatures.MAX_BLOCKDEP,
        verbose_config=False,
        arena_cache_size=None,
    )
    default_arch_cache[accelerator] = arch
    return arch