Rickard Bolin | bc6ee58 | 2022-11-04 08:24:29 +0000 | [diff] [blame^] | 1 | # SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com> |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Rickard Bolin | bc6ee58 | 2022-11-04 08:24:29 +0000 | [diff] [blame^] | 16 | # |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 17 | # Description: |
| 18 | # Wrapping function to do tensor address allocation. That is, assigning addresses to tensors based on what has been |
| 19 | # worked out from the allowable overlaps that are calculated by the live range analysis. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 20 | import math |
Louis Verhaard | 226ecaf | 2021-03-30 10:18:28 +0200 | [diff] [blame] | 21 | from typing import List |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 22 | |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 23 | import numpy as np |
| 24 | |
Louis Verhaard | d700252 | 2021-01-20 17:23:54 +0100 | [diff] [blame] | 25 | from . import hillclimb_allocation |
Diego Russo | ea6111a | 2020-04-14 18:41:58 +0100 | [diff] [blame] | 26 | from . import live_range |
| 27 | from . import numeric_util |
Jacob Bohlin | 0628a8c | 2020-08-28 13:25:14 +0200 | [diff] [blame] | 28 | from .errors import AllocationError |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 29 | from .greedy_allocation import allocate_live_ranges as greedy_allocate_live_ranges |
Louis Verhaard | 226ecaf | 2021-03-30 10:18:28 +0200 | [diff] [blame] | 30 | from .live_range import LiveRange |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 31 | from .live_range import LiveRangeGraph |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 32 | from .nn_graph import TensorAllocator |
| 33 | from .tensor import MemArea |
Patrik Gustavsson | eca2e95 | 2020-05-27 09:15:11 +0200 | [diff] [blame] | 34 | from .tensor import MemType |
Jacob Bohlin | 0628a8c | 2020-08-28 13:25:14 +0200 | [diff] [blame] | 35 | from .tensor import Tensor |
Louis Verhaard | 0b8268a | 2020-08-05 16:11:29 +0200 | [diff] [blame] | 36 | from .tensor import TensorPurpose |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 37 | |
| 38 | |
Jacob Bohlin | 0628a8c | 2020-08-28 13:25:14 +0200 | [diff] [blame] | 39 | def linear_allocate_live_ranges(live_ranges, alloc_granularity=Tensor.AllocationQuantum): |
Louis Verhaard | 3c07c97 | 2020-05-07 08:12:58 +0200 | [diff] [blame] | 40 | # Allocates using increasing addresses. Duplicate constant tensors will be allocated to the same address |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 41 | total_sz = 0 |
| 42 | allocated_tensors = [] |
| 43 | |
Louis Verhaard | 3c07c97 | 2020-05-07 08:12:58 +0200 | [diff] [blame] | 44 | # just assign increasing addresses, except for duplicates |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 45 | for tens, lr in live_ranges.ranges.items(): |
| 46 | if tens in allocated_tensors: |
| 47 | continue |
| 48 | |
Louis Verhaard | 3c07c97 | 2020-05-07 08:12:58 +0200 | [diff] [blame] | 49 | address = total_sz |
| 50 | if tens.weight_compression_config is not None: |
| 51 | for allocated_tens in allocated_tensors: |
| 52 | if allocated_tens.weight_compression_config == tens.weight_compression_config: |
Tim Hall | d784af7 | 2021-06-08 21:25:57 +0100 | [diff] [blame] | 53 | assert allocated_tens.scale_compression_config == tens.scale_compression_config |
Louis Verhaard | 3c07c97 | 2020-05-07 08:12:58 +0200 | [diff] [blame] | 54 | address = allocated_tens.address |
| 55 | break |
Louis Verhaard | 0b8268a | 2020-08-05 16:11:29 +0200 | [diff] [blame] | 56 | if tens.purpose == TensorPurpose.LUT: |
| 57 | for allocated_tens in allocated_tensors: |
| 58 | if allocated_tens.equivalent(tens): |
| 59 | address = allocated_tens.address |
| 60 | break |
Louis Verhaard | 3c07c97 | 2020-05-07 08:12:58 +0200 | [diff] [blame] | 61 | lr.set_address(address) |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 62 | allocated_tensors += lr.tensors |
Louis Verhaard | 3c07c97 | 2020-05-07 08:12:58 +0200 | [diff] [blame] | 63 | if address == total_sz: |
| 64 | total_sz += numeric_util.round_up(int(math.ceil(lr.size)), alloc_granularity) |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 65 | |
Jacob Bohlin | 0628a8c | 2020-08-28 13:25:14 +0200 | [diff] [blame] | 66 | verify_alignment(live_ranges, alloc_granularity) |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 67 | return total_sz |
| 68 | |
| 69 | |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 70 | def hillclimb_allocate_live_ranges( |
| 71 | live_ranges: LiveRangeGraph, alloc_granularity: int, max_iterations: int, mem_limit: int |
| 72 | ) -> int: |
Louis Verhaard | d700252 | 2021-01-20 17:23:54 +0100 | [diff] [blame] | 73 | # Allocates using the hill climb allocator |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 74 | addresses = hillclimb_allocation.allocate_live_ranges(live_ranges.lrs, max_iterations, mem_limit) |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 75 | # The result is a list containing the allocated addresses |
| 76 | total_sz = 0 |
Louis Verhaard | 226ecaf | 2021-03-30 10:18:28 +0200 | [diff] [blame] | 77 | for lr, address in zip(live_ranges.lrs, addresses): |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 78 | total_sz = max(total_sz, address + lr.size) |
| 79 | lr.set_address(address) |
| 80 | verify_allocation(live_ranges, alloc_granularity) |
| 81 | return total_sz |
| 82 | |
| 83 | |
def verify_alignment(live_ranges: LiveRangeGraph, alignment: int):
    """Raise AllocationError if any CPU-visible tensor is not aligned to `alignment` bytes."""
    for lr in live_ranges.lrs:
        for tens in lr.tensors:
            # A tensor touched by any non-NPU op is a CPU tensor and must be aligned
            runs_fully_on_npu = all(op and op.run_on_npu for op in tens.ops + tens.consumer_list)
            if not runs_fully_on_npu and tens.address % alignment != 0:
                raise AllocationError(f"Tensor '{tens.name}' not aligned to {alignment} bytes")
Jacob Bohlin | 0628a8c | 2020-08-28 13:25:14 +0200 | [diff] [blame] | 91 | |
| 92 | |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 93 | def verify_allocation(live_ranges: LiveRangeGraph, alignment: int): |
Louis Verhaard | 226ecaf | 2021-03-30 10:18:28 +0200 | [diff] [blame] | 94 | verify_alignment(live_ranges, alignment) |
| 95 | nr_time_slots = 1 + max(lr.end_time for lr in live_ranges.lrs) |
| 96 | # Contains active live ranges at each timestamp |
Jonas Ohlsson | 845e232 | 2022-03-01 12:39:55 +0100 | [diff] [blame] | 97 | lrs_at_time: List[List[LiveRange]] = [[] for i in range(nr_time_slots)] |
Louis Verhaard | 226ecaf | 2021-03-30 10:18:28 +0200 | [diff] [blame] | 98 | for lr in live_ranges.lrs: |
| 99 | for t in range(lr.start_time, lr.end_time + 1): |
| 100 | lrs_at_time[t].append(lr) |
| 101 | for t in range(nr_time_slots): |
Johan Alfvén | 36da8d3 | 2022-01-18 08:56:56 +0100 | [diff] [blame] | 102 | lrs_new_items = [lr for lr in lrs_at_time[t] if t == 0 or lr not in lrs_at_time[t - 1]] |
| 103 | for m in lrs_new_items: |
| 104 | for n in lrs_at_time[t]: |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 105 | overlap, tens_n, tens_m = n.overlaps_address(m) |
| 106 | if overlap and not (tens_n.equivalent(tens_m) and tens_n.address == tens_m.address): |
| 107 | raise AllocationError( |
Michael McGeagh | 7a6f843 | 2020-12-02 15:29:22 +0000 | [diff] [blame] | 108 | f"Overlapping buffers: {n.name}: {tens_n.address} -> {tens_n.address + n.size}" |
| 109 | f" and {m.name}: {tens_m.address} -> {tens_m.address + m.size}" |
Louis Verhaard | 9bfe0f8 | 2020-12-03 12:26:25 +0100 | [diff] [blame] | 110 | ) |
| 111 | |
| 112 | |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 113 | def mark_sram_used_for_cascaded_passes(sg, lrs): |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 114 | if len(sg.cascaded_passes) < 1: |
| 115 | return |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 116 | end_pos = max(ps.time for ps in sg.cascaded_passes) + 2 |
| 117 | mem_usage = np.zeros(end_pos, dtype=np.int64) |
| 118 | |
| 119 | for tens, rng in lrs.ranges.items(): |
| 120 | storage_size = tens.storage_size() |
| 121 | mem_usage[rng.start_time : rng.end_time] += storage_size |
| 122 | |
| 123 | for cps in sg.cascaded_passes: |
| 124 | sram_used = max(mem_usage[cps.time], mem_usage[cps.time + 1]) |
| 125 | cps.sram_used = sram_used |
| 126 | for ps in cps.passes: |
| 127 | ps.sram_used = sram_used |
| 128 | |
| 129 | |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 130 | def print_allocation(lrs, mem_area, mem_type_set, tensor_allocator, sg, actual_mem_usage_for_alloc): |
| 131 | print("\n" + "#" * 80) |
| 132 | sg_placement = ( |
| 133 | sg.placement.name |
Jonas Ohlsson | d857507 | 2022-03-30 10:30:25 +0200 | [diff] [blame] | 134 | if mem_type_set.intersection( |
| 135 | ( |
| 136 | MemType.Permanent_NPU, |
| 137 | MemType.Permanent_CPU, |
| 138 | ) |
| 139 | ) |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 140 | else "Cpu and Npu" |
| 141 | ) |
| 142 | print( |
| 143 | f"Tensor Allocation for mem_area {mem_area.name}, of mem_type_set (" |
| 144 | f'{", ".join(f"{mem_type.name}" for mem_type in mem_type_set)}' |
| 145 | f"), using allocator {tensor_allocator}, in {sg_placement} subgraph:" |
| 146 | ) |
| 147 | |
| 148 | memory_hist = memory_usage_histogram(lrs.lrs) |
| 149 | min_mem_usage_for_alloc = max(memory_hist) |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 150 | print( |
| 151 | f"{'Start Time':>10s} - {'End Time':>10s}: {'Start Addr':>10s} - {'End Addr':>10s}: {'Tensor Size':>11s}:" |
| 152 | f" {'Memory Usage':>12s}: {'Purpose':12s}: Name" |
| 153 | ) |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 154 | for start_time, end_time, size, start_addr, end_addr, purpose, name in sorted( |
Jonas Ohlsson | d857507 | 2022-03-30 10:30:25 +0200 | [diff] [blame] | 155 | ( |
| 156 | lr.start_time, |
| 157 | lr.end_time, |
| 158 | lr.size, |
| 159 | tens.address, |
| 160 | tens.address + lr.size, |
| 161 | tens.purpose, |
| 162 | tens.name, |
| 163 | ) |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 164 | for tens, lr in lrs.ranges.items() |
| 165 | ): |
| 166 | print( |
| 167 | f"{start_time:10d} - {end_time:10d}: {start_addr:#10x} - {end_addr:#10x}: {size:11d}:" |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 168 | f" {memory_hist[start_time]:12d}: {purpose.display_name():12s}: {name:s}" |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 169 | ) |
| 170 | |
| 171 | alloc_overhead_fraction = (actual_mem_usage_for_alloc - min_mem_usage_for_alloc) / min_mem_usage_for_alloc |
| 172 | print( |
| 173 | f"Allocation Peak Tensor Size: {min_mem_usage_for_alloc:9d} ({min_mem_usage_for_alloc:#10x})" |
| 174 | f" Bytes {min_mem_usage_for_alloc/1024.0:8.2f} KiB" |
| 175 | ) |
| 176 | print( |
| 177 | f"Allocation Peak Memory Usage: {actual_mem_usage_for_alloc:9d} ({actual_mem_usage_for_alloc:#10x})" |
| 178 | f" Bytes {actual_mem_usage_for_alloc/1024.0:8.2f} KiB" |
| 179 | ) |
| 180 | print( |
| 181 | f"Allocation Overhead: {actual_mem_usage_for_alloc-min_mem_usage_for_alloc:9d}" |
| 182 | f" Bytes ({100*alloc_overhead_fraction:.2f} %)" |
| 183 | ) |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 184 | |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 185 | |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 186 | def memory_usage_histogram(lrs: List[LiveRange]): |
| 187 | histogram = [0] * (1 + max(lr.end_time for lr in lrs)) |
Louis Verhaard | 226ecaf | 2021-03-30 10:18:28 +0200 | [diff] [blame] | 188 | for lr in lrs: |
erik.andersson@arm.com | 3438c92 | 2021-03-24 10:32:09 +0100 | [diff] [blame] | 189 | for t in range(lr.start_time, lr.end_time + 1): |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 190 | histogram[t] += lr.size |
erik.andersson@arm.com | 3438c92 | 2021-03-24 10:32:09 +0100 | [diff] [blame] | 191 | |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 192 | return histogram |
erik.andersson@arm.com | 3438c92 | 2021-03-24 10:32:09 +0100 | [diff] [blame] | 193 | |
| 194 | |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 195 | def allocate( |
| 196 | sg, |
| 197 | arch, |
| 198 | mem_area, |
| 199 | mem_type_set, |
| 200 | tensor_allocator=TensorAllocator.Greedy, |
| 201 | lr_graph=None, |
| 202 | cpu_tensor_alignment=Tensor.AllocationQuantum, |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 203 | hillclimb_max_iterations=None, |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 204 | ): |
| 205 | # Allocates addresses to tensors, returns False if tensors could not be fit within max_size |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 206 | lrs = live_range.extract_live_ranges_from_cascaded_passes( |
Jonas Ohlsson | d857507 | 2022-03-30 10:30:25 +0200 | [diff] [blame] | 207 | sg, |
| 208 | mem_area, |
| 209 | mem_type_set, |
| 210 | lr_graph=lr_graph, |
| 211 | cpu_tensor_alignment=cpu_tensor_alignment, |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 212 | ) |
| 213 | total_sz = 0 |
| 214 | if lrs.ranges: |
| 215 | tens_alloc = tensor_allocator |
| 216 | if tens_alloc == TensorAllocator.Greedy: |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 217 | total_sz = greedy_allocate_live_ranges(lrs, cpu_tensor_alignment) |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 218 | verify_allocation(lrs, cpu_tensor_alignment) |
| 219 | elif tens_alloc == TensorAllocator.LinearAlloc: |
| 220 | total_sz = linear_allocate_live_ranges(lrs, cpu_tensor_alignment) |
| 221 | elif tens_alloc == TensorAllocator.HillClimb: |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 222 | mem_type = MemType.Scratch_fast if MemType.Scratch_fast in mem_type_set else list(mem_type_set)[0] |
| 223 | mem_size = arch.mem_type_size(mem_type) |
| 224 | total_sz = hillclimb_allocate_live_ranges(lrs, cpu_tensor_alignment, hillclimb_max_iterations, mem_size) |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 225 | else: |
| 226 | assert 0 |
| 227 | return lrs, total_sz |
| 228 | |
| 229 | |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 230 | def allocate_tensors( |
| 231 | nng, |
| 232 | sg, |
| 233 | arch, |
| 234 | mem_area, |
Patrik Gustavsson | eca2e95 | 2020-05-27 09:15:11 +0200 | [diff] [blame] | 235 | mem_type_set, |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 236 | tensor_allocator=TensorAllocator.Greedy, |
| 237 | verbose_allocation=False, |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 238 | lr_graph=None, |
Tim Hall | b9b515c | 2020-11-01 21:27:19 +0000 | [diff] [blame] | 239 | cpu_tensor_alignment=Tensor.AllocationQuantum, |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 240 | hillclimb_max_iterations=None, |
Louis Verhaard | 0b9c9a3 | 2020-09-15 14:05:38 +0200 | [diff] [blame] | 241 | max_size=None, |
| 242 | dry_test=False, |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 243 | ): |
Louis Verhaard | 0b9c9a3 | 2020-09-15 14:05:38 +0200 | [diff] [blame] | 244 | # Allocates addresses to tensors, returns False if tensors could not be fit within max_size |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 245 | lrs, total_sz = allocate( |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 246 | sg, |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 247 | arch, |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 248 | mem_area, |
Patrik Gustavsson | eca2e95 | 2020-05-27 09:15:11 +0200 | [diff] [blame] | 249 | mem_type_set, |
Tim Hall | d8339a7 | 2021-05-27 18:49:40 +0100 | [diff] [blame] | 250 | tensor_allocator=tensor_allocator, |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 251 | lr_graph=lr_graph, |
Tim Hall | b9b515c | 2020-11-01 21:27:19 +0000 | [diff] [blame] | 252 | cpu_tensor_alignment=cpu_tensor_alignment, |
Tim Hall | cda4fcb | 2022-05-19 12:36:58 +0100 | [diff] [blame] | 253 | hillclimb_max_iterations=hillclimb_max_iterations, |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 254 | ) |
| 255 | |
| 256 | if lrs.ranges: |
Louis Verhaard | 0b9c9a3 | 2020-09-15 14:05:38 +0200 | [diff] [blame] | 257 | alloc_ok = max_size is None or total_sz <= max_size |
| 258 | if dry_test or not alloc_ok: |
| 259 | # Dry test or allocation failed; undo allocation |
| 260 | for lr in lrs.ranges.values(): |
| 261 | lr.set_address(None) |
| 262 | return alloc_ok |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 263 | |
Patrik Gustavsson | eca2e95 | 2020-05-27 09:15:11 +0200 | [diff] [blame] | 264 | if sg.memory_used.get(mem_area, 0) == 0: |
| 265 | sg.memory_used[mem_area] = total_sz |
| 266 | else: |
| 267 | sg.memory_used[mem_area] += total_sz |
| 268 | |
| 269 | # Keep track of how much should be used for scratch or permanent storage for NPU |
| 270 | for mem_type in mem_type_set: |
| 271 | if sg.memory_used_per_type.get(mem_type, 0) == 0: |
| 272 | sg.memory_used_per_type[mem_type] = total_sz |
| 273 | else: |
| 274 | sg.memory_used_per_type[mem_type] += total_sz |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 275 | |
Tim Hall | 64556f3 | 2021-05-17 22:57:46 +0100 | [diff] [blame] | 276 | if verbose_allocation: |
| 277 | print_allocation(lrs, mem_area, mem_type_set, tensor_allocator, sg, total_sz) |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 278 | |
| 279 | if mem_area == MemArea.Sram: |
| 280 | # Mark Sram usage for all subgraphs |
| 281 | for sg_ in nng.subgraphs: |
| 282 | mark_sram_used_for_cascaded_passes(sg_, lrs) |
| 283 | |
| 284 | if sg == nng.get_root_subgraph(): |
| 285 | nng.memory_used = sg.memory_used |
Louis Verhaard | 0b9c9a3 | 2020-09-15 14:05:38 +0200 | [diff] [blame] | 286 | return True |