blob: 1efcd68694971ead4b5b78cc998631aec7a94e34 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Wrapping function to do tensor address allocation. That is, assigning addresses to tensors based on what has been
18# worked out from the allowable overlaps that are calculated by the live range analysis.
Tim Hall79d07d22020-04-27 18:20:16 +010019import math
Tim Hall79d07d22020-04-27 18:20:16 +010020
Diego Russoea6111a2020-04-14 18:41:58 +010021import numpy as np
22
23from . import live_range
24from . import numeric_util
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020025from .errors import AllocationError
Tim Hall79d07d22020-04-27 18:20:16 +010026from .greedy_allocation import allocate_live_ranges as greedy_allocate_live_ranges
Diego Russoe8a10452020-04-21 17:39:10 +010027from .nn_graph import TensorAllocator
28from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020029from .tensor import MemType
Jacob Bohlin0628a8c2020-08-28 13:25:14 +020030from .tensor import Tensor
Louis Verhaard0b8268a2020-08-05 16:11:29 +020031from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010032
33
def linear_allocate_live_ranges(live_ranges, alloc_granularity=Tensor.AllocationQuantum):
    # Assign strictly increasing addresses to the live ranges.  Tensors that are
    # duplicates of an already-placed tensor (same weight compression config, or
    # an equivalent LUT) are mapped onto the existing address instead of getting
    # fresh storage.
    next_address = 0
    placed = []  # tensors that already have an address

    for tens, lr in live_ranges.ranges.items():
        if tens in placed:
            continue

        # Default: allocate at the current end of memory, unless a duplicate
        # of this tensor has already been placed.
        address = next_address
        if tens.weight_compression_config is not None:
            address = next(
                (
                    prev.address
                    for prev in placed
                    if prev.weight_compression_config == tens.weight_compression_config
                ),
                address,
            )
        if tens.purpose == TensorPurpose.LUT:
            address = next((prev.address for prev in placed if prev.equivalent(tens)), address)

        lr.set_address(address)
        placed.extend(lr.tensors)
        # Only grow total memory when this range got a fresh (non-shared) address.
        if address == next_address:
            next_address += numeric_util.round_up(int(math.ceil(lr.size)), alloc_granularity)

    verify_alignment(live_ranges, alloc_granularity)
    return next_address
62
63
def verify_alignment(live_ranges, alignment):
    # Sanity check: any tensor touched by the CPU must sit on an address that is
    # a multiple of `alignment`; NPU-only tensors are exempt.
    for lr in live_ranges.ranges.values():
        for tens in lr.tensors:
            npu_only = all(op and op.run_on_npu for op in tens.ops + tens.consumer_list)
            if npu_only:
                continue
            # This is a CPU tensor, verify alignment
            if tens.address % alignment:
                raise AllocationError("Tensor {} not aligned to {} bytes".format(tens.name, alignment))
71
72
def mark_sram_used_for_cascaded_passes(sg, lrs):
    # Build a per-timestep SRAM usage histogram from the live ranges, then
    # record on each cascaded pass (and its member passes) the peak usage
    # over the two timesteps it spans.
    last_time = max(ps.time for ps in sg.cascaded_passes) + 2
    usage_per_step = np.zeros(last_time, dtype=np.int64)

    for tens, rng in lrs.ranges.items():
        usage_per_step[rng.start_time : rng.end_time] += tens.storage_size()

    for cps in sg.cascaded_passes:
        peak = max(usage_per_step[cps.time], usage_per_step[cps.time + 1])
        cps.sram_used = peak
        for ps in cps.passes:
            ps.sram_used = peak
86
87
def print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation, show_minimum_possible_allocation):
    # Optionally dump a human-readable allocation table, plus the theoretical
    # minimum SRAM allocation derived from the cascaded passes.
    if verbose_allocation:
        constant_sets = (set((MemType.Permanent_NPU,)), set((MemType.Permanent_CPU,)))
        if mem_type_set in constant_sets:
            print("allocation for", mem_area, "- constant tensors in", sg.placement.name, "subgraph(s)")
        else:
            print("allocation for", mem_area, "- non-constant tensors in Cpu and Npu subgraphs")

        # One row per tensor, sorted by (start_time, start_address, ...)
        rows = sorted(
            (
                lr.start_time,
                tens.address,
                tens.address + int(math.ceil(tens.storage_size())),
                tens.name + " " + str(tens.purpose),
                lr.end_time,
            )
            for tens, lr in lrs.ranges.items()
        )
        mem_usage = 0
        for start_time, start, end, name, end_time in rows:
            name = name.replace("\x00", "")
            print("%9d: %#12x - %#12x: %3d - %3d %s" % ((end - start), start, end, start_time, end_time, name))
            mem_usage = max(mem_usage, end)
        print("Memory usage: {} ({:#x}) bytes / {:.1f} KB".format(mem_usage, mem_usage, mem_usage / 1024))
        print()

    if show_minimum_possible_allocation and mem_area == MemArea.Sram:
        min_possible_allocation = max(cps.sram_used for cps in sg.cascaded_passes)
        print(
            "Min possible allocation %d bytes / %.1f KB / %.1f MB"
            % (min_possible_allocation, min_possible_allocation / 1024, min_possible_allocation / 1024 / 1024)
        )
117
118
def allocate_tensors(
    nng,
    sg,
    arch,
    mem_area,
    mem_type_set,
    use_ifm_ofm_overlap=True,
    tensor_allocator=TensorAllocator.Greedy,
    verbose_allocation=False,
    show_minimum_possible_allocation=False,
    lr_graph=None,
    allocation_alignment=Tensor.AllocationQuantum,
    max_size=None,
    dry_test=False,
):
    # Allocates addresses to tensors in the given memory area, and updates the
    # memory usage bookkeeping on the subgraph and network graph.
    # Returns False if the tensors could not be fit within max_size, True otherwise.
    # If dry_test is set (or allocation fails) all assigned addresses are undone.
    ignore_subgraph_input_output_tensors = False
    lrs = live_range.extract_live_ranges_from_cascaded_passes(
        sg,
        mem_area,
        mem_type_set,
        mark_output_tensors_overlapping_with_input_tensors=False,
        use_ifm_ofm_overlap=use_ifm_ofm_overlap,
        ignore_subgraph_input_output_tensors=ignore_subgraph_input_output_tensors,
        lr_graph=lr_graph,
        allocation_alignment=allocation_alignment,
    )

    if lrs.ranges:
        tens_alloc = tensor_allocator
        if tens_alloc == TensorAllocator.Greedy:
            total_sz = greedy_allocate_live_ranges(sg, arch, lrs, mem_area, allocation_alignment, verbose_allocation)
        elif tens_alloc == TensorAllocator.LinearAlloc:
            total_sz = linear_allocate_live_ranges(lrs, allocation_alignment)
        else:
            assert 0  # unknown allocator - internal invariant
        alloc_ok = max_size is None or total_sz <= max_size
        if dry_test or not alloc_ok:
            # Dry test or allocation failed; undo allocation
            for lr in lrs.ranges.values():
                lr.set_address(None)
            return alloc_ok

        # Accumulate memory usage per area (get() covers the first allocation)
        sg.memory_used[mem_area] = sg.memory_used.get(mem_area, 0) + total_sz

        # Keep track of how much should be used for scratch or permanent storage for NPU
        for mem_type in mem_type_set:
            sg.memory_used_per_type[mem_type] = sg.memory_used_per_type.get(mem_type, 0) + total_sz

        nng.total_size[mem_area] = nng.total_size.get(mem_area, 0) + sum(tens.storage_size() for tens in lrs.ranges)
        nng.total_elements[mem_area] = nng.total_elements.get(mem_area, 0) + sum(tens.elements() for tens in lrs.ranges)

        print_allocation(lrs, mem_area, mem_type_set, sg, verbose_allocation, show_minimum_possible_allocation)

        if mem_area == MemArea.Sram:
            # Mark Sram usage for all subgraphs
            for sg_ in nng.subgraphs:
                mark_sram_used_for_cascaded_passes(sg_, lrs)

    if sg == nng.get_root_subgraph():
        nng.memory_used = sg.memory_used
        # NOTE: use a distinct loop variable; the original code rebound the
        # mem_area parameter here, which would silently shadow it.
        for area in nng.total_elements.keys():
            try:
                nng.bits_per_element[area] = nng.total_size[area] * 8 / nng.total_elements[area]
            except ZeroDivisionError:
                nng.bits_per_element[area] = 0.0
    return True