Blame - ethosu/vela/live_range.py - ml/ethos-u/ethos-u-vela

2020-04-27 18:20:16 +0100

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Build a live range graph for tensors in one or more subgraphs. Used for tensor allocation as well as in the scheduler.

18

# Can work with either a pass packed subgraph or a scheduled subgraph.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

19

from .high_level_command_stream_generator import calc_allowed_ofm_ifm_overlap_for_cascaded_pass

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

20

from .nn_graph import PassPlacement

Louis Verhaard

aee5d75

2020-09-30 09:01:52 +0200

[diff] [blame]

21

from .operation import Op

Patrik Gustavsson

eca2e95

2020-05-27 09:15:11 +0200

[diff] [blame]

22

from .tensor import MemType

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

23

from .tensor import Tensor

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

24

25

26

class LiveRange:
    """Time interval, size and alignment shared by a group of tensors.

    All tensors added to one LiveRange receive the same address when
    set_address() is called. The range is named after, and sized by, the
    first tensor added with a non-zero storage size.
    """

    def __init__(self, tens, alignment):
        # Tensors that are assigned to the same LiveRange will be allocated to the same address
        self.tensors = []
        # Initial bounds describe an empty interval; mark_usage() tightens them
        self.start_time = 99999999999
        self.end_time = -1
        self.size = 0
        self.name = ""
        self.alignment = alignment

        if tens:
            self.add_tensor(tens)

    def __str__(self):
        fields = (self.name, self.start_time, self.end_time)
        return "<live_range.LiveRange: '%s' start_time=%s, end_time=%s>" % fields

    __repr__ = __str__

    def add_tensor(self, tens):
        """Attach tens to this range; it must fit within the range's size."""
        required = tens.storage_size()
        if self.size != 0:
            assert (
                self.size >= required
            ), "Tensors assigned to the same LiveRange need to fit the size of the LiveRange."
        else:
            # LiveRange will be sized by and named after the first tensor added
            self.size = required
            self.name = tens.name

        self.tensors.append(tens)

    def mark_usage(self, op_time):
        """Extend the range to cover [op_time, op_time + 1); -1 is ignored."""
        if op_time != -1:
            self.start_time = min(self.start_time, op_time)
            self.end_time = max(self.end_time, op_time + 1)

    def overlaps_ranges(self, other):
        """Return True if the two time intervals share at least one time step."""
        latest_start = max(self.start_time, other.start_time)
        earliest_end = min(self.end_time, other.end_time)
        return latest_start < earliest_end

    def overlaps_address(self, other):
        """Find the first tensor pair whose address ranges intersect.

        Returns (True, tens, other_tens) for the first overlapping pair found,
        or (False, None, None) when no tensor of this range overlaps a tensor
        of 'other'.
        """
        for mine in self.tensors:
            for theirs in other.tensors:
                highest_base = max(mine.address, theirs.address)
                lowest_top = min(mine.address + self.size, theirs.address + other.size)
                if highest_base < lowest_top:
                    return True, mine, theirs

        return False, None, None

    def __lt__(self, other):
        # Order by start time, then end time, then size; the name breaks remaining ties
        for attr in ("start_time", "end_time", "size"):
            own_value = getattr(self, attr)
            other_value = getattr(other, attr)
            if own_value != other_value:
                return own_value < other_value
        return self.name < other.name

    def set_address(self, address):
        """Assign address to every tensor in this LiveRange and return it."""
        for member in self.tensors:
            member.address = address

        return address

    def get_alignment(self):
        return self.alignment

    def set_alignment(self, alignment):
        """Raise the alignment to the given value; it is never lowered."""
        if alignment > self.alignment:
            self.alignment = alignment

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

99

100

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

101

class LiveRangeGraph:
    """Collection of live ranges keyed by tensor, plus extraction bookkeeping."""

    def __init__(self):
        self.ranges = {}  # tens -> range
        self.allowed_overlaps = {}  # (tens,tens) -> overlap_int
        self.ignore_tensors = set()
        self.processed_subgraphs = set()
        self.current_time = 0

    def get_or_create_range(self, tens, alignment=Tensor.AllocationQuantum):
        """Return the live range of the tensor (or any of its clones).

        An existing range has its alignment raised to at least 'alignment'.
        If no registered tensor is equivalent to 'tens', a new LiveRange is
        created for it and registered under 'tens'.
        """
        candidates = (rng for known, rng in self.ranges.items() if tens.equivalent(known))
        found = next(candidates, None)
        if found is None:
            # No live range found for the tensor, create a new one
            found = LiveRange(tens, alignment)
            self.ranges[tens] = found
        else:
            found.set_alignment(alignment)
        return found

    def fuse_ranges(self, in_tens, out_tens):
        """Put out_tens into the live range of in_tens and return that range.

        out_tens must not already be registered in the graph.
        """
        shared = self.get_or_create_range(in_tens)
        assert out_tens not in self.ranges, out_tens
        shared.add_tensor(out_tens)
        self.ranges[out_tens] = shared

        return shared

Patrik Gustavsson

2020-10-16 13:59:52 +0200

[diff] [blame^]

129

def tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type_set):
    """Return True if no live range should be tracked for tens.

    A tensor is ignored when it is outside the targeted memory area or memory
    types, when it is already in lr_graph.ignore_tensors, or when its name
    ends with "reshape_shape_npu". In the last case it is also added to
    lr_graph.ignore_tensors.
    """
    outside_target = tens.mem_area != target_mem_area or tens.mem_type not in target_mem_type_set
    if outside_target or tens in lr_graph.ignore_tensors:
        return True

    if not tens.name.endswith("reshape_shape_npu"):
        return False

    # Reshape tensor, no need to allocate
    lr_graph.ignore_tensors.add(tens)
    return True

# Tries merging of ifm/ofm live ranges for memory only ops and elementwise ops

142

def merge_op_ranges(sg, lr_graph, target_mem_area, target_mem_type_set):
    """Fuse input/output live ranges for the passes of sg where possible.

    Memory only passes get their first input and first output placed in the
    same LiveRange, unless either tensor is ignored. Elementwise passes are
    delegated to merge_elementwise_op_ranges().
    """
    for ps in sg.passes:
        if ps.placement == PassPlacement.MemoryOnly:
            # For memory only passes, e.g. Reshape. Add input and output tensor to the same LiveRange
            src = ps.inputs[0]
            dst = ps.outputs[0]
            either_ignored = tensor_should_be_ignored(
                lr_graph, src, target_mem_area, target_mem_type_set
            ) or tensor_should_be_ignored(lr_graph, dst, target_mem_area, target_mem_type_set)
            if not either_ignored:
                lr_graph.fuse_ranges(src, dst)
        elif ps.is_element_wise:
            merge_elementwise_op_ranges(ps, lr_graph, target_mem_area, target_mem_type_set)

154

155

156

# Tries to merge the ifm/ofm live ranges of an elementwise op

157

def merge_elementwise_op_ranges(ps, lr_graph, target_mem_area, target_mem_type_set):
    """Try to let the ofm of an elementwise pass share a live range with an input.

    The pass is expected to contain at most one elementwise op (asserted).
    Nothing is fused when there is no such op, when its ofm is ignored, or
    when the op is SHL/SHR. Otherwise ifm and then ifm2 are examined, and the
    first eligible one is fused with the ofm.

    An input is eligible when it exists, is not a scalar (shape != []), lives
    in the targeted memory area and memory types, matches the ofm in format,
    dtype and shape, and has exactly one consumer and one producing op.
    """
    elem_op = None
    for op in ps.ops:
        if op.type.is_elementwise_op():
            assert elem_op is None
            elem_op = op

    if elem_op is None or tensor_should_be_ignored(lr_graph, elem_op.ofm, target_mem_area, target_mem_type_set):
        return

    # Check if overwriting the inputs can be allowed
    if elem_op.type in (Op.SHL, Op.SHR):
        return

    ofm = elem_op.ofm
    for inp in (elem_op.ifm, elem_op.ifm2):
        # Each candidate is checked against its OWN memory area/type; the previous
        # code tested ifm.mem_type while deciding whether ifm2 was eligible.
        in_target_memory = (
            inp is not None
            and inp.shape != []
            and inp.mem_area == target_mem_area
            and inp.mem_type in target_mem_type_set
        )
        if not in_target_memory:
            continue

        # check input format, dtype, broadcasting or if there are more input consumers
        if (
            inp.format == ofm.format
            and inp.dtype == ofm.dtype
            and inp.shape == ofm.shape
            and (len(inp.consumer_list) == 1 and len(inp.ops) == 1)
        ):
            lr_graph.fuse_ranges(inp, ofm)
            break

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

198

def extract_live_ranges_from_passes(
    sg,
    target_mem_area,
    target_mem_type=set((MemType.Scratch, MemType.Scratch_fast)),
    ignore_subgraph_input_output_tensors=False,
):
    """Build a LiveRangeGraph from the passes of a pass packed subgraph.

    Pass number idx is given time 2 * idx, stored on ps.time. Every
    non-ignored input, intermediate and output tensor of a pass is marked as
    used at that time. The subgraph output tensors are additionally marked at
    time 2 * len(sg.passes), i.e. after the last pass.

    When ignore_subgraph_input_output_tensors is set, the subgraph's input
    and output tensors are excluded from the graph altogether.
    """
    lr_graph = LiveRangeGraph()

    def mark_all(tensors, timestamp):
        # Record a usage at 'timestamp' for every tensor that is not ignored
        for tens in tensors:
            if not tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type):
                lr_graph.get_or_create_range(tens).mark_usage(timestamp)

    if ignore_subgraph_input_output_tensors:
        for boundary_tensors in (sg.input_tensors, sg.output_tensors):
            lr_graph.ignore_tensors.update(boundary_tensors)

    # Try to merge live ranges of operations in the NPU subgraphs
    if sg.placement == PassPlacement.Npu:
        merge_op_ranges(sg, lr_graph, target_mem_area, target_mem_type)

    for idx, ps in enumerate(sg.passes):
        ps.time = 2 * idx
        mark_all(ps.inputs + ps.intermediates + ps.outputs, ps.time)

    # Subgraph outputs are also marked one step past the final pass
    mark_all(sg.output_tensors, len(sg.passes) * 2)

    return lr_graph

def extract_live_ranges_from_cascaded_passes(

236

sg,

237

target_mem_area,

Patrik Gustavsson

eca2e95

2020-05-27 09:15:11 +0200

[diff] [blame]

238

target_mem_type_set,

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

239

use_ifm_ofm_overlap=True,

240

ignore_subgraph_input_output_tensors=False,

241

lr_graph=None,

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

242

allocation_alignment=Tensor.AllocationQuantum,

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

243

):

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

244

if lr_graph is None:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

245

lr_graph = LiveRangeGraph()

246

247

if sg in lr_graph.processed_subgraphs:

248

# if subgraph has been processed already, return the lr_graph as is

249

return lr_graph

250

251

if ignore_subgraph_input_output_tensors:

252

lr_graph.ignore_tensors.update(sg.input_tensors)

253

lr_graph.ignore_tensors.update(sg.output_tensors)

254

Patrik Gustavsson

2020-10-16 13:59:52 +0200

[diff] [blame^]

255

# Try to merge live ranges of operations in the NPU subgraphs

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

256

if sg.placement == PassPlacement.Npu:

Patrik Gustavsson

2020-10-16 13:59:52 +0200

[diff] [blame^]

257

merge_op_ranges(sg, lr_graph, target_mem_area, target_mem_type_set)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

258

259

for cps in sg.cascaded_passes:

260

cps.time = lr_graph.current_time

261

262

time_for_pass = cps.time

263

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

264

for tens in cps.inputs:

Patrik Gustavsson

2020-10-16 13:59:52 +0200

[diff] [blame^]

265

if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type_set):

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

266

continue

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

267

rng = lr_graph.get_or_create_range(tens, allocation_alignment)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

268

rng.mark_usage(time_for_pass)

269

270

cps_primary_op = cps.passes[0].primary_op

Patrik Gustavsson

eca2e95

2020-05-27 09:15:11 +0200

[diff] [blame]

271

Louis Verhaard

aee5d75

2020-09-30 09:01:52 +0200

[diff] [blame]

272

if (

273

cps_primary_op

274

and cps_primary_op.type == Op.CustomNpuOp

275

and MemType.Permanent_CPU not in target_mem_type_set

276

):

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

277

# If the primary-op is an NpuOp that means this is where an Npu subgraph

278

# is called. Go into said subgraph and extract live ranges before continuing.

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

279

# Use default allocation alignment of 16 for Npu tensors

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

280

npu_sg = cps_primary_op.attrs["subgraph"]

281

lr_graph = extract_live_ranges_from_cascaded_passes(

Patrik Gustavsson

2020-10-16 13:59:52 +0200

[diff] [blame^]

282

npu_sg, target_mem_area, target_mem_type_set, use_ifm_ofm_overlap, False, lr_graph,

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

283

)

284

# Set the new time after handling the Npu subgraph

285

time_for_pass = lr_graph.current_time

286

cps.time = time_for_pass

287

Patrik Gustavsson

2020-10-16 13:59:52 +0200

[diff] [blame^]

288

for tens in cps.intermediates + cps.outputs:

289

if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type_set):

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

290

continue

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

291

rng = lr_graph.get_or_create_range(tens, allocation_alignment)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

292

rng.mark_usage(time_for_pass)

293

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

294

if use_ifm_ofm_overlap:

295

# fill allowed overlap for ifm and ofm tensor

296

ifm_tensor = cps.passes[0].ifm_tensor

297

ofm_tensor = cps.passes[-1].ofm_tensor

298

if (

299

ifm_tensor is not None

300

and ofm_tensor is not None

Patrik Gustavsson

2020-10-16 13:59:52 +0200

[diff] [blame^]

301

and not tensor_should_be_ignored(lr_graph, ifm_tensor, target_mem_area, target_mem_type_set)

302

and not tensor_should_be_ignored(lr_graph, ofm_tensor, target_mem_area, target_mem_type_set)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

303

):

304

lr_graph.allowed_overlaps[(ifm_tensor, ofm_tensor)] = calc_allowed_ofm_ifm_overlap_for_cascaded_pass(

cps

)

lr_graph.current_time += 2

309

310

end_time = 0

311

for rng in lr_graph.ranges.values():

312

# Find the maximum end time of all live-ranges in the graph

313

end_time = max(end_time, rng.end_time)

314

315

for tens in sg.output_tensors:

Patrik Gustavsson

2020-10-16 13:59:52 +0200

[diff] [blame^]

316

if tensor_should_be_ignored(lr_graph, tens, target_mem_area, target_mem_type_set):

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

317

continue

Jacob Bohlin

2020-08-28 13:25:14 +0200

[diff] [blame]

318

rng = lr_graph.get_or_create_range(tens, allocation_alignment)

Tim Hall