blob: 14cd051e4e9672cf2c61e5bcd15aac6bc2b6113c [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
17# Generate a high-level command stream from a scheduled subgraph with CascadedPasses.
18#
19# Also used during scheduling to work out allowable IFM/OFM overlap, this functionality can be accessed using
20# calc_allowed_ofm_ifm_overlap_for_cascaded_pass().
Diego Russoe8a10452020-04-21 17:39:10 +010021from .high_level_command_stream import Box
22from .high_level_command_stream import DMA
23from .high_level_command_stream import NpuStripe
24from .nn_graph import PassPlacement
25from .nn_graph import SchedulingStrategy
Charles Xu89a6bbf2020-08-11 12:31:58 +020026from .numeric_util import round_up_divide
Louis Verhaarde8a5a782020-11-02 18:04:27 +010027from .operation import create_activation_function
Tim Hall79d07d22020-04-27 18:20:16 +010028from .operation import NpuBlockType
Louis Verhaardaee5d752020-09-30 09:01:52 +020029from .operation import Op
Charles Xu78792222020-05-13 10:15:26 +020030from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010031
32
def dma_if_necessary(ps, box, tensor):
    # Emit a DMA command for this tensor if it must be copied into faster
    # memory before the pass can consume it; otherwise emit nothing.
    if not tensor.needs_dma():
        return
    producer = tensor.ops[0]
    yield DMA(ps, producer.inputs[0], tensor, box)
Tim Hall79d07d22020-04-27 18:20:16 +010038
Tim Hallc30f4952020-06-15 20:47:35 +010039
def match_tensor(source, derived):
    """Return True if `derived` is `source` itself, or a SplitSliceRead view of it.

    A tensor produced by exactly one SplitSliceRead op is considered a match
    when that op reads from `source`.
    """
    if source == derived:
        return True
    ops = derived.ops
    # len(ops) == 1 already implies ops is non-empty, so the original
    # `ops != []` guard was redundant and has been dropped.
    return len(ops) == 1 and ops[0].type == Op.SplitSliceRead and source == ops[0].inputs[0]
Tim Hallc30f4952020-06-15 20:47:35 +010045
Tim Hall79d07d22020-04-27 18:20:16 +010046
def generate_high_level_command_stream_for_pass(strat, passes, block_configs, idx):
    """Generate high-level commands (DMA and NpuStripe) for passes[idx].

    This is a generator. Depending on `strat` it either:
      - WeightStream: steps over the OFM depth (last axis) in block-sized
        slices, yielding weight/intermediate DMA commands and one NpuStripe
        per depth slice; or
      - IfmStream: steps over the OFM height (axis -3) in block-sized rows,
        recursively interleaving commands from the previous pass in `passes`
        until enough IFM rows are available for each stripe.

    NOTE(review): this function also mutates `ps` as a side effect (it may
    swap ifm/ifm2, and sets primary_op.memory_function / activation).
    """
    is_first = idx == 0
    is_last = idx == len(passes) - 1
    ps = passes[idx]
    block_config = block_configs[idx]
    npu_block_type = ps.npu_block_type
    split_offsets = [None, None]  # offset for [ifm, ifm2]

    if ps.ifm_tensor is not None and ps.ifm2_tensor is not None and npu_block_type == NpuBlockType.ElementWise:
        # Ensure correct ifm and ifm2 order: if the pass inputs are crossed
        # relative to the primary op's inputs, swap them back.
        if match_tensor(ps.inputs[0], ps.primary_op.inputs[1]) and match_tensor(ps.inputs[1], ps.primary_op.inputs[0]):
            ps.ifm_tensor, ps.ifm2_tensor = ps.ifm2_tensor, ps.ifm_tensor

        # Record the slice start offset of each SplitSliceRead, matched to
        # the ifm/ifm2 it feeds.
        for op in ps.ops:
            if op.type == Op.SplitSliceRead:
                ps.primary_op.memory_function = op.type
                assert len(op.inputs) == 1
                if match_tensor(ps.ifm_tensor, op.inputs[0]):
                    split_offsets[0] = op.attrs["split_start"]
                elif match_tensor(ps.ifm2_tensor, op.inputs[0]):
                    split_offsets[1] = op.attrs["split_start"]
                else:
                    assert False
    else:
        # Non-elementwise: assign SplitSliceRead offsets in encounter order
        # (at most two are expected).
        ifm_idx = 0
        for op in ps.ops:
            if op.type == Op.SplitSliceRead:
                assert ifm_idx < 2
                split_offsets[ifm_idx] = op.attrs["split_start"]
                ps.primary_op.memory_function = op.type
                ifm_idx += 1

    ifm_tensor = ps.ifm_tensor
    ifm2_tensor = ps.ifm2_tensor
    ofm_tensor = ps.ofm_tensor
    weight_tensor = ps.weight_tensor
    scale_tensor = ps.scale_tensor

    # Full OFM box to begin with; narrowed below by concat and stripe loops.
    ofm_start = [0] * len(ofm_tensor.shape)
    ofm_end = list(ofm_tensor.shape)

    strides = None
    skirt = None
    upscaling = 1  # vertical IFM->OFM upscaling factor (transpose conv / resize)
    if ps.primary_op is not None:
        strides = ps.primary_op.attrs.get("strides", None)
        skirt = ps.primary_op.attrs.get("skirt", None)
        if ps.primary_op.type == Op.Conv2DBackpropInputSwitchedBias:
            upscaling = ofm_tensor.shape[-3] // ifm_tensor.shape[-3]
        elif ps.primary_op.type == Op.ResizeBilinear:
            upscaling = round_up_divide(ofm_tensor.shape[-3], ifm_tensor.shape[-3])

    concat_axis = 0
    concat_offset = 0

    # Apply concat writes (narrow the OFM box to the concat slice) and fuse
    # activation ops into the primary op.
    for op in ps.ops:
        if op.type == Op.ConcatSliceWrite:
            concat_axis = op.attrs["concat_axis"]
            concat_start = op.attrs["concat_start"]
            concat_end = op.attrs["concat_end"]

            ofm_start[concat_axis] = concat_start
            ofm_end[concat_axis] = concat_end
            concat_offset = concat_start
            ps.primary_op.memory_function = op.type
        elif op.type.is_relu_op() or op.type in (Op.Tanh, Op.Sigmoid):
            ps.primary_op.activation = create_activation_function(op.type)

    if strat == SchedulingStrategy.WeightStream:
        # Stream over the OFM depth (last axis) one block at a time.
        ofm_step = block_config[-1]
        ofm_stop = ofm_end[-1]
        if weight_tensor is None or not weight_tensor.needs_dma():
            # No weight DMA needed, so a single full-depth stripe suffices.
            ofm_step = ofm_stop
        for start in range(ofm_start[-1], ofm_stop, ofm_step):
            end = min(start + ofm_step, ofm_stop)
            ofm_start[-1] = start
            ofm_end[-1] = end
            ofm_box = Box(ofm_start, ofm_end)
            ifm_box = None
            ifm2_box = None

            # Map the OFM slice back to the IFM region it needs.
            if ifm_tensor.shape != []:
                ifm_box, _, _ = ofm_box.transform_with_strides_and_skirt(
                    strides,
                    skirt,
                    ifm_tensor.shape,
                    npu_block_type,
                    concat_axis,
                    concat_offset,
                    split_offsets[0],
                    upscaling,
                )
            else:
                ifm_box = Box([], [])
            if ifm2_tensor is not None and ifm2_tensor.shape != []:
                ifm2_box, _, _ = ofm_box.transform_with_strides_and_skirt(
                    strides,
                    skirt,
                    ifm2_tensor.shape,
                    npu_block_type,
                    concat_axis,
                    concat_offset,
                    split_offsets[1],
                    upscaling,
                )
            else:
                ifm2_box = Box([], [])

            # DMA any feature-map/LUT intermediates needed by this stripe.
            for intermediate in ps.intermediates:
                if (
                    intermediate is not None
                    and intermediate.shape != []
                    and intermediate.purpose in (TensorPurpose.FeatureMap, TensorPurpose.LUT)
                ):
                    if intermediate.purpose is TensorPurpose.FeatureMap:
                        intermediate_box, _, _ = ofm_box.transform_with_strides_and_skirt(
                            strides,
                            skirt,
                            intermediate.shape,
                            npu_block_type,
                            concat_axis,
                            concat_offset,
                            split_offsets[0],
                            upscaling,
                        )
                    else:
                        # LUTs are transferred whole.
                        intermediate_box = Box([0] * len(intermediate.shape), list(intermediate.shape))
                    yield from dma_if_necessary(ps, intermediate_box, intermediate)

            weight_box = None
            if weight_tensor is not None:
                weight_oc_start = start
                weight_oc_end = end
                # When concatenating along the last (output-channel) axis the
                # weight range is relative to the concat slice, not the OFM.
                if concat_axis - len(weight_tensor.shape) == -1:
                    weight_oc_start -= concat_offset
                    weight_oc_end -= concat_offset

                weight_box = Box.make_weight_box(
                    weight_tensor.shape,
                    npu_block_type,
                    weight_oc_start,
                    weight_oc_end,
                    weight_tensor.weight_transpose_depthwise,
                )
                yield from dma_if_necessary(ps, weight_box, weight_tensor)

            # Depth stripes always span the full height, so the h-stripe
            # first/last flags are both True.
            yield NpuStripe(
                ps,
                block_config,
                is_first,
                is_last,
                True,
                True,
                ifm_tensor,
                ifm_box,
                ofm_tensor,
                ofm_box,
                weight_tensor,
                weight_box,
                scale_tensor,
                concat_axis,
                concat_offset,
                ifm2_tensor=ifm2_tensor,
                ifm2_box=ifm2_box,
            )

    elif strat == SchedulingStrategy.IfmStream:
        # Stream over the OFM height (axis -3) one block row at a time.
        y_step = block_config[0]
        y_start = 0
        y_dim = 1
        if len(ofm_tensor.shape) >= 3:
            y_start = ofm_start[-3]
            y_dim = ofm_end[-3]
        if idx > 0:
            # Cascaded: IFM rows are produced on demand by the previous pass.
            ifm_y_present = 0
            prev_pass = passes[idx - 1]
            prev_pass_gen = generate_high_level_command_stream_for_pass(strat, passes, block_configs, idx - 1)
        else:
            # First pass: the whole IFM is already available.
            ifm_y_present = 1
            if len(ifm_tensor.shape) >= 3:
                ifm_y_present = ifm_tensor.shape[-3]
            prev_pass_gen = []
            prev_pass = None

        if len(passes) == 1:
            # no cascading, can just issue one big stripe
            # but only if we've done allocation and OFM does not overlap IFM
            if ifm_tensor.address is not None and ofm_tensor.address is not None:
                if (
                    ifm_tensor.address + ifm_tensor.storage_size() <= ofm_tensor.address
                    or ofm_tensor.address + ofm_tensor.storage_size() <= ifm_tensor.address
                ):
                    y_step = y_dim

        # Weight/scale DMAs are issued once, before the first stripe that
        # needs them (boxes double as "already issued" flags).
        weight_box = None
        scale_box = None

        for start in range(y_start, y_dim, y_step):
            end = min(start + y_step, y_dim)
            if len(ofm_tensor.shape) >= 3:
                ofm_start[-3] = start
                ofm_end[-3] = end
            ofm_box = Box(ofm_start, ofm_end)

            # Kernel height determines how much extra IFM each stripe reads.
            k_height = 1
            if npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
                if ps.primary_op is not None:
                    k_height = ps.primary_op.attrs["ksize"][1]
            else:
                if weight_tensor is not None:
                    k_height = weight_tensor.shape[0]

            ifm_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                strides,
                skirt,
                ifm_tensor.shape,
                npu_block_type,
                concat_axis,
                concat_offset,
                split_offsets[0],
                k_height,
                upscaling,
            )

            # DMA any feature-map/LUT intermediates needed by this stripe.
            for intermediate in ps.intermediates:
                if (
                    intermediate is not None
                    and intermediate.shape != []
                    and intermediate.purpose in (TensorPurpose.FeatureMap, TensorPurpose.LUT)
                ):
                    if intermediate.purpose is TensorPurpose.FeatureMap:
                        intermediate_box, _, _ = ofm_box.transform_with_strides_and_skirt(
                            strides,
                            skirt,
                            intermediate.shape,
                            npu_block_type,
                            concat_axis,
                            concat_offset,
                            split_offsets[0],
                            upscaling,
                        )
                    else:
                        # LUTs are transferred whole.
                        intermediate_box = Box([0] * len(intermediate.shape), list(intermediate.shape))
                    yield from dma_if_necessary(ps, intermediate_box, intermediate)

            # Pull commands from the previous pass until it has produced
            # enough IFM rows for this stripe.
            ifm_y_needed = 1
            if len(ifm_box.end_coord) >= 3:
                ifm_y_needed = ifm_box.end_coord[-3]
            if ifm_y_present < ifm_y_needed:
                for prev_cmd in prev_pass_gen:
                    yield prev_cmd
                    rng = prev_cmd.get_ofm_y_range_for_pass(prev_pass)
                    if rng is not None:
                        ifm_y_present = max(ifm_y_present, rng[1])
                        if ifm_y_present >= ifm_y_needed:
                            break

            if scale_tensor is not None and scale_tensor.purpose == TensorPurpose.FSBias and scale_box is None:
                scale_box = Box([0] * len(scale_tensor.shape), list(scale_tensor.shape))
                yield from dma_if_necessary(ps, scale_box, scale_tensor)

            if weight_tensor is not None and weight_box is None:
                weight_box = Box.make_weight_box(
                    weight_tensor.shape, npu_block_type, weights_transposed=weight_tensor.weight_transpose_depthwise
                )
                yield from dma_if_necessary(ps, weight_box, weight_tensor)

            # Check if first/last stripe in pass
            is_first_h_stripe = start == y_start
            is_last_h_stripe = (start + y_step) >= y_dim

            stripe = NpuStripe(
                ps,
                block_config,
                is_first,
                is_last,
                is_first_h_stripe,
                is_last_h_stripe,
                ifm_tensor,
                ifm_box,
                ofm_tensor,
                ofm_box,
                weight_tensor,
                weight_box,
                scale_tensor,
                concat_axis,
                concat_offset,
                None,
                None,
                pad_top,
                pad_bottom,
            )
            yield stripe
    else:
        assert 0, "unknown scheduling strategy"
342
343
def generate_high_level_command_stream_for_pass_list(strat, passes, block_configs):
    # Weight streaming emits every pass in order; IFM streaming starts from
    # the final pass, which recursively pulls in its producers as needed.
    if strat == SchedulingStrategy.WeightStream:
        for pass_idx, _ in enumerate(passes):
            yield from generate_high_level_command_stream_for_pass(strat, passes, block_configs, pass_idx)
    elif strat == SchedulingStrategy.IfmStream:
        last_idx = len(passes) - 1
        yield from generate_high_level_command_stream_for_pass(strat, passes, block_configs, last_idx)
    else:
        assert 0, "Unknown streaming strategy"
352
353
def generate_high_level_command_stream_for_cascaded_pass(cps):
    # Expand a cascaded pass into its command stream using the block config
    # chosen for each contained pass.
    block_configs = [ps.block_config for ps in cps.passes]
    yield from generate_high_level_command_stream_for_pass_list(cps.strategy, cps.passes, block_configs)
358
359
def generate_high_level_command_stream(nng, sg, arch, verbose_high_level_command_stream):
    # Collect the command streams of all NPU-placed cascaded passes and
    # attach the combined stream to the subgraph.
    commands = []
    for cascaded_pass in sg.cascaded_passes:
        if cascaded_pass.placement != PassPlacement.Npu:
            continue
        commands.extend(generate_high_level_command_stream_for_cascaded_pass(cascaded_pass))

    sg.high_level_command_stream = commands
    if verbose_high_level_command_stream:
        sg.print_high_level_command_stream()
369
370
def calc_allowed_ofm_ifm_overlap_for_pass_list(strat, passes, block_configs):
    """Return how many bytes of the final OFM may safely overlap the first IFM.

    Simulates the command stream and, at each first/last stripe boundary,
    compares the highest OFM byte written so far against the lowest IFM byte
    still to be read; the worst-case slack over the whole stream is the
    allowed overlap. Returns 0 when overlap cannot be established (missing
    tensors, unknown address offsets, or weight streaming).
    """
    highest_ofm_write = 0
    if not passes[0].ifm_tensor or not passes[-1].ofm_tensor:
        return 0

    # Bug fix: storage_size is a method; the original stored the bound method
    # object itself, so min(ifm_read, ofm_size) would raise TypeError if a
    # command with is_last arrived before any is_first reassigned ifm_read.
    ifm_read = passes[0].ifm_tensor.storage_size()
    min_overlap = 999999999999999999999  # effectively +infinity sentinel
    ofm_size = passes[-1].ofm_tensor.storage_size()
    if strat == SchedulingStrategy.WeightStream:
        return 0
    for cmd in generate_high_level_command_stream_for_pass_list(strat, passes, block_configs):
        if cmd.is_npu_pass_command():
            if cmd.is_first:
                # Lowest IFM address this stripe starts reading from.
                ifm_read = cmd.ifm_tensor.address_offset_for_coordinate(cmd.ifm_box.start_coord, is_top_box=False)
                if ifm_read is None:
                    return 0
            if cmd.is_last:
                # Highest OFM address this stripe writes.
                write_offset = cmd.ofm_tensor.address_offset_for_coordinate(cmd.ofm_box.end_coord, is_top_box=True)
                if write_offset is None:
                    return 0
                highest_ofm_write = max(write_offset, highest_ofm_write)

            if cmd.is_first or cmd.is_last:
                overlap_required = max(highest_ofm_write - min(ifm_read, ofm_size), 0)
                can_overwrite = ofm_size - overlap_required
                min_overlap = min(min_overlap, can_overwrite)

            if cmd.is_first:
                # Advance the read watermark past the data this stripe consumed.
                ifm_read = cmd.ifm_tensor.address_offset_for_coordinate(cmd.ifm_box.end_coord, is_top_box=True)

    min_overlap = max(min_overlap, 0)
    return min_overlap