blob: 3d0a1e58d36bab4a2da109d360ffa4124edfa3f5 [file] [log] [blame]
Patrik Gustavssone3b1b912021-02-09 15:38:46 +01001# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
Tim Hall79d07d22020-04-27 18:20:16 +01002#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
Tim Halld8339a72021-05-27 18:49:40 +010017# Generate a high-level command stream from a schedule
Diego Russoe8a10452020-04-21 17:39:10 +010018from .high_level_command_stream import Box
19from .high_level_command_stream import DMA
20from .high_level_command_stream import NpuStripe
Charles Xu89a6bbf2020-08-11 12:31:58 +020021from .numeric_util import round_up_divide
Louis Verhaarde8a5a782020-11-02 18:04:27 +010022from .operation import create_activation_function
Tim Hall79d07d22020-04-27 18:20:16 +010023from .operation import NpuBlockType
Louis Verhaardaee5d752020-09-30 09:01:52 +020024from .operation import Op
patrik.gustavssoneeb85152020-12-21 17:10:40 +000025from .shape4d import Shape4D
Charles Xu78792222020-05-13 10:15:26 +020026from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010027
28
def dma_if_necessary(ps, box, tensor):
    """Yield a DMA command moving `box` of `tensor` from its source tensor.

    A DMA is only needed when the tensor has a source tensor and the two
    live in different memory areas; otherwise nothing is yielded.
    """
    source = tensor.src_tensor
    if not source:
        return
    if tensor.mem_area == source.mem_area:
        # Already resident in the target memory - no transfer required
        return
    yield DMA(ps, source, tensor, box)
Tim Hall79d07d22020-04-27 18:20:16 +010033
Tim Hallc30f4952020-06-15 20:47:35 +010034
def generate_high_level_command_stream_for_schedule(nng, sg, arch, verbose_high_level_command_stream):
    """Build sg.high_level_command_stream from the subgraph's schedule.

    Walks sg.sched_ops (already in execution order). Uncascaded Ops
    (cascade id 0) get their commands generated in isolation; for a cascade
    the generation starts from its last Op, which recursively pulls in the
    commands of its producers, so each cascade is processed only once.
    """
    commands = []
    seen_cascades = set()
    for sched_op in sg.sched_ops:
        cascade_id = sg.schedule.cost_map[sched_op].cascade

        if cascade_id in seen_cascades:
            # Commands for this Op were already emitted with its cascade
            continue

        if cascade_id == 0:
            # Not part of a cascade - generate this Op's commands on their own
            commands.extend(generate_high_level_commands_for_sched_op(sched_op, sg.schedule))
        else:
            # Whole cascade at once, starting from its final Op
            final_op = sg.sched_ops[sg.schedule.cascades[cascade_id].end]
            commands.extend(generate_high_level_commands_for_sched_op(final_op, sg.schedule))
            seen_cascades.add(cascade_id)

    sg.high_level_command_stream = commands
    if verbose_high_level_command_stream:
        sg.print_high_level_command_stream()
58
59
def generate_high_level_commands_for_sched_op(sched_op, schedule):
    """Yield the high-level commands (DMA and NpuStripe) for one scheduled Op.

    The OFM is traversed stripe by stripe: height, then width, then the
    depth slices chosen by the scheduler. One NpuStripe command is yielded
    per stripe, preceded when needed by DMA commands for buffered weights
    and the activation LUT. If this Op is inside a cascade and is not the
    cascade's first Op, the producer of its IFM is processed recursively and
    its commands are interleaved so a stripe is only emitted once enough IFM
    data has been produced.
    """
    op_info = schedule.cost_map[sched_op]
    # cascade id 0 means "not cascaded"; .get() then returns None
    cascade_info = schedule.cascades.get(op_info.cascade)
    npu_block_type = sched_op.parent_ps.npu_block_type
    block_config = op_info.block_config
    ps = sched_op.parent_ps
    parent_op = sched_op.parent_op
    ofm_tensor = ps.ofm_tensor

    # Get Tensors and Full Shapes
    (ifm_tensor, ifm2_tensor, uncomp_weight_tensor, _, _,) = parent_op.get_ifm_ifm2_weights_biases_ofm()
    ifm = sched_op.ifm
    ifm2 = sched_op.ifm2
    ofm_shape = sched_op.ofm.shape

    # Get Kernel strides and upscaling factor; strides are in NHWC order
    kernel_stride = sched_op.kernel.stride
    strides = [1, kernel_stride.y, kernel_stride.x, 1]
    skirt = parent_op.attrs.get("skirt", None)
    upscaling = 1
    if sched_op.op_type == Op.Conv2DBackpropInputSwitchedBias:
        # Transpose convolution: OFM height is an integer multiple of IFM height
        upscaling = ofm_shape.height // ifm.shape.height
    elif sched_op.op_type == Op.ResizeBilinear:
        upscaling = round_up_divide(ofm_shape.height, ifm.shape.height)

    # Get Kernel height - used when transforming OFM boxes back to IFM boxes
    k_height = 1
    if npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
        if parent_op is not None:
            # ksize is in NHWC order, index 1 is the kernel height
            k_height = parent_op.attrs["ksize"][1]
    else:
        if uncomp_weight_tensor is not None:
            # assumes HWIO weight layout, dim 0 = kernel height - TODO confirm
            k_height = uncomp_weight_tensor.shape[0]

    # Define Start and End coordinates for the OFM
    ofm_start = Shape4D(0, 0, 0, op_info.ofm_depth_slices[0])
    ofm_end = ofm_shape

    ofm_depth_slices = op_info.ofm_depth_slices

    # Read/Write offsets
    read_offsets = list(parent_op.read_offsets)  # offset for [ifm, ifm2]
    read_shapes = list(parent_op.read_shapes)  # read shapes for [ifm, ifm2]
    write_offset = Shape4D(0, 0, 0, 0)
    if parent_op.write_offset is not None:
        # Op writes a sub-region of the OFM; narrow the traversal to it
        write_offset = parent_op.write_offset
        ofm_start = write_offset
        ofm_end = parent_op.write_offset + parent_op.write_shape

    # Create activation function if needed (fused ReLU-family/Tanh/Sigmoid
    # found anywhere in the pass is attached to the primary op)
    for op in ps.ops:
        if op.type.is_relu_op() or op.type in (Op.Tanh, Op.Sigmoid):
            ps.primary_op.activation = create_activation_function(
                op.type, min=op.attrs.get("min", None), max=op.attrs.get("max", None)
            )

    # Generate commands for the Op that produces this Op's IFM, if applicable
    if cascade_info is None or cascade_info.start == sched_op.index:
        # Lone Op or First Op in cascade - all IFM data is present
        ifm_present = Box([0, 0, 0, 0], ifm.shape.as_list())
        producer_op = None
        prev_cmd_gen = []
    else:
        # Mid/end of a cascade: IFM starts empty and grows as the producer runs
        ifm_present = Box([0, 0, 0, 0], [0, 0, 0, 0])
        producer_op = sched_op.ifm.connection.producers[0]
        prev_cmd_gen = generate_high_level_commands_for_sched_op(producer_op, schedule)

    ofm_step = op_info.stripe
    for start_height in range(ofm_start.height, ofm_end.height, ofm_step.height):
        end_height = min(start_height + ofm_step.height, ofm_end.height)
        for start_width in range(ofm_start.width, ofm_end.width, ofm_step.width):
            end_width = min(start_width + ofm_step.width, ofm_end.width)

            # LUT DMA is emitted at most once per (height, width) stripe
            lut_dma_done = False
            for depth_idx, start_channel in enumerate(ofm_depth_slices[:-1]):
                start_channel = max(start_channel, ofm_start.depth)
                end_channel = min(ofm_depth_slices[depth_idx + 1], ofm_end.depth)

                # Construct the OFM box for the current stripe
                ofm_box_start = Shape4D(ofm_start.batch, start_height, start_width, start_channel)
                ofm_box_end = Shape4D(ofm_end.batch, end_height, end_width, end_channel)
                ofm_box = Box(ofm_box_start.as_list(), ofm_box_end.as_list())
                ifm_box = Box([], [])
                ifm2_box = Box([], [])

                # Calculate IFM input box based on the OFM box
                if ifm:
                    ifm_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                        strides,
                        skirt,
                        ifm.shape,
                        npu_block_type,
                        write_offset.as_list(),
                        read_offsets[0],
                        read_shapes[0],
                        k_height,
                        upscaling,
                    )

                # Calculate IFM2 input box based on the OFM box
                # (overwrites pad_top/pad_bottom from the IFM branch when both exist)
                if ifm2:
                    ifm2_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                        strides,
                        skirt,
                        ifm2.shape,
                        npu_block_type,
                        write_offset.as_list(),
                        read_offsets[1],
                        read_shapes[1],
                        k_height,
                        upscaling,
                    )

                ifm_required = ifm_box
                # Get the Op that produces this Op's IFM data - only applicable within cascades
                if producer_op:
                    assert op_info.cascade != 0
                    assert op_info.cascade == schedule.cost_map[producer_op].cascade
                    # Drain producer commands until this stripe's IFM region exists;
                    # the generator keeps its position across stripes
                    for prev_cmd in prev_cmd_gen:
                        yield prev_cmd
                        if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps:
                            ifm_present.end_coord = prev_cmd.ofm_box.end_coord
                        if ifm_required.is_subbox_of(ifm_present):
                            # There is enough IFM data - exit loop
                            break

                # Information about the current stripe's location in the cascade
                is_first_h_stripe = ofm_box_start.height == ofm_start.height
                is_last_h_stripe = ofm_box_end.height >= ofm_end.height

                # Calculate the weight box - i.e. the subshape of weights needed for this NpuStripe command
                weight_tensor = op_info.npu_weights_tensor
                scale_tensor = op_info.npu_scales_tensor
                if op_info.npu_weights_tensor:
                    weight_box = Box([0, 0, 0, start_channel], [1, 1, 1, end_channel])

                    if op_info.buffered_weight_tensor and is_first_h_stripe:
                        # Weights are double-buffered: DMA once per height stripe
                        yield from dma_if_necessary(sched_op.parent_ps, weight_box, op_info.buffered_weight_tensor)
                        weight_tensor = op_info.buffered_weight_tensor
                else:
                    weight_box = None

                # Should only be done once per loop but not before weights above
                if parent_op.activation_lut and not lut_dma_done:
                    lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]
                    lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))
                    lut_dma_done = True
                    yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)

                # NOTE(review): pad_top/pad_bottom are only bound inside the
                # `if ifm:`/`if ifm2:` branches above - presumably every NPU op
                # has an IFM so at least one branch always runs; confirm
                yield NpuStripe(
                    sched_op.parent_ps,
                    block_config.old_style_representation(),
                    is_first_h_stripe,
                    is_last_h_stripe,
                    ifm_tensor,
                    ifm_box,
                    ofm_tensor,
                    ofm_box,
                    weight_tensor,
                    weight_box,
                    scale_tensor,
                    ifm2_tensor=ifm2_tensor,
                    ifm2_box=ifm2_box,
                    pad_top=pad_top,
                    pad_bottom=pad_bottom,
                )
Patrik Gustavsson2349d422020-12-01 16:02:29 +0100225 )