blob: e71fb6eaef048625345b56e51e2f18cd1441d0d5 [file] [log] [blame]
Patrik Gustavssone3b1b912021-02-09 15:38:46 +01001# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
Tim Hall79d07d22020-04-27 18:20:16 +01002#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Tim Hall79d07d22020-04-27 18:20:16 +010016# Description:
Tim Halld8339a72021-05-27 18:49:40 +010017# Generate a high-level command stream from a schedule
Diego Russoe8a10452020-04-21 17:39:10 +010018from .high_level_command_stream import Box
19from .high_level_command_stream import DMA
20from .high_level_command_stream import NpuStripe
Charles Xu89a6bbf2020-08-11 12:31:58 +020021from .numeric_util import round_up_divide
Louis Verhaarde8a5a782020-11-02 18:04:27 +010022from .operation import create_activation_function
Tim Hall79d07d22020-04-27 18:20:16 +010023from .operation import NpuBlockType
Louis Verhaardaee5d752020-09-30 09:01:52 +020024from .operation import Op
patrik.gustavssoneeb85152020-12-21 17:10:40 +000025from .shape4d import Shape4D
Charles Xu78792222020-05-13 10:15:26 +020026from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010027
28
def dma_if_necessary(ps, box, tensor):
    """Yield a DMA command for `tensor` when its data lives in a different memory area.

    A command is produced only if the tensor has a source tensor and the two
    reside in different memory areas; otherwise nothing is yielded.
    """
    source = tensor.src_tensor
    if source and source.mem_area != tensor.mem_area:
        yield DMA(ps, source, tensor, box)
Tim Hall79d07d22020-04-27 18:20:16 +010033
Tim Hallc30f4952020-06-15 20:47:35 +010034
def generate_high_level_command_stream_for_schedule(nng, sg, arch, verbose_high_level_command_stream):
    """Build the subgraph's high-level command stream from its schedule.

    sg.sched_ops is assumed to be in execution order. Ops that are part of a
    cascade are emitted exactly once, driven from the last Op in the cascade;
    non-cascaded Ops (cascade id 0) are emitted in isolation.
    """
    commands = []
    seen_cascades = set()
    for scheduled_op in sg.sched_ops:
        cost = sg.schedule.cost_map[scheduled_op]
        cascade_id = cost.cascade
        if cascade_id in seen_cascades:
            # Already emitted when the cascade's last Op was processed
            continue

        if cascade_id == 0:
            # Not part of a cascade - generate commands for this Op alone
            commands.extend(generate_high_level_commands_for_sched_op(scheduled_op, sg.schedule))
        else:
            # Generate the whole cascade, starting from its last Op
            info = sg.schedule.cascades[cascade_id]
            commands.extend(generate_high_level_commands_for_sched_op(sg.sched_ops[info.end], sg.schedule))
            seen_cascades.add(cascade_id)

    sg.high_level_command_stream = commands
    if verbose_high_level_command_stream:
        sg.print_high_level_command_stream()
58
59
def generate_high_level_commands_for_sched_op(sched_op, schedule):
    """Generate the high-level commands (DMA and NpuStripe) for one scheduled Op.

    The OFM is walked stripe by stripe (height x width x depth-slice order).
    For each stripe the required IFM/IFM2 boxes are derived from the OFM box,
    and - when the Op is inside a cascade - the producer Op's command stream is
    interleaved lazily until enough IFM data is present for the stripe.

    This is a generator; it yields DMA and NpuStripe commands in execution order.
    """
    op_info = schedule.cost_map[sched_op]
    # cascades.get() returns None for cascade id 0 (Op not in a cascade)
    cascade_info = schedule.cascades.get(op_info.cascade)
    npu_block_type = sched_op.parent_ps.npu_block_type
    block_config = op_info.block_config
    ps = sched_op.parent_ps
    parent_op = sched_op.parent_op
    ofm_tensor = ps.ofm_tensor

    # Get Tensors and Full Shapes (bias/ofm tensors are unused here)
    (
        ifm_tensor,
        ifm2_tensor,
        uncomp_weight_tensor,
        _,
        _,
    ) = parent_op.get_ifm_ifm2_weights_biases_ofm()
    if sched_op.reversed_operands:
        # Operand order was swapped during scheduling - swap back the raw tensors
        ifm2_tensor, ifm_tensor = ifm_tensor, ifm2_tensor
    ifm = sched_op.ifm
    ifm2 = sched_op.ifm2
    ofm_shape = sched_op.ofm.shape

    # Get Kernel strides and upscaling factor (NHWC-style stride list)
    kernel_stride = sched_op.kernel.stride
    strides = [1, kernel_stride.y, kernel_stride.x, 1]
    skirt = parent_op.attrs.get("skirt", None)
    upscaling = 1
    if sched_op.op_type == Op.Conv2DBackpropInputSwitchedBias:
        # Transpose conv: OFM height is an integer multiple of IFM height
        upscaling = ofm_shape.height // ifm.shape.height
    elif sched_op.op_type.is_resize_op():
        upscaling = round_up_divide(ofm_shape.height, ifm.shape.height)

    # Get kernel height and height dilation
    k_height = 1
    if npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
        if parent_op is not None:
            k_height = parent_op.attrs["ksize"][1]
    else:
        if uncomp_weight_tensor is not None:
            # Weight tensor layout puts kernel height in dim 0
            k_height = uncomp_weight_tensor.shape[0]

    # NOTE(review): the default tuple reuses `_` (the throwaway binding from the
    # unpacking above) as filler; only index -3 (height dilation) is read, so the
    # filler values never matter, but `(1, 1, 1, 1)` would be clearer - confirm.
    k_height_dilation = parent_op.attrs.get("dilation", (_, 1, _, _))[-3]

    # Calculate dilated kernel height
    k_dilated_height = k_height_dilation * (k_height - 1) + 1

    # Define Start and End coordinates for the OFM
    ofm_start = Shape4D(0, 0, 0, op_info.ofm_depth_slices[0])
    ofm_end = ofm_shape

    ofm_depth_slices = op_info.ofm_depth_slices

    # Read/Write offsets
    read_offsets = list(parent_op.read_offsets)  # offset for [ifm, ifm2]
    read_shapes = list(parent_op.read_shapes)  # read shapes for [ifm, ifm2]
    write_offset = Shape4D(0, 0, 0, 0)
    if parent_op.write_offset is not None:
        # Op writes a sub-region of the OFM - restrict the stripe range to it
        write_offset = parent_op.write_offset
        ofm_start = write_offset
        ofm_end = parent_op.write_offset + parent_op.write_shape

    # Create activation function if needed (fused activation on the primary op)
    for op in ps.ops:
        if op.type.is_relu_op() or op.type in (Op.Tanh, Op.Sigmoid):
            ps.primary_op.activation = create_activation_function(
                op.type, min=op.attrs.get("min", None), max=op.attrs.get("max", None)
            )

    # Generate commands for the Op that produces this Op's IFM, if applicable
    if cascade_info is None or cascade_info.start == sched_op.index:
        # Lone Op or First Op in cascade - all IFM data is present
        ifm_present = Box([0, 0, 0, 0], ifm.shape.as_list())
        producer_op = None
        prev_cmd_gen = []
    else:
        # Mid/last Op in a cascade - IFM arrives incrementally from the producer
        ifm_present = Box([0, 0, 0, 0], [0, 0, 0, 0])
        producer_op = sched_op.ifm.connection.producers[0]
        # Lazy recursion: the producer's commands are pulled only as needed below
        prev_cmd_gen = generate_high_level_commands_for_sched_op(producer_op, schedule)
    ofm_step = op_info.stripe
    for start_height in range(ofm_start.height, ofm_end.height, ofm_step.height):
        end_height = min(start_height + ofm_step.height, ofm_end.height)
        for start_width in range(ofm_start.width, ofm_end.width, ofm_step.width):
            end_width = min(start_width + ofm_step.width, ofm_end.width)

            # LUT DMA must happen at most once per (height, width) stripe
            lut_dma_done = False
            for depth_idx, start_channel in enumerate(ofm_depth_slices[:-1]):
                start_channel = max(start_channel, ofm_start.depth)
                end_channel = min(ofm_depth_slices[depth_idx + 1], ofm_end.depth)

                # Construct the OFM box for the current stripe
                ofm_box_start = Shape4D(ofm_start.batch, start_height, start_width, start_channel)
                ofm_box_end = Shape4D(ofm_end.batch, end_height, end_width, end_channel)
                ofm_box = Box(ofm_box_start.as_list(), ofm_box_end.as_list())
                ifm_box = Box([], [])
                ifm2_box = Box([], [])
                # Calculate IFM input box based on the OFM box
                if ifm:
                    ifm_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                        strides,
                        skirt,
                        ifm.shape,
                        npu_block_type,
                        write_offset.as_list(),
                        k_dilated_height,
                        read_offsets[0],
                        read_shapes[0],
                        upscaling,
                        # NOTE(review): `op` leaks from the activation loop above, so
                        # this is the type of the LAST op in ps.ops - confirm intended.
                        op.type,
                    )
                # Calculate IFM2 input box based on the OFM box
                if ifm2:
                    ifm2_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                        strides,
                        skirt,
                        ifm2.shape,
                        npu_block_type,
                        write_offset.as_list(),
                        k_dilated_height,
                        read_offsets[1],
                        read_shapes[1],
                        upscaling,
                        # NOTE(review): same leaked `op` as the IFM case above
                        op.type,
                    )

                ifm_required = ifm_box
                # Get the Op that produces this Op's IFM data - only applicable within cascades
                if producer_op:
                    assert op_info.cascade != 0
                    assert op_info.cascade == schedule.cost_map[producer_op].cascade
                    if not ifm_required.is_subbox_of(ifm_present):
                        # Pull producer commands until the required IFM box is covered;
                        # the generator resumes from here on the next stripe.
                        for prev_cmd in prev_cmd_gen:
                            yield prev_cmd
                            if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps:
                                ifm_present.end_coord = prev_cmd.ofm_box.end_coord
                                if ifm_required.is_subbox_of(ifm_present):
                                    # There is enough IFM data - exit loop
                                    break

                # Information about the current stripe's location in the cascade
                is_first_h_stripe = ofm_box_start.height == ofm_start.height
                is_last_h_stripe = ofm_box_end.height >= ofm_end.height

                # Calculate the weight box - i.e. the subshape of weights needed for this NpuStripe command
                weight_tensor = op_info.npu_weights_tensor
                scale_tensor = op_info.npu_scales_tensor
                if op_info.npu_weights_tensor:
                    weight_box = Box([0, 0, 0, start_channel], [1, 1, 1, end_channel])

                    if op_info.buffered_weight_tensors and is_first_h_stripe:
                        # Double-buffered weights: alternate buffers per depth slice
                        idx = depth_idx % len(op_info.buffered_weight_tensors)
                        yield from dma_if_necessary(
                            sched_op.parent_ps, weight_box, op_info.buffered_weight_tensors[idx]
                        )
                        weight_tensor = op_info.buffered_weight_tensors[idx]
                else:
                    weight_box = None

                # Should only be done once per loop but not before weights above
                if parent_op.activation_lut and not lut_dma_done:
                    lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]
                    lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))
                    lut_dma_done = True
                    yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)

                # NOTE(review): pad_top/pad_bottom are only bound if `ifm` or `ifm2`
                # was truthy above - presumably every sched_op has an IFM; confirm.
                yield NpuStripe(
                    sched_op.parent_ps,
                    block_config.old_style_representation(),
                    is_first_h_stripe,
                    is_last_h_stripe,
                    ifm_tensor,
                    ifm_box,
                    ofm_tensor,
                    ofm_box,
                    weight_tensor,
                    weight_box,
                    scale_tensor,
                    ifm2_tensor=ifm2_tensor,
                    ifm2_box=ifm2_box,
                    pad_top=pad_top,
                    pad_bottom=pad_bottom,
                    reversed_operands=sched_op.reversed_operands,
                )