# SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Generate a high-level command stream from a schedule
from .high_level_command_stream import Box
from .high_level_command_stream import DMA
from .high_level_command_stream import NpuStripe
from .numeric_util import round_up_divide
from .operation import create_activation_function
from .operation import NpuBlockType
from .operation import Op
from .shape4d import Shape4D
from .tensor import TensorPurpose


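# Yield a DMA command if the tensor has a source tensor located in a different memory area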
def dma_if_necessary(ps, box, tensor):
    src_tensor = tensor.src_tensor
    if src_tensor and tensor.mem_area != src_tensor.mem_area:
        yield DMA(ps, src_tensor, tensor, box)


def generate_high_level_command_stream_for_schedule(nng, sg, arch, verbose_high_level_command_stream):
    res = []
    # sg.sched_ops are ordered by execution
    processed_cascades = set()
    for sched_op in sg.sched_ops:
        op_info = sg.schedule.cost_map[sched_op]
        if op_info.cascade in processed_cascades:
            # This cascade has already been processed
            continue

        if op_info.cascade == 0:
            # Generate high-level commands for this Op in isolation
            res += list(generate_high_level_commands_for_sched_op(sched_op, sg.schedule))
        else:
            # Generate high-level commands for the whole cascade
            cascade_info = sg.schedule.cascades[op_info.cascade]
            # Start from the last Op in the cascade
            res += list(generate_high_level_commands_for_sched_op(sg.sched_ops[cascade_info.end], sg.schedule))
            processed_cascades.add(op_info.cascade)

    sg.high_level_command_stream = res
    if verbose_high_level_command_stream:
        sg.print_high_level_command_stream()


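# Generator that yields the DMA and NpuStripe commands for one scheduled Op; within a cascade it
# recursively interleaves the producer's commands so that IFM data becomes available stripe by stripe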
def generate_high_level_commands_for_sched_op(sched_op, schedule):
    op_info = schedule.cost_map[sched_op]
    cascade_info = schedule.cascades.get(op_info.cascade)
    npu_block_type = sched_op.parent_ps.npu_block_type
    block_config = op_info.block_config
    ps = sched_op.parent_ps
    parent_op = sched_op.parent_op
    ofm_tensor = ps.ofm_tensor

    # Get Tensors and Full Shapes
    (
        ifm_tensor,
        ifm2_tensor,
        uncomp_weight_tensor,
        _,
        _,
    ) = parent_op.get_ifm_ifm2_weights_biases_ofm()
    if sched_op.reversed_operands:
        ifm2_tensor, ifm_tensor = ifm_tensor, ifm2_tensor
    ifm = sched_op.ifm
    ifm2 = sched_op.ifm2
    ofm_shape = sched_op.ofm.shape

    # Get Kernel strides and upscaling factor
    kernel_stride = sched_op.kernel.stride
    strides = [1, kernel_stride.y, kernel_stride.x, 1]
    skirt = parent_op.attrs.get("skirt", None)
    upscaling = 1
    if sched_op.op_type == Op.Conv2DBackpropInputSwitchedBias:
        upscaling = ofm_shape.height // ifm.shape.height
    elif sched_op.op_type.is_resize_op():
        upscaling = round_up_divide(ofm_shape.height, ifm.shape.height)

    # Get kernel height and height dilation
    k_height = 1
    if npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
        if parent_op is not None:
            k_height = parent_op.attrs["ksize"][1]
    else:
        if uncomp_weight_tensor is not None:
            k_height = uncomp_weight_tensor.shape[0]

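    # The "dilation" attribute is in NHWC order, so index -3 picks the height dilation (defaults to 1)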
    k_height_dilation = parent_op.attrs.get("dilation", (_, 1, _, _))[-3]

    # Calculate dilated kernel height
    k_dilated_height = k_height_dilation * (k_height - 1) + 1

    # Define Start and End coordinates for the OFM
    ofm_start = Shape4D(0, 0, 0, op_info.ofm_depth_slices[0])
    ofm_end = ofm_shape

    ofm_depth_slices = op_info.ofm_depth_slices

    # Read/Write offsets
    read_offsets = list(parent_op.read_offsets)  # offset for [ifm, ifm2]
    read_shapes = list(parent_op.read_shapes)  # read shapes for [ifm, ifm2]
    write_offset = Shape4D(0, 0, 0, 0)
    if parent_op.write_offset is not None:
        write_offset = parent_op.write_offset
        ofm_start = write_offset
        ofm_end = parent_op.write_offset + parent_op.write_shape

    # Create activation function if needed
    for op in ps.ops:
        if op.type.is_relu_op() or op.type in (Op.Tanh, Op.Sigmoid):
            ps.primary_op.activation = create_activation_function(
                op.type, min=op.attrs.get("min", None), max=op.attrs.get("max", None)
            )

    # Generate commands for the Op that produces this Op's IFM, if applicable
    if cascade_info is None or cascade_info.start == sched_op.index:
        # Lone Op or First Op in cascade - all IFM data is present
        ifm_present = Box([0, 0, 0, 0], ifm.shape.as_list())
        producer_op = None
        prev_cmd_gen = []
    else:
        ifm_present = Box([0, 0, 0, 0], [0, 0, 0, 0])
        producer_op = sched_op.ifm.connection.producers[0]
        prev_cmd_gen = generate_high_level_commands_for_sched_op(producer_op, schedule)
    ofm_step = op_info.stripe
    for start_height in range(ofm_start.height, ofm_end.height, ofm_step.height):
        end_height = min(start_height + ofm_step.height, ofm_end.height)
        for start_width in range(ofm_start.width, ofm_end.width, ofm_step.width):
            end_width = min(start_width + ofm_step.width, ofm_end.width)

            lut_dma_done = False
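            # ofm_depth_slices holds slice boundaries; each pair (slice[i], slice[i + 1]) is one depth range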
            for depth_idx, start_channel in enumerate(ofm_depth_slices[:-1]):
                start_channel = max(start_channel, ofm_start.depth)
                end_channel = min(ofm_depth_slices[depth_idx + 1], ofm_end.depth)

                # Construct the OFM box for the current stripe
                ofm_box_start = Shape4D(ofm_start.batch, start_height, start_width, start_channel)
                ofm_box_end = Shape4D(ofm_end.batch, end_height, end_width, end_channel)
                ofm_box = Box(ofm_box_start.as_list(), ofm_box_end.as_list())
                ifm_box = Box([], [])
                ifm2_box = Box([], [])
                # Calculate IFM input box based on the OFM box
                if ifm:
                    ifm_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                        strides,
                        skirt,
                        ifm.shape,
                        npu_block_type,
                        write_offset.as_list(),
                        k_dilated_height,
                        read_offsets[0],
                        read_shapes[0],
                        upscaling,
                        op.type,
                    )
                # Calculate IFM2 input box based on the OFM box
                if ifm2:
                    ifm2_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                        strides,
                        skirt,
                        ifm2.shape,
                        npu_block_type,
                        write_offset.as_list(),
                        k_dilated_height,
                        read_offsets[1],
                        read_shapes[1],
                        upscaling,
                        op.type,
                    )

                ifm_required = ifm_box
                # Get the Op that produces this Op's IFM data - only applicable within cascades
                if producer_op:
                    assert op_info.cascade != 0
                    assert op_info.cascade == schedule.cost_map[producer_op].cascade
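                    # Pull stripes from the producer until enough IFM rows are present to compute this stripe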
                    if not ifm_required.is_subbox_of(ifm_present):
                        for prev_cmd in prev_cmd_gen:
                            yield prev_cmd
                            if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps:
                                ifm_present.end_coord = prev_cmd.ofm_box.end_coord
                                if ifm_required.is_subbox_of(ifm_present):
                                    # There is enough IFM data - exit loop
                                    break

                # Information about the current stripe's location in the cascade
                is_first_h_stripe = ofm_box_start.height == ofm_start.height
                is_last_h_stripe = ofm_box_end.height >= ofm_end.height

                # Calculate the weight box - i.e. the subshape of weights needed for this NpuStripe command
                weight_tensor = op_info.npu_weights_tensor
                scale_tensor = op_info.npu_scales_tensor
                if op_info.npu_weights_tensor:
                    weight_box = Box([0, 0, 0, start_channel], [1, 1, 1, end_channel])

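                    # Cycle through the buffered weight copies per depth slice and DMA the weights
                    # for this slice in on the first height stripe only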
                    if op_info.buffered_weight_tensors and is_first_h_stripe:
                        idx = depth_idx % len(op_info.buffered_weight_tensors)
                        yield from dma_if_necessary(
                            sched_op.parent_ps, weight_box, op_info.buffered_weight_tensors[idx]
                        )
                        weight_tensor = op_info.buffered_weight_tensors[idx]
                else:
                    weight_box = None

                # Should only be done once per loop but not before weights above
                if parent_op.activation_lut and not lut_dma_done:
                    lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]
                    lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))
                    lut_dma_done = True
                    yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)

                yield NpuStripe(
                    sched_op.parent_ps,
                    block_config.old_style_representation(),
                    is_first_h_stripe,
                    is_last_h_stripe,
                    ifm_tensor,
                    ifm_box,
                    ofm_tensor,
                    ofm_box,
                    weight_tensor,
                    weight_box,
                    scale_tensor,
                    ifm2_tensor=ifm2_tensor,
                    ifm2_box=ifm2_box,
                    pad_top=pad_top,
                    pad_bottom=pad_bottom,
                    reversed_operands=sched_op.reversed_operands,
                )