Blame - ethosu/vela/high_level_command_stream_generator.py - ml/ethos-u/ethos-u-vela

2023-02-02 09:07:48 +0100

[diff] [blame^]

1

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Rickard Bolin

bc6ee58

2022-11-04 08:24:29 +0000

[diff] [blame]

16

#

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

17

# Description:

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

18

# Generate a high-level command stream from a schedule

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

19

from .high_level_command_stream import Box

20

from .high_level_command_stream import DMA

Johan Alfven

2023-02-02 09:07:48 +0100

[diff] [blame^]

21

from .high_level_command_stream import NOP

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

22

from .high_level_command_stream import NpuStripe

Charles Xu

89a6bbf

2020-08-11 12:31:58 +0200

[diff] [blame]

23

from .numeric_util import round_up_divide

Louis Verhaard

e8a5a78

2020-11-02 18:04:27 +0100

[diff] [blame]

24

from .operation import create_activation_function

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

25

from .operation import NpuBlockType

Louis Verhaard

aee5d75

2020-09-30 09:01:52 +0200

[diff] [blame]

26

from .operation import Op

patrik.gustavsson

eeb8515

2020-12-21 17:10:40 +0000

[diff] [blame]

27

from .shape4d import Shape4D

Charles Xu

7879222

2020-05-13 10:15:26 +0200

[diff] [blame]

28

from .tensor import TensorPurpose

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

29

30

Charles Xu

7879222

2020-05-13 10:15:26 +0200

[diff] [blame]

31

def dma_if_necessary(ps, box, tensor):
    """Yield a DMA command for `tensor` when its source sits in another memory area.

    Nothing is yielded when `tensor.src_tensor` is falsy or when the tensor and
    its source tensor report the same `mem_area`.

    ps: pass that the generated DMA command is associated with
    box: region handed to the DMA command
    tensor: destination tensor; its `src_tensor` attribute is the DMA source
    """
    source = tensor.src_tensor
    if not source:
        # No source tensor to copy from
        return

    needs_copy = tensor.mem_area != source.mem_area
    if needs_copy:
        yield DMA(ps, source, tensor, box)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

35

Tim Hall

c30f495

2020-06-15 20:47:35 +0100

[diff] [blame]

36

Johan Alfven

2023-02-02 09:07:48 +0100

[diff] [blame^]

37

def dma_feature_map_if_necessary(ps, src_tensor, dst_tensor):
    """Yield a DMA command copying the whole of `src_tensor` into `dst_tensor`, or a NOP.

    A DMA is yielded when the two tensors resolve to different addresses for the
    start coordinate of the box, or when they report different memory areas.
    Otherwise a NOP is yielded instead.

    ps: pass that the generated command is associated with
    src_tensor: tensor to copy from; its shape defines the box that is transferred
    dst_tensor: tensor to copy to
    """
    # Box spanning the complete source tensor, starting at the all-zero coordinate
    box = Box([0] * len(src_tensor.shape), list(src_tensor.shape))
    origin = box.start_coord

    src_addr = src_tensor.address_for_coordinate(origin)
    dst_addr = dst_tensor.address_for_coordinate(origin)
    needs_transfer = src_addr != dst_addr or src_tensor.mem_area != dst_tensor.mem_area

    if needs_transfer:
        command = DMA(ps, src_tensor, dst_tensor, box)
    else:
        # Source and destination are the same, so no DMA transaction is needed.
        # A NOP is created for visibility when printing the high_level_command_stream
        command = NOP(ps, src_tensor, dst_tensor)
    yield command

48

49

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

50

def generate_high_level_command_stream_for_schedule(nng, sg, arch, verbose_high_level_command_stream):
    """Build the high-level command stream for `sg` and store it on the subgraph.

    Walks `sg.sched_ops` in order. An Op whose cost-map entry has cascade 0 gets
    its commands generated in isolation. For any other cascade value, generation
    is started once from the Op at the cascade's `end` index and later Ops with
    the same cascade value are skipped.

    nng: not used by this function
    sg: subgraph providing `sched_ops` and `schedule`; receives the result in
        `high_level_command_stream`
    arch: not used by this function
    verbose_high_level_command_stream: when truthy, the resulting stream is
        printed through `sg.print_high_level_command_stream()`
    """
    commands = []
    seen_cascades = set()

    # sg.sched_ops are ordered by execution
    for sched_op in sg.sched_ops:
        schedule = sg.schedule
        cascade_id = schedule.cost_map[sched_op].cascade

        if cascade_id in seen_cascades:
            # This cascade has already been processed
            continue

        if cascade_id == 0:
            # Generate high-level commands for this Op in isolation
            commands.extend(generate_high_level_commands_for_sched_op(sched_op, schedule))
            continue

        # Generate high-level commands for the whole cascade,
        # starting from the last Op in the cascade
        last_op = sg.sched_ops[schedule.cascades[cascade_id].end]
        commands.extend(generate_high_level_commands_for_sched_op(last_op, schedule))
        seen_cascades.add(cascade_id)

    sg.high_level_command_stream = commands
    if verbose_high_level_command_stream:
        sg.print_high_level_command_stream()

73

74

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

75

def generate_high_level_commands_for_sched_op(sched_op, schedule):

76

op_info = schedule.cost_map[sched_op]

77

cascade_info = schedule.cascades.get(op_info.cascade)

78

npu_block_type = sched_op.parent_ps.npu_block_type

79

block_config = op_info.block_config

80

ps = sched_op.parent_ps

81

parent_op = sched_op.parent_op

82

ofm_tensor = ps.ofm_tensor

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

83

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

84

# Get Tensors and Full Shapes

Jonas Ohlsson

d857507

2022-03-30 10:30:25 +0200

[diff] [blame]

(

ifm_tensor,

ifm2_tensor,

uncomp_weight_tensor,

89

_,

90

_,

91

) = parent_op.get_ifm_ifm2_weights_biases_ofm()

Fredrik Svedberg

b81e1bb

2022-10-11 21:50:51 +0200

[diff] [blame]

92

if sched_op.reversed_operands:

93

ifm2_tensor, ifm_tensor = ifm_tensor, ifm2_tensor

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

94

ifm = sched_op.ifm

95

ifm2 = sched_op.ifm2

96

ofm_shape = sched_op.ofm.shape

97

98

# Get Kernel strides and upscaling factor

99

kernel_stride = sched_op.kernel.stride

100

strides = [1, kernel_stride.y, kernel_stride.x, 1]

101

skirt = parent_op.attrs.get("skirt", None)

102

upscaling = 1

103

if sched_op.op_type == Op.Conv2DBackpropInputSwitchedBias:

104

upscaling = ofm_shape.height // ifm.shape.height

Tim Hall

885033b

2022-07-21 11:46:03 +0100

[diff] [blame]

105

elif sched_op.op_type.is_resize_op():

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

106

upscaling = round_up_divide(ofm_shape.height, ifm.shape.height)

107

Rickard Bolin

2022-01-07 14:22:52 +0000

[diff] [blame]

108

# Get kernel height and height dilation

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

109

k_height = 1

110

if npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):

111

if parent_op is not None:

112

k_height = parent_op.attrs["ksize"][1]

113

else:

114

if uncomp_weight_tensor is not None:

115

k_height = uncomp_weight_tensor.shape[0]

116

Rickard Bolin

2022-01-07 14:22:52 +0000

[diff] [blame]

117

k_height_dilation = parent_op.attrs.get("dilation", (_, 1, _, _))[-3]

118

119

# Calculate dilated kernel height

120

k_dilated_height = k_height_dilation * (k_height - 1) + 1

121

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

122

# Define Start and End coordinates for the OFM

123

ofm_start = Shape4D(0, 0, 0, op_info.ofm_depth_slices[0])

124

ofm_end = ofm_shape

125

126

ofm_depth_slices = op_info.ofm_depth_slices

127

128

# Read/Write offsets

129

read_offsets = list(parent_op.read_offsets) # offset for [ifm, ifm2]

130

read_shapes = list(parent_op.read_shapes) # read shapes for [ifm, ifm2]

131

write_offset = Shape4D(0, 0, 0, 0)

132

if parent_op.write_offset is not None:

133

write_offset = parent_op.write_offset

134

ofm_start = write_offset

135

ofm_end = parent_op.write_offset + parent_op.write_shape

136

137

# Create activation function if needed

138

for op in ps.ops:

139

if op.type.is_relu_op() or op.type in (Op.Tanh, Op.Sigmoid):

Patrik Gustavsson

8f1f9aa

2021-06-28 07:41:58 +0200

[diff] [blame]

140

ps.primary_op.activation = create_activation_function(

141

op.type, min=op.attrs.get("min", None), max=op.attrs.get("max", None)

142

)

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

143

144

# Generate commands for the Op that produces this Op's IFM, if applicable

145

if cascade_info is None or cascade_info.start == sched_op.index:

146

# Lone Op or First Op in cascade - all IFM data is present

147

ifm_present = Box([0, 0, 0, 0], ifm.shape.as_list())

producer_op = None

prev_cmd_gen = []

else:

ifm_present = Box([0, 0, 0, 0], [0, 0, 0, 0])

152

producer_op = sched_op.ifm.connection.producers[0]

153

prev_cmd_gen = generate_high_level_commands_for_sched_op(producer_op, schedule)

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

154

ofm_step = op_info.stripe

155

for start_height in range(ofm_start.height, ofm_end.height, ofm_step.height):

156

end_height = min(start_height + ofm_step.height, ofm_end.height)

157

for start_width in range(ofm_start.width, ofm_end.width, ofm_step.width):

158

end_width = min(start_width + ofm_step.width, ofm_end.width)

159

Dwight Lidman

8f78ac2

2021-08-13 14:04:30 +0200

[diff] [blame]

160

lut_dma_done = False

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

161

for depth_idx, start_channel in enumerate(ofm_depth_slices[:-1]):

162

start_channel = max(start_channel, ofm_start.depth)

163

end_channel = min(ofm_depth_slices[depth_idx + 1], ofm_end.depth)

164

165

# Construct the OFM box for the current stripe

166

ofm_box_start = Shape4D(ofm_start.batch, start_height, start_width, start_channel)

167

ofm_box_end = Shape4D(ofm_end.batch, end_height, end_width, end_channel)

168

ofm_box = Box(ofm_box_start.as_list(), ofm_box_end.as_list())

169

ifm_box = Box([], [])

170

ifm2_box = Box([], [])

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

171

# Calculate IFM input box based on the OFM box

172

if ifm:

173

ifm_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(

strides,

skirt,

ifm.shape,

npu_block_type,

write_offset.as_list(),

Rickard Bolin

2022-01-07 14:22:52 +0000

[diff] [blame]

179

k_dilated_height,

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

180

read_offsets[0],

181

read_shapes[0],

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

182

upscaling,

erik.andersson@arm.com

6b2a0b4

2022-03-22 15:35:30 +0100

[diff] [blame]

183

op.type,

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

184

)

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

185

# Calculate IFM2 input box based on the OFM box

186

if ifm2:

187

ifm2_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(

strides,

skirt,

ifm2.shape,

npu_block_type,

write_offset.as_list(),

Rickard Bolin

2022-01-07 14:22:52 +0000

[diff] [blame]

193

k_dilated_height,

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

194

read_offsets[1],

195

read_shapes[1],

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

196

upscaling,

erik.andersson@arm.com

6b2a0b4

2022-03-22 15:35:30 +0100

[diff] [blame]

197

op.type,

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

198

)

199

200

ifm_required = ifm_box

201

# Get the Op that produces this Op's IFM data - only applicable within cascades

202

if producer_op:

203

assert op_info.cascade != 0

204

assert op_info.cascade == schedule.cost_map[producer_op].cascade

Fredrik Svedberg

d03dc50

2022-06-30 10:44:12 +0200

[diff] [blame]

205

if not ifm_required.is_subbox_of(ifm_present):

206

for prev_cmd in prev_cmd_gen:

207

yield prev_cmd

208

if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps:

209

ifm_present.end_coord = prev_cmd.ofm_box.end_coord

210

if ifm_required.is_subbox_of(ifm_present):

211

# There is enough IFM data - exit loop

212

break

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

213

214

# Information about the current stripe's location in the cascade

215

is_first_h_stripe = ofm_box_start.height == ofm_start.height

216

is_last_h_stripe = ofm_box_end.height >= ofm_end.height

217

218

# Calculate the weight box - i.e. the subshape of weights needed for this NpuStripe command

219

weight_tensor = op_info.npu_weights_tensor

Tim Hall

d784af7

2021-06-08 21:25:57 +0100

[diff] [blame]

220

scale_tensor = op_info.npu_scales_tensor

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

221

if op_info.npu_weights_tensor:

222

weight_box = Box([0, 0, 0, start_channel], [1, 1, 1, end_channel])

223

Johan Alfvén

af1d443

2022-12-21 11:23:01 +0100

[diff] [blame]

224

if op_info.buffered_weight_tensors:

Rickard Bolin

fd8b500

2022-05-16 09:11:06 +0000

[diff] [blame]

225

idx = depth_idx % len(op_info.buffered_weight_tensors)

Rickard Bolin

fd8b500

2022-05-16 09:11:06 +0000

[diff] [blame]

226

weight_tensor = op_info.buffered_weight_tensors[idx]

Johan Alfvén

af1d443

2022-12-21 11:23:01 +0100

[diff] [blame]

227

if is_first_h_stripe:

228

yield from dma_if_necessary(

229

sched_op.parent_ps, weight_box, op_info.buffered_weight_tensors[idx]

230

)

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

else:

weight_box = None

Dwight Lidman

2021-08-13 14:04:30 +0200

[diff] [blame]

234

# Should only be done once per loop but not before weights above

235

if parent_op.activation_lut and not lut_dma_done:

236

lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]

237

lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))

238

lut_dma_done = True

239

yield from dma_if_necessary(sched_op.parent_ps, lut_box, lut_tensor)

240

Johan Alfven