Blame - ethosu/vela/npu_serialisation.py - ml/ethos-u/ethos-u-vela

2020-04-27 18:20:16 +0100

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Serialises and packs an NPU subgraph into tensors.

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

18

import numpy as np

19

20

from . import driver_actions

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

21

from .data_type import DataType

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

22

from .nn_graph import PassPlacement

Louis Verhaard

2020-09-30 09:01:52 +0200

[diff] [blame]

23

from .operation import Op

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

24

from .operation import Operation

25

from .tensor import MemArea

Patrik Gustavsson

2020-05-27 09:15:11 +0200

[diff] [blame]

26

from .tensor import MemType

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

27

from .tensor import Tensor

28

from .tensor import TensorFormat

29

from .tensor import TensorPurpose

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

30

31

Patrik Gustavsson

2020-05-27 09:15:11 +0200

[diff] [blame]

32

def make_memory_tensor(name, mem_area, mem_type, sz, want_values, arch):
    """Create a one-dimensional uint8 tensor describing a block of memory.

    Args:
        name: Name given to the new tensor.
        mem_area: Value stored in the tensor's ``mem_area`` attribute.
        mem_type: Value stored in the tensor's ``mem_type`` attribute.
        sz: Number of uint8 elements; the tensor shape is ``[sz]``.
        want_values: When true, ``values`` is set to a zero-filled uint8
            array of the tensor's shape; otherwise ``values`` is not assigned.
        arch: Passed through to ``Tensor.set_format``.

    Returns:
        The newly created tensor.
    """
    mem_tensor = Tensor([sz], DataType.uint8, name)
    mem_tensor.mem_area = mem_area
    mem_tensor.mem_type = mem_type
    mem_tensor.purpose = TensorPurpose.FeatureMap
    mem_tensor.set_format(TensorFormat.NHWC, arch)

    if not want_values:
        return mem_tensor

    # Backing storage is only allocated when the caller asks for it.
    mem_tensor.values = np.zeros(mem_tensor.shape, np.uint8)
    return mem_tensor

def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor):
    """Write the compressed chunks of ``src_tensor`` into ``memory_tensor.values``.

    The first chunk is written starting at index ``src_tensor.address``; each
    following chunk starts where the previous one ended.

    Args:
        memory_tensor: Object whose ``values`` supports slice assignment.
        src_tensor: Object providing ``address`` and an iterable
            ``compressed_values`` of sized chunks.
    """
    write_pos = src_tensor.address
    for chunk in src_tensor.compressed_values:
        chunk_len = len(chunk)
        memory_tensor.values[write_pos : write_pos + chunk_len] = chunk
        write_pos += chunk_len

49

Tim Hall

c30f495

2020-06-15 20:47:35 +0100

[diff] [blame]

50

Charles Xu

2020-05-13 10:15:26 +0200

[diff] [blame]

51

def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor):
    """Copy a tensor's values into ``memory_tensor.values`` at its address.

    ``src_tensor.quant_values`` is used when it is not None, otherwise
    ``src_tensor.values``.  The chosen array is flattened and, when the
    tensor's dtype is wider than one byte, reinterpreted as raw uint8 bytes
    before being written starting at index ``src_tensor.address``.

    Args:
        memory_tensor: Object whose ``values`` supports slice assignment.
        src_tensor: Object providing ``address``, ``quant_values``,
            ``values`` and ``dtype.size_in_bytes()``.
    """
    base = src_tensor.address

    if src_tensor.quant_values is not None:
        data = src_tensor.quant_values.flatten()
    else:
        data = src_tensor.values.flatten()

    # Multi-byte elements are stored as their underlying byte sequence.
    if src_tensor.dtype.size_in_bytes() > 1:
        data = np.frombuffer(data.tobytes(), dtype=np.uint8)

    memory_tensor.values[base : base + data.size] = data

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

58

Tim Hall

c30f495

2020-06-15 20:47:35 +0100

[diff] [blame]

59

Patrik Gustavsson

2020-06-29 17:36:55 +0200

[diff] [blame]

60

def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, scratch_fast_tens, flash_tens):
    """Pack an NPU subgraph's constant data and command stream into tensors.

    A subgraph that is not placed on the NPU is left untouched and the
    incoming tensors are returned unchanged.  For an NPU subgraph this:

    * subtracts the subgraph's flash/scratch usage and the command stream
      size from the per-memory-area totals kept on ``nng``;
    * creates the scratch, flash and scratch-fast tensors when neither a
      flash nor a scratch tensor was passed in, otherwise reuses the supplied
      tensors and grows the scratch and flash shapes by this subgraph's usage;
    * copies weights, scales, LUTs and non-scratch IFMs of every NPU pass
      into the flash tensor at each source tensor's address;
    * stores the driver payload in ``sg.command_stream_tensor``.

    Args:
        nng: Object whose ``total_size`` and ``total_elements`` mappings are
            adjusted in place.
        sg: Subgraph to serialise; its ``scratch_tensor``, ``flash_tensor``,
            ``scratch_fast_tensor`` and ``command_stream_tensor`` attributes
            are assigned.
        arch: Supplies the permanent-storage, feature-map and fast-storage
            memory areas and is passed on to the helpers.
        scratch_tens: Existing scratch tensor, or None.
        scratch_fast_tens: Existing fast scratch tensor, or None.
        flash_tens: Existing flash tensor, or None.

    Returns:
        The tuple ``(scratch_tensor, scratch_fast_tensor, flash_tensor)`` to
        hand to the next call.
    """
    if sg.placement != PassPlacement.Npu:
        return scratch_tens, scratch_fast_tens, flash_tens

    flash_area = arch.permanent_storage_mem_area
    scratch_area = arch.feature_map_storage_mem_area
    scratch_fast_area = arch.fast_storage_mem_area

    flash_size = sg.memory_used.get(flash_area, 0)
    scratch_size = sg.memory_used.get(scratch_area, 0)

    payload_bytes = driver_actions.create_driver_payload(sg.register_command_stream, arch)

    command_stream_size_bytes = len(payload_bytes)

    # Adjust the bits per element calculation to exclude metadata generated by Vela
    nng.total_size[flash_area] = nng.total_size.get(flash_area, 0) - flash_size - command_stream_size_bytes
    nng.total_elements[flash_area] = nng.total_elements.get(flash_area, 0) - flash_size - command_stream_size_bytes
    nng.total_size[scratch_area] = nng.total_size.get(scratch_area, 0) - scratch_size
    nng.total_elements[scratch_area] = nng.total_elements.get(scratch_area, 0) - scratch_size

    if scratch_area != scratch_fast_area:
        # Only ensures the fast scratch area has an entry; nothing is subtracted.
        nng.total_size[scratch_fast_area] = nng.total_size.get(scratch_fast_area, 0)
        nng.total_elements[scratch_fast_area] = nng.total_elements.get(scratch_fast_area, 0)

    # Explicit identity checks replace the chained `flash_tens == scratch_tens is None`.
    if flash_tens is None and scratch_tens is None:
        # First Npu subgraph, create scratch and flash tensors
        sg.scratch_tensor = make_memory_tensor(
            sg.name + "_scratch", scratch_area, MemType.Scratch, scratch_size, False, arch
        )
        sg.scratch_tensor.purpose = TensorPurpose.Scratch
        sg.flash_tensor = make_memory_tensor(
            sg.name + "_flash", flash_area, MemType.Permanent_CPU, flash_size, True, arch
        )
        sg.scratch_fast_tensor = make_memory_tensor(
            sg.name + "_scratch_fast", scratch_fast_area, MemType.Scratch_fast, 0, False, arch
        )
        sg.scratch_fast_tensor.purpose = TensorPurpose.Scratch
    else:
        # Later subgraphs share the existing tensors and extend their sizes.
        sg.scratch_tensor = scratch_tens
        sg.scratch_tensor.shape[0] += scratch_size
        sg.flash_tensor = flash_tens
        sg.flash_tensor.shape[0] += flash_size

        sg.scratch_fast_tensor = scratch_fast_tens
        sg.scratch_fast_tensor.shape[0] = 0

    for cps in sg.cascaded_passes:
        for ps in cps.passes:
            if ps.placement != PassPlacement.Npu:
                continue

            if ps.weight_tensor is not None:
                # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
                # is pointing at the destination address of where the weights should be placed in SRAM.
                # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
                # The scale tensor is resolved the same way, after the weights.
                for tens in (ps.weight_tensor, ps.scale_tensor):
                    producer = tens.ops[0]
                    flash_src = producer.inputs[0] if producer.type == Op.DMA else tens
                    copy_compressed_values_to_memory_tensor(sg.flash_tensor, flash_src)

            if ps.lut_tensor is not None:
                copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.lut_tensor)
            # IFMs in either scratch memory type are not copied into the flash tensor.
            if ps.ifm_tensor is not None and ps.ifm_tensor.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
                copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm_tensor)
            if ps.ifm2_tensor is not None and (
                ps.ifm2_tensor.mem_type not in (MemType.Scratch, MemType.Scratch_fast)
            ):
                copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm2_tensor)

    sg.command_stream_tensor = make_memory_tensor(
        sg.name + "_command_stream", flash_area, MemType.Permanent_CPU, command_stream_size_bytes, True, arch
    )
    # Replace the zero-filled placeholder with the actual driver payload bytes.
    sg.command_stream_tensor.values = np.frombuffer(payload_bytes, dtype=np.uint8)

    return sg.scratch_tensor, sg.scratch_fast_tensor, sg.flash_tensor

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

139

140

141

def add_const_tens_to_startup_cascaded_pass(startup_cps, tens):
    """Register ``tens`` as a constant produced by the startup cascaded pass.

    A new ``Op.Const`` operation named ``<tens.name>_const`` is created with
    ``tens`` as its output.  The operation and the tensor are inserted at the
    front of the first pass's ``ops`` and ``outputs`` lists, and the tensor is
    also inserted at the front of the cascaded pass's ``outputs``.

    Args:
        startup_cps: Cascaded pass whose first pass receives the new op.
        tens: Tensor to expose as a constant.
    """
    const_op = Operation(Op.Const, tens.name + "_const")
    const_op.set_output_tensor(tens)

    first_pass = startup_cps.passes[0]
    first_pass.ops.insert(0, const_op)
    first_pass.outputs.insert(0, tens)

    startup_cps.outputs.insert(0, tens)

147

148

149

def rewrite_npu_call_ops(nng, sg, arch):

150

if sg.placement != PassPlacement.Cpu:

151

return

152

153

startup_cps = sg.cascaded_passes[0]

154

155

for idx, cps in enumerate(sg.cascaded_passes):

156

for ps in cps.passes:

157

for op in ps.ops:

Louis Verhaard

2020-09-30 09:01:52 +0200

[diff] [blame]

158

if op.type == Op.CustomNpuOp:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

159

callee = op.attrs["subgraph"]

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

160

161

sz = 0

Patrik Gustavsson

2020-06-29 17:36:55 +0200

[diff] [blame]

162

for tens in [

163

callee.scratch_fast_tensor,

164

callee.scratch_tensor,

165

callee.flash_tensor,

166

callee.command_stream_tensor,

167

]:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

168

op.inputs.insert(0, tens)

169

ps.inputs.insert(0, tens)

170

cps.inputs.insert(0, tens)

Patrik Gustavsson

2020-06-29 17:36:55 +0200

[diff] [blame]

171

if tens != callee.scratch_tensor and tens != callee.scratch_fast_tensor:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

172

add_const_tens_to_startup_cascaded_pass(startup_cps, tens)

173

sz += tens.storage_size()

174

175

for prev_cps in sg.cascaded_passes[: idx + 1]:

176

prev_cps.sram_used += sz

177

178

if callee.scratch_tensor is not None:

Patrik Gustavsson

2020-05-27 09:15:11 +0200

[diff] [blame]

179

if callee.scratch_tensor.mem_area == MemArea.Sram:

180

cps.sram_used += callee.scratch_tensor.storage_size()

Patrik Gustavsson