Blame - ethosu/vela/npu_serialisation.py - ml/ethos-u/ethos-u-vela

2020-04-27 18:20:16 +0100

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Serialises and packs an NPU subgraph into tensors.

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

import struct

import numpy as np

from . import driver_actions

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

23

from .data_type import DataType

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

24

from .nn_graph import PassPlacement

25

from .operation import Operation

26

from .tensor import MemArea

Patrik Gustavsson

2020-05-27 09:15:11 +0200

[diff] [blame^]

27

from .tensor import MemType

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

28

from .tensor import Tensor

29

from .tensor import TensorFormat

30

from .tensor import TensorPurpose

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

31

32

Patrik Gustavsson

2020-05-27 09:15:11 +0200

[diff] [blame^]

33

def make_memory_tensor(name, mem_area, mem_type, sz, want_values, arch):
    """Create a one-dimensional uint8 tensor used to represent a block of memory.

    Args:
        name: Name given to the new tensor.
        mem_area: Value stored in the tensor's ``mem_area`` attribute.
        mem_type: Value stored in the tensor's ``mem_type`` attribute.
        sz: Number of elements; the tensor is created with shape ``[sz]``.
        want_values: When truthy, ``values`` is set to a zero-filled uint8
            array with the tensor's shape; otherwise ``values`` is not
            assigned here.
        arch: Passed through to ``Tensor.set_format``.

    Returns:
        The newly created tensor.
    """
    tens = Tensor([sz], DataType.uint8, name)
    tens.mem_area = mem_area
    tens.mem_type = mem_type
    tens.purpose = TensorPurpose.FeatureMap
    tens.set_format(TensorFormat.NHWC, arch)
    if want_values:
        # Zero-initialised backing store that callers fill in by address
        tens.values = np.zeros(tens.shape, np.uint8)
    return tens

def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor):
    """Copy the compressed chunks of ``src_tensor`` into ``memory_tensor.values``.

    The chunks in ``src_tensor.compressed_values`` are written back to back,
    the first one starting at index ``src_tensor.address``.

    Args:
        memory_tensor: Object whose ``values`` attribute supports slice
            assignment; it is modified in place.
        src_tensor: Object providing ``address`` (start index) and
            ``compressed_values`` (iterable of sized chunks).
    """
    start_addr = src_tensor.address
    for compressed_values in src_tensor.compressed_values:
        end_addr = start_addr + len(compressed_values)
        memory_tensor.values[start_addr:end_addr] = compressed_values
        # The next chunk is placed immediately after this one
        start_addr = end_addr

50

Tim Hall

c30f495

2020-06-15 20:47:35 +0100

[diff] [blame]

51

Charles Xu

2020-05-13 10:15:26 +0200

[diff] [blame]

52

def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor):
    """Copy ``src_tensor.quant_values`` into ``memory_tensor.values``.

    The values are written as one contiguous run starting at index
    ``src_tensor.address`` and covering ``quant_values.size`` elements.

    Args:
        memory_tensor: Object whose ``values`` attribute is a one-dimensional
            array supporting slice assignment; it is modified in place.
        src_tensor: Object providing ``address`` (start index) and
            ``quant_values`` (a NumPy array of any shape).
    """
    start_addr = src_tensor.address
    # Flatten first: assigning an N-D array straight into a 1-D slice fails
    # to broadcast for shapes such as (2, 2).
    flat_values = src_tensor.quant_values.ravel()
    end_addr = start_addr + flat_values.size
    memory_tensor.values[start_addr:end_addr] = flat_values

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

56

Tim Hall

c30f495

2020-06-15 20:47:35 +0100

[diff] [blame]

57

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

58

def serialise_npu_subgraph_into_tensors(nng, sg, arch, scratch_tens, flash_tens):
    """Pack an NPU subgraph into scratch, flash and command stream tensors.

    Subgraphs whose placement is not ``PassPlacement.Npu`` are left untouched
    and the incoming tensors are returned unchanged.

    For an NPU subgraph this:
      * builds the driver-action payload (fourcc, config, command stream
        header, then the register command stream words) and packs it as
        little-endian unsigned 32-bit words,
      * subtracts the subgraph's flash/scratch usage and the payload size
        from ``nng.total_size`` / ``nng.total_elements``,
      * creates the scratch and flash tensors when none were passed in, or
        otherwise reuses the passed-in tensors and grows their first
        dimension by this subgraph's usage,
      * copies weight, scale and non-scratch IFM data of NPU passes into the
        flash tensor,
      * stores the payload in ``sg.command_stream_tensor``.

    Args:
        nng: Graph object whose ``total_size`` and ``total_elements`` dicts
            are adjusted in place.
        sg: Subgraph to serialise; receives ``scratch_tensor``,
            ``flash_tensor`` and ``command_stream_tensor`` attributes.
        arch: Architecture object providing ``permanent_storage_mem_area``
            and ``feature_map_storage_mem_area``.
        scratch_tens: Existing scratch tensor, or ``None``.
        flash_tens: Existing flash tensor, or ``None``.

    Returns:
        Tuple ``(scratch_tensor, flash_tensor)``.
    """
    if sg.placement != PassPlacement.Npu:
        return scratch_tens, flash_tens

    flash_area = arch.permanent_storage_mem_area
    scratch_area = arch.feature_map_storage_mem_area

    flash_size = sg.memory_used.get(flash_area, 0)
    scratch_size = sg.memory_used.get(scratch_area, 0)

    # Prepare driver actions for this command tensor
    da_list = []
    driver_actions.emit_fourcc(da_list, "COP1")
    driver_actions.emit_config(da_list, 0, 1, arch)
    driver_actions.emit_cmd_stream_header(da_list, len(sg.register_command_stream))

    # Append command stream words
    da_list.extend(sg.register_command_stream)

    # Convert to bytes
    payload_bytes = struct.pack("<{0}I".format(len(da_list)), *da_list)

    command_stream_size_bytes = len(payload_bytes)

    # Adjust the bits per element calculation to exclude metadata generated by Vela
    nng.total_size[flash_area] = nng.total_size.get(flash_area, 0) - flash_size - command_stream_size_bytes
    nng.total_elements[flash_area] = nng.total_elements.get(flash_area, 0) - flash_size - command_stream_size_bytes
    nng.total_size[scratch_area] = nng.total_size.get(scratch_area, 0) - scratch_size
    nng.total_elements[scratch_area] = nng.total_elements.get(scratch_area, 0) - scratch_size

    # Explicit identity checks instead of the chained `a == b is None` form
    if flash_tens is None and scratch_tens is None:
        # First Npu subgraph, create scratch and flash tensors
        sg.scratch_tensor = make_memory_tensor(
            sg.name + "_scratch", scratch_area, MemType.Scratch, scratch_size, False, arch
        )
        sg.scratch_tensor.purpose = TensorPurpose.Scratch
        sg.flash_tensor = make_memory_tensor(
            sg.name + "_flash", flash_area, MemType.Permanent_CPU, flash_size, True, arch
        )
    else:
        # Reuse the tensors passed in and grow them by this subgraph's usage
        sg.scratch_tensor = scratch_tens
        sg.scratch_tensor.shape[0] += scratch_size
        sg.flash_tensor = flash_tens
        sg.flash_tensor.shape[0] += flash_size

    for cps in sg.cascaded_passes:
        for ps in cps.passes:
            if ps.placement == PassPlacement.Npu:
                if ps.weight_tensor is not None:
                    # For DMA ops, ps.weight_tensor is referring to the SRAM weight tensor and therefore the address
                    # is pointing at the destination address of where the weights should be placed in SRAM.
                    # This ensures that the Flash weight tensor is used instead and thus gets the correct address.
                    if ps.weight_tensor.ops[0].type == "DMA":
                        copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor.ops[0].inputs[0])
                    else:
                        copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.weight_tensor)

                    copy_compressed_values_to_memory_tensor(sg.flash_tensor, ps.scale_tensor)

                # Input feature maps that are not in scratch memory are copied into the flash tensor
                if ps.ifm_tensor is not None and ps.ifm_tensor.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
                    copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm_tensor)
                if ps.ifm2_tensor is not None and (
                    ps.ifm2_tensor.mem_type not in (MemType.Scratch, MemType.Scratch_fast)
                ):
                    copy_ifm_values_to_memory_tensor(sg.flash_tensor, ps.ifm2_tensor)

    sg.command_stream_tensor = make_memory_tensor(
        sg.name + "_command_stream", flash_area, MemType.Permanent_CPU, command_stream_size_bytes, True, arch
    )
    # Replace the zero-filled placeholder with the packed payload bytes
    sg.command_stream_tensor.values = np.frombuffer(payload_bytes, dtype=np.uint8)

    return sg.scratch_tensor, sg.flash_tensor

130

131

132

def add_const_tens_to_startup_cascaded_pass(startup_cps, tens):
    """Attach ``tens`` to a new "Const" operation at the front of the startup pass.

    A "Const" operation named ``tens.name + "_const"`` is created with
    ``tens`` as its only output, and ``tens.ops`` is replaced by a list
    holding just that operation. The operation and the tensor are then
    inserted at index 0 of the first pass's ``ops`` / ``outputs`` and of the
    cascaded pass's ``outputs``.

    Args:
        startup_cps: Cascaded pass whose first pass receives the operation;
            modified in place.
        tens: Tensor to expose as a constant; modified in place.
    """
    op = Operation("Const", tens.name + "_const")
    op.outputs = [tens]
    tens.ops = [op]
    first_pass = startup_cps.passes[0]
    first_pass.ops.insert(0, op)
    first_pass.outputs.insert(0, tens)
    startup_cps.outputs.insert(0, tens)

139

140

141

def rewrite_npu_call_ops(nng, sg, arch):

142

if sg.placement != PassPlacement.Cpu:

143

return

144

145

startup_cps = sg.cascaded_passes[0]

146

147

for idx, cps in enumerate(sg.cascaded_passes):

148

for ps in cps.passes:

149

for op in ps.ops:

150

if op.type == "NpuOp":

151

callee = op.attrs["subgraph"]

Tim Hall

c8310b1

2020-06-17 14:53:11 +0100

[diff] [blame]

152

op.attrs["custom_type"] = op.type

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

153

154

sz = 0

155

for tens in [callee.scratch_tensor, callee.flash_tensor, callee.command_stream_tensor]:

156

op.inputs.insert(0, tens)

157

ps.inputs.insert(0, tens)

158

cps.inputs.insert(0, tens)

159

if tens != callee.scratch_tensor:

160

add_const_tens_to_startup_cascaded_pass(startup_cps, tens)

161

sz += tens.storage_size()

162

163

for prev_cps in sg.cascaded_passes[: idx + 1]:

164

prev_cps.sram_used += sz

165

166

if callee.scratch_tensor is not None:

Patrik Gustavsson