# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Insert DMA operations into the graph for transferring weights.
from . import rewrite_graph
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
from .tensor import MemArea
from .tensor import MemType
from .tensor import TensorPurpose
from .weight_compressor import compress_weights


def weights_fit_sram(arch, op, tens, nng):
    # Compresses weights and checks if they fit in SRAM
    if tens.purpose != TensorPurpose.Weights:
        return True

    min_weight_size = 0
    if len(tens.shape) == 4:
        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
    elif len(tens.shape) == 2:
        min_weight_size = tens.shape[0] * arch.OFMSplitDepth

    compress_weights(arch, nng, tens, op.type.npu_block_type, 16, 16, op.get_dilation_h_w())

    # The weights need to fit into SRAM as a double buffer
    worst_buffer_size = tens.compression_scale_for_worst_weight_stream * min_weight_size * 2
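    # Worked example with illustrative numbers (not taken from any real
    # target): for a 4D weight tensor of shape [3, 3, 64, 128] with
    # arch.OFMSplitDepth == 16, min_weight_size is 3 * 3 * 64 * 16 = 9216.
    # With a worst-case compression scale of 0.5, the double buffer then
    # needs 0.5 * 9216 * 2 = 9216 bytes of SRAM.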
    if worst_buffer_size > arch.sram_size:
        print(
            "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
                tens.name, worst_buffer_size
            )
        )
        return False
    return True


def insert_dma_cmd(op, arch, nng):
    if op.type == Op.DMA or not op.run_on_npu:
        return op

    is_lut_used = any(inp.purpose == TensorPurpose.LUT for inp in op.inputs)
    max_ifm_shram_avail = (
        (arch.available_shram_banks(is_lut_used) - arch.shram_reserved_output_banks) * arch.shram_bank_size // 2
    )
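    # Example with hypothetical numbers (the real values come from the
    # architecture config): with 48 SHRAM banks available, 2 reserved for the
    # OFM and 1024-byte banks, this gives (48 - 2) * 1024 // 2 = 23552 bytes;
    # the division by two leaves room for both inputs of a binary
    # elementwise op.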

    for idx, tens in enumerate(op.inputs):
        if tens.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
            # Tensor is in permanent storage.
            # Moving the data is only worthwhile when permanent storage differs from fast storage.
            if (
                tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash)
                and arch.permanent_storage_mem_area != arch.fast_storage_mem_area
            ) or tens.purpose == TensorPurpose.LUT:
                if tens.purpose in (TensorPurpose.Weights, TensorPurpose.LUT) or (
                    tens.purpose == TensorPurpose.FeatureMap
                    and op.type.is_binary_elementwise_op()
                    and tens.shape != []
                    and op.ifm_shapes[0] != op.ofm_shapes[0]
                    and tens.storage_size() > max_ifm_shram_avail
                ):
                    only_vector_product_consumers = True
                    for oper in tens.consumers():
                        if oper is None or oper.type.npu_block_type != NpuBlockType.VectorProduct:
                            only_vector_product_consumers = False
                            break

                    # Vector products have no need for DMA: their tensors are read only once and can stay in flash.
                    # Other operations re-read tensors, so those are better served from SRAM.
                    # LUTs must be placed in the last 2 blocks of SHRAM.
                    if (
                        not only_vector_product_consumers and weights_fit_sram(arch, op, tens, nng)
                    ) or tens.purpose == TensorPurpose.LUT:
                        # Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
                        new_tens = tens.clone_into_fast_storage(arch)
                        dma_cmd = Operation(Op.DMA, tens.ops[0].name + "_dma")
                        dma_cmd.inputs = [tens]
                        dma_cmd.set_output_tensor(new_tens)
                        dma_cmd.attrs["source"] = tens.mem_area
                        dma_cmd.attrs["destination"] = new_tens.mem_area
                        dma_cmd.run_on_npu = True
                        if tens.purpose == TensorPurpose.LUT:
                            new_tens.mem_area = MemArea.Shram
                        op.inputs[idx] = new_tens
    return op


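# Illustration of the rewrite performed by insert_dma_cmd (tensor names are
# made up for this sketch): a direct read from permanent storage, e.g.
#
#     weights (Flash) --> Conv2D
#
# becomes a copy staged through fast storage:
#
#     weights (Flash) --> DMA --> weights_dma (SRAM) --> Conv2D

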
def insert_dma_commands(nng, arch, verbose_graph=False):
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(nng, sg, arch, [], [insert_dma_cmd])
    if verbose_graph:
        nng.print_graph()
    return nng
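

# Minimal usage sketch (hypothetical driver code; in the real compiler the
# graph `nng` and the architecture config `arch` are produced by earlier
# passes):
#
#     nng = insert_dma_commands(nng, arch, verbose_graph=True)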