# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Insert DMA operations into the graph for transferring weights.
Tim Hall79d07d22020-04-27 18:20:16 +010018from . import rewrite_graph
Diego Russoe8a10452020-04-21 17:39:10 +010019from .operation import NpuBlockType
20from .operation import Operation
21from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020022from .tensor import MemType
Diego Russoe8a10452020-04-21 17:39:10 +010023from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010024
Tim Hallc30f4952020-06-15 20:47:35 +010025
# Op types whose feature-map inputs may be worth DMAing into fast storage,
# since binary elementwise ops re-read their inputs.
binary_elementwise_op = {"AddAct", "MulAct", "SubAct", "Maximum", "Minimum"}
Tim Hall79d07d22020-04-27 18:20:16 +010027
Tim Hallc30f4952020-06-15 20:47:35 +010028
def weights_fit_sram(arch, tens):
    """Return True if a double-buffered copy of the weight tensor fits in SRAM.

    Non-weight tensors are always reported as fitting. For weight tensors an
    estimated minimum size is derived from the tensor shape and the
    architecture's OFM split depth; if twice that size (double buffering)
    exceeds the SRAM size, a diagnostic is printed and False is returned.
    """
    if tens.purpose != TensorPurpose.Weights:
        return True

    # Smallest slice of the weights that must be resident at once:
    # one OFM depth split's worth of coefficients.
    if len(tens.shape) == 4:
        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
    elif len(tens.shape) == 2:
        min_weight_size = tens.shape[0] * arch.OFMSplitDepth
    else:
        min_weight_size = 0

    w_compression = 1  # TODO worst compression ratio currently assumed

    # Need to be fit into Sram, as a double buffer
    double_buffered_size = w_compression * min_weight_size * 2
    if double_buffered_size > arch.sram_size:
        print(
            "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
                tens.name, double_buffered_size
            )
        )
        return False
    return True
50
51
def insert_dma_cmd(op, arch):
    """Graph-rewrite callback: insert DMA ops for inputs of *op* worth moving to fast storage.

    For each qualifying input tensor a clone in fast storage is created and a
    "DMA" operation is spliced in between the original tensor and *op*. The
    (possibly modified) op is returned, as required by the rewrite framework.
    """
    if op.type == "DMA" or not op.run_on_npu:
        return op

    for idx, tens in enumerate(op.inputs):
        # Skip tensors already in scratch memory; only permanent storage is a DMA source.
        if tens.mem_type in (MemType.Scratch, MemType.Scratch_fast):
            continue

        is_lut = tens.purpose == TensorPurpose.LUT

        # Only when permanent storage differs from fast storage is there a
        # point moving the data (LUTs are moved unconditionally).
        worth_moving = (
            tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash)
            and arch.permanent_storage_mem_area != arch.fast_storage_mem_area
        )
        if not (worth_moving or is_lut):
            continue

        # Candidates: weights, LUTs, and non-scalar feature maps feeding
        # binary elementwise ops (which re-read their inputs).
        is_candidate = tens.purpose in (TensorPurpose.Weights, TensorPurpose.LUT) or (
            tens.purpose == TensorPurpose.FeatureMap and op.type in binary_elementwise_op and tens.shape != []
        )
        if not is_candidate:
            continue

        only_vector_product_consumers = all(
            oper is not None and oper.attrs.get("npu_block_type") == NpuBlockType.VectorProduct
            for oper in tens.consumers()
        )

        # Tensor products have no need for DMA; their tensors are only read
        # once and can stay in flash. Other operations re-read tensors, which
        # is better done from SRAM. LUTs must be placed in the last 2 blocks
        # of SHRAM.
        if (not only_vector_product_consumers and weights_fit_sram(arch, tens)) or is_lut:
            # Insert a DMA command, plus a same-sized tensor situated in SRAM.
            new_tens = tens.clone_into_fast_storage(arch)
            dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
            dma_cmd.inputs = [tens]
            dma_cmd.set_output_tensor(new_tens)
            dma_cmd.attrs["source"] = tens.mem_area
            dma_cmd.attrs["destination"] = new_tens.mem_area
            dma_cmd.run_on_npu = True
            if is_lut:
                new_tens.mem_area = MemArea.Shram
            op.inputs[idx] = new_tens
    return op
92
93
def insert_dma_commands(nng, arch, verbose_graph=False):
    """Run the DMA-insertion rewrite over every subgraph of the network.

    Each subgraph is rewritten in pre-order with insert_dma_cmd; the network
    graph object is returned. Set verbose_graph to print the result.
    """
    for i, subgraph in enumerate(nng.subgraphs):
        nng.subgraphs[i] = rewrite_graph.rewrite_graph_pre_order(subgraph, arch, [], [insert_dma_cmd])
    if verbose_graph:
        nng.print_graph()
    return nng