# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Insert DMA operations into the graph for transferring weights.
from . import rewrite_graph
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
from .tensor import MemArea
from .tensor import MemType
from .tensor import TensorPurpose
from .weight_compressor import compress_weights


def weights_fit_sram(arch, op, tens, nng):
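    """Check that a weight tensor can be double-buffered in SRAM.

    Non-weight tensors always pass. For weights, a worst-case compressed
    double-buffer size is estimated and compared against the SRAM size.
    """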
    if tens.purpose != TensorPurpose.Weights:
        return True

    # Size of the smallest weight slice that can be scheduled separately:
    # OFMSplitDepth output channels of the kernel.
    min_weight_size = 0
    if len(tens.shape) == 4:
        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
    elif len(tens.shape) == 2:
        min_weight_size = tens.shape[0] * arch.OFMSplitDepth

    # The weights need to fit in SRAM as a double buffer;
    # only evaluate the worst case when the compression test limit makes it possible that they will not fit.
    w_comp_test_limit = 2
    if (w_comp_test_limit * min_weight_size * 2) > arch.sram_size:
        # Check the worst compression ratio
        npu_block_type = op.attrs.get("npu_block_type", NpuBlockType.Default)
        compress_weights(arch, nng, tens, npu_block_type, 16, 16, op.get_dilation_h_w())

        worst_buffer_size = tens.compression_scale_for_worst_weight_stream * min_weight_size * 2
        if worst_buffer_size > arch.sram_size:
            print(
                "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
                    tens.name, worst_buffer_size
                )
            )
            return False
    return True


def insert_dma_cmd(op, arch, nng):
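    """Rewrite op so that inputs worth moving to fast storage get a DMA operation.

    Weights, LUTs and oversized broadcast IFMs of binary elementwise ops are
    candidates; see the conditions below.
    """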
    if op.type == Op.DMA or not op.run_on_npu:
        return op

    is_lut_used = any(inp.purpose == TensorPurpose.LUT for inp in op.inputs)
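    # SHRAM bytes usable for one IFM: the banks not reserved for the OFM,
    # halved (presumably because binary elementwise ops read two IFMs).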
    max_ifm_shram_avail = (
        (arch.available_shram_banks(is_lut_used) - arch.shram_reserved_output_banks) * arch.shram_bank_size // 2
    )

    for idx, tens in enumerate(op.inputs):
        if tens.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
            # Tensor is in permanent storage.
            # Moving the data is only worthwhile when permanent storage differs from fast storage.
            if (
                tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash)
                and arch.permanent_storage_mem_area != arch.fast_storage_mem_area
            ) or tens.purpose == TensorPurpose.LUT:
                if tens.purpose in (TensorPurpose.Weights, TensorPurpose.LUT) or (
                    tens.purpose == TensorPurpose.FeatureMap
                    and op.type.is_binary_elementwise_op()
                    and tens.shape != []
                    and tens.shape != op.outputs[0].shape
                    and tens.storage_size() > max_ifm_shram_avail
                ):
                    only_vector_product_consumers = True
                    for oper in tens.consumers():
                        if oper is None or oper.type.npu_block_type != NpuBlockType.VectorProduct:
                            only_vector_product_consumers = False
                            break

                    # Vector products have no need for DMA: their tensors are only read once and can stay in flash.
                    # Other operations re-read tensors, which is better done from SRAM.
                    # LUTs must be placed in the last 2 blocks of SHRAM.
                    if (
                        not only_vector_product_consumers and weights_fit_sram(arch, op, tens, nng)
                    ) or tens.purpose == TensorPurpose.LUT:
                        # Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
                        new_tens = tens.clone_into_fast_storage(arch)
                        dma_cmd = Operation(Op.DMA, tens.ops[0].name + "_dma")
                        dma_cmd.inputs = [tens]
                        dma_cmd.set_output_tensor(new_tens)
                        dma_cmd.attrs["source"] = tens.mem_area
                        dma_cmd.attrs["destination"] = new_tens.mem_area
                        dma_cmd.run_on_npu = True
                        if tens.purpose == TensorPurpose.LUT:
                            new_tens.mem_area = MemArea.Shram
                        op.inputs[idx] = new_tens
    return op


def insert_dma_commands(nng, arch, verbose_graph=False):
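    """Graph-rewrite pass: apply insert_dma_cmd to every op in every subgraph."""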
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(nng, sg, arch, [], [insert_dma_cmd])
    if verbose_graph:
        nng.print_graph()
    return nng
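

# Sketch of how this pass can be invoked (the call site and pass order in the
# compiler driver are assumptions, not taken from this file):
#
#     from . import insert_dma
#     nng = insert_dma.insert_dma_commands(nng, arch, verbose_graph=False)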