Blame - ethosu/vela/insert_dma.py - ml/ethos-u/ethos-u-vela

Tim Hall

79d07d2

2020-04-27 18:20:16 +0100

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

79d07d2

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Insert DMA operations into the graph for transferring weights.

Tim Hall

79d07d2

2020-04-27 18:20:16 +0100

[diff] [blame]

18

from . import rewrite_graph

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

19

from .operation import NpuBlockType

20

from .operation import Operation

21

from .tensor import MemArea

Patrik Gustavsson

eca2e95

2020-05-27 09:15:11 +0200

[diff] [blame]

22

from .tensor import MemType

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

23

from .tensor import TensorPurpose

Tim Hall

79d07d2

2020-04-27 18:20:16 +0100

[diff] [blame]

24

Tim Hall

c30f495

2020-06-15 20:47:35 +0100

[diff] [blame]

25

Charles Xu

7879222

2020-05-13 10:15:26 +0200

[diff] [blame]

26

# Operation type names that insert_dma_cmd treats as binary elementwise operations.
binary_elementwise_op = {"AddAct", "MulAct", "SubAct", "Maximum", "Minimum"}

Tim Hall

79d07d2

2020-04-27 18:20:16 +0100

[diff] [blame]

27

Tim Hall

c30f495

2020-06-15 20:47:35 +0100

[diff] [blame]

28

Tim Hall

79d07d2

2020-04-27 18:20:16 +0100

[diff] [blame]

29

def insert_dma_cmd(op, arch):
    """Insert DMA operations in front of the inputs of ``op`` that should be copied to fast storage.

    For every qualifying input tensor a clone in fast storage is created, a
    "DMA" operation is built that reads the original tensor and writes the
    clone, and the corresponding entry of ``op.inputs`` is replaced by the clone.

    An input qualifies when all of the following hold:

    * its ``mem_type`` is neither ``MemType.Scratch`` nor ``MemType.Scratch_fast``;
    * it is a LUT, or it lives in ``MemArea.Dram`` / ``MemArea.OffChipFlash``
      while the architecture's permanent and fast storage areas differ;
    * it is a Weights or LUT tensor, or a feature map with a non-empty shape
      feeding one of the operations in ``binary_elementwise_op``;
    * it is a LUT, or at least one of its consumers is not a VectorProduct block.

    Args:
        op: The operation whose inputs are inspected; ``op.inputs`` is modified in place.
        arch: Architecture description. Its SHRAM bank bookkeeping
            (``shram_reserved_unused_banks``, ``shram_total_banks``) is updated
            the first time a LUT is encountered.

    Returns:
        The same ``op`` object that was passed in.
    """
    # DMA operations themselves, and operations that do not run on the NPU, are left alone.
    if op.type == "DMA" or not op.run_on_npu:
        return op

    for idx, tens in enumerate(op.inputs):
        if tens.mem_type in (MemType.Scratch, MemType.Scratch_fast):
            continue

        # Tensor is in permanent storage.
        # Only when permanent storage differs from fast storage is there a point in moving the data.
        # LUTs are always moved.
        is_lut = tens.purpose == TensorPurpose.LUT
        storage_differs = (
            tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash)
            and arch.permanent_storage_mem_area != arch.fast_storage_mem_area
        )
        if not (storage_differs or is_lut):
            continue

        is_elementwise_ifm = (
            tens.purpose == TensorPurpose.FeatureMap and op.type in binary_elementwise_op and tens.shape != []
        )
        if not (tens.purpose in (TensorPurpose.Weights, TensorPurpose.LUT) or is_elementwise_ifm):
            continue

        only_vector_product_consumers = all(
            oper is not None and oper.attrs.get("npu_block_type") == NpuBlockType.VectorProduct
            for oper in tens.consumers()
        )

        # Vector products have no need for DMA: tensors are only read once and can stay in flash.
        # Other operations re-read tensors, which is better done from SRAM.
        # LUTs must be placed in the last 2 blocks of SHRAM.
        if only_vector_product_consumers and not is_lut:
            continue

        # Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
        new_tens = tens.clone_into_fast_storage(arch)
        dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
        dma_cmd.inputs = [tens]
        dma_cmd.set_output_tensor(new_tens)
        dma_cmd.attrs["source"] = tens.mem_area
        dma_cmd.attrs["destination"] = new_tens.mem_area
        dma_cmd.run_on_npu = True

        if is_lut:
            # TODO: Add support for more than one LUT at a time
            # Reserve the last 2 blocks for the LUT (only once, so the total is not reduced repeatedly).
            if arch.shram_reserved_unused_banks == 0:
                arch.shram_reserved_unused_banks = 2
                arch.shram_total_banks -= arch.shram_reserved_unused_banks
            # Place the LUT in the last 2 blocks of SHRAM
            new_tens.address = arch.shram_bank_size * arch.shram_total_banks

        op.inputs[idx] = new_tens

    return op

def insert_dma_commands(nng, arch, verbose_graph=False):
    """Apply ``insert_dma_cmd`` to every subgraph of ``nng``.

    Each entry of ``nng.subgraphs`` is replaced, in place, by the result of a
    pre-order graph rewrite that uses ``insert_dma_cmd`` as its only
    operation rewriter (the tensor rewriter list is empty).

    Args:
        nng: Graph container exposing ``subgraphs`` and ``print_graph()``.
        arch: Architecture description forwarded to the rewriter.
        verbose_graph: When true, ``nng.print_graph()`` is called after rewriting.

    Returns:
        The same ``nng`` object that was passed in.
    """
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [], [insert_dma_cmd])

    if verbose_graph:
        nng.print_graph()

    return nng