# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Insert DMA operations into the graph for transferring weights.
from . import rewrite_graph
from .operation import NpuBlockType
from .operation import Operation
from .tensor import MemArea
from .tensor import MemType
from .tensor import TensorPurpose


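# Binary elementwise operations; a constant, non-scalar feature map input to one of
# these is also a candidate for DMA into fast storage (see insert_dma_cmd below).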
binary_elementwise_op = set(("AddAct", "MulAct", "SubAct", "Maximum", "Minimum"))


def insert_dma_cmd(op, arch):
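    """Graph rewrite step: insert a DMA operation that copies an eligible input
    tensor from permanent storage into a clone placed in fast storage (SRAM)."""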
    if op.type == "DMA":
        return op  # Already rewritten
    for idx, tens in enumerate(op.inputs):

        if tens.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
            # The tensor is in permanent storage.
            # Moving the data is only worthwhile when permanent storage differs from fast storage.
            if tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash) and (
                arch.permanent_storage_mem_area != arch.fast_storage_mem_area
            ):
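                # DMA candidates: weight tensors, and non-scalar constant feature maps
                # that feed a binary elementwise operation.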
                if tens.purpose == TensorPurpose.Weights or (
                    tens.purpose == TensorPurpose.FeatureMap and op.type in binary_elementwise_op and tens.shape != []
                ):
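                    # Check whether every consumer of this tensor is a vector product operation.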
                    only_vector_product_consumers = True
                    for oper in tens.consumers():
                        if oper is None or oper.attrs.get("npu_block_type") != NpuBlockType.VectorProduct:
                            only_vector_product_consumers = False
                            break

                    # Vector products have no need for DMA: their tensors are only read once
                    # and can stay in flash. Other operations re-read tensors, which is better
                    # done from SRAM.
                    if not only_vector_product_consumers:
                        # Insert a DMA command here, together with a new tensor of the same size situated in SRAM.
                        new_tens = tens.clone_into_fast_storage(arch)
                        dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
                        dma_cmd.inputs = [tens]
                        dma_cmd.outputs = [new_tens]
                        dma_cmd.attrs["source"] = tens.mem_area
                        dma_cmd.attrs["destination"] = new_tens.mem_area
                        dma_cmd.run_on_npu = True
                        new_tens.ops = [dma_cmd]
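                        # Redirect the consuming op to read from the SRAM copy instead of the original tensor.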
                        op.inputs[idx] = new_tens
    return op


def insert_dma_commands(nng, arch, verbose_graph=False):
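    """Pass entry point: rewrite every subgraph in pre-order with insert_dma_cmd and
    return the modified network. Input tensors are expected to already have their
    memory areas/types assigned, since the rewrite decides based on them."""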

    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [], [insert_dma_cmd])
    if verbose_graph:
        nng.print_graph()
    return nng