Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame^] | 1 | # Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. |
| 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | |
| 17 | |
| 18 | # Description: |
| 19 | # Insert DMA operations into the graph for transferring weights. |
| 20 | |
| 21 | from .nn_graph import Operation, MemArea, TensorPurpose, NpuBlockType |
| 22 | from . import rewrite_graph |
| 23 | |
| 24 | |
def insert_dma_cmd(op, arch):
    """Route re-read weight inputs of ``op`` through a DMA operation.

    Every input tensor of ``op`` is inspected.  A tensor qualifies when it is
    a weights tensor located in DRAM or off-chip flash, that memory area is
    not the architecture's fast storage area, and at least one of its
    consumers is ``None`` or is not a vector-product operation.  For each
    qualifying tensor a clone is requested via ``clone_into_fast_storage``, a
    "DMA" operation copying the original into the clone is created, and the
    clone replaces the original in ``op.inputs``.

    Args:
        op: Operation whose inputs are examined; its ``inputs`` list is
            modified in place.
        arch: Architecture description; supplies ``fast_storage_mem_area``
            and is forwarded to ``clone_into_fast_storage``.

    Returns:
        The same ``op`` object that was passed in.
    """
    if op.type == "DMA":
        # A DMA operation is already the product of this rewrite.
        return op

    for input_idx, src_tens in enumerate(op.inputs):
        in_slow_memory = src_tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash)
        if not (in_slow_memory and src_tens.mem_area != arch.fast_storage_mem_area):
            continue
        if src_tens.purpose != TensorPurpose.Weights:
            continue

        # Vector products read their weights only once, so those may stay
        # where they are.  Any other consumer re-reads the weights, which is
        # better served from fast storage.
        needs_fast_copy = any(
            consumer is None or consumer.attrs.get("npu_block_type") != NpuBlockType.VectorProduct
            for consumer in src_tens.consumers()
        )
        if not needs_fast_copy:
            continue

        # Create the fast-storage clone first, then the DMA operation that
        # fills it from the original tensor.
        fast_tens = src_tens.clone_into_fast_storage(arch)
        dma_op = Operation("DMA", src_tens.ops[0].name + "_dma")
        dma_op.inputs = [src_tens]
        dma_op.outputs = [fast_tens]
        dma_op.attrs["source"] = src_tens.mem_area
        dma_op.attrs["destination"] = fast_tens.mem_area
        dma_op.run_on_npu = True

        # The clone is produced by the DMA and consumed by ``op`` instead of
        # the original tensor.
        fast_tens.ops = [dma_op]
        op.inputs[input_idx] = fast_tens

    return op
| 52 | |
| 53 | |
def insert_dma_commands(nng, arch, verbose_graph=False):
    """Apply ``insert_dma_cmd`` to every subgraph of ``nng``.

    Each entry of ``nng.subgraphs`` is replaced by the result of
    ``rewrite_graph.rewrite_graph_pre_order`` called with an empty first
    rewrite list and ``insert_dma_cmd`` as the only entry of the second.

    Args:
        nng: Graph container exposing ``subgraphs`` and ``print_graph()``;
            its ``subgraphs`` list is updated in place.
        arch: Architecture description forwarded to the rewriter.
        verbose_graph: When true, ``nng.print_graph()`` is called once after
            all subgraphs have been rewritten.

    Returns:
        The same ``nng`` object that was passed in.
    """
    for position, subgraph in enumerate(nng.subgraphs):
        rewritten = rewrite_graph.rewrite_graph_pre_order(subgraph, arch, [], [insert_dma_cmd])
        nng.subgraphs[position] = rewritten

    if verbose_graph:
        nng.print_graph()

    return nng