# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Insert DMA operations into the graph for transferring weights.
Tim Hall79d07d22020-04-27 18:20:16 +010018from . import rewrite_graph
Diego Russoe8a10452020-04-21 17:39:10 +010019from .operation import NpuBlockType
20from .operation import Operation
21from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020022from .tensor import MemType
Diego Russoe8a10452020-04-21 17:39:10 +010023from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010024
Tim Hallc30f4952020-06-15 20:47:35 +010025
# Op types whose feature-map inputs may be worth DMAing into fast storage,
# since binary elementwise ops re-read their inputs.
binary_elementwise_op = {"AddAct", "MulAct", "SubAct", "Maximum", "Minimum"}
Tim Hall79d07d22020-04-27 18:20:16 +010027
Tim Hallc30f4952020-06-15 20:47:35 +010028
def weights_fit_sram(arch, tens):
    """Return True if a double-buffered copy of the weight tensor fits in SRAM.

    Non-weight tensors are always reported as fitting. For weight tensors an
    estimated minimum size is derived from the tensor shape and the
    architecture's OFM split depth; if twice that size (double buffering)
    exceeds the SRAM size, a diagnostic is printed and False is returned.
    """
    if tens.purpose != TensorPurpose.Weights:
        return True

    # Smallest slice of the weights that must be resident at once:
    # one OFM depth split's worth of coefficients.
    if len(tens.shape) == 4:
        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
    elif len(tens.shape) == 2:
        min_weight_size = tens.shape[0] * arch.OFMSplitDepth
    else:
        min_weight_size = 0

    w_compression = 1  # TODO worst compression ratio currently assumed

    # Need to be fit into Sram, as a double buffer
    double_buffered_size = w_compression * min_weight_size * 2
    if double_buffered_size > arch.sram_size:
        print(
            "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
                tens.name, double_buffered_size
            )
        )
        return False
    return True
50
51
def insert_dma_cmd(op, arch):
    """Graph-rewrite callback: insert DMA ops for inputs of *op* worth moving to fast storage.

    For each qualifying input tensor a clone in fast storage is created and a
    "DMA" operation is spliced in between the original tensor and *op*. The
    (possibly modified) op is returned, as required by the rewrite framework.
    """
    if op.type == "DMA" or not op.run_on_npu:
        return op

    for idx, tens in enumerate(op.inputs):
        # Skip tensors already in scratch memory; only permanent storage is a DMA source.
        if tens.mem_type in (MemType.Scratch, MemType.Scratch_fast):
            continue

        is_lut = tens.purpose == TensorPurpose.LUT

        # Only when permanent storage differs from fast storage is there a
        # point moving the data (LUTs are moved unconditionally).
        worth_moving = (
            tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash)
            and arch.permanent_storage_mem_area != arch.fast_storage_mem_area
        )
        if not (worth_moving or is_lut):
            continue

        # Candidates: weights, LUTs, and non-scalar feature maps feeding
        # binary elementwise ops (which re-read their inputs).
        is_candidate = tens.purpose in (TensorPurpose.Weights, TensorPurpose.LUT) or (
            tens.purpose == TensorPurpose.FeatureMap and op.type in binary_elementwise_op and tens.shape != []
        )
        if not is_candidate:
            continue

        only_vector_product_consumers = all(
            oper is not None and oper.attrs.get("npu_block_type") == NpuBlockType.VectorProduct
            for oper in tens.consumers()
        )

        # Tensor products have no need for DMA; their tensors are only read
        # once and can stay in flash. Other operations re-read tensors, which
        # is better done from SRAM. LUTs must be placed in the last 2 blocks
        # of SHRAM.
        if (not only_vector_product_consumers and weights_fit_sram(arch, tens)) or is_lut:
            # Insert a DMA command, plus a same-sized tensor situated in SRAM.
            new_tens = tens.clone_into_fast_storage(arch)
            dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
            dma_cmd.inputs = [tens]
            dma_cmd.set_output_tensor(new_tens)
            dma_cmd.attrs["source"] = tens.mem_area
            dma_cmd.attrs["destination"] = new_tens.mem_area
            dma_cmd.run_on_npu = True
            if is_lut:
                new_tens.mem_area = MemArea.Shram
            op.inputs[idx] = new_tens
    return op
92
93
def insert_dma_commands(nng, arch, verbose_graph=False):
    """Run the DMA-insertion rewrite over every subgraph of the network.

    Each subgraph is rewritten in pre-order with insert_dma_cmd; the network
    graph object is returned. Set verbose_graph to print the result.
    """
    for i, subgraph in enumerate(nng.subgraphs):
        nng.subgraphs[i] = rewrite_graph.rewrite_graph_pre_order(subgraph, arch, [], [insert_dma_cmd])
    if verbose_graph:
        nng.print_graph()
    return nng