# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Insert DMA operations into the graph for transferring weights.
from . import rewrite_graph
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
from .tensor import MemArea
from .tensor import MemType
from .tensor import TensorPurpose
from .weight_compressor import compress_weights


def weights_fit_sram(arch, op, tens, nng):
    # Compresses weights and checks if they fit in SRAM
    if tens.purpose != TensorPurpose.Weights:
        return True

    min_weight_size = 0
    if len(tens.shape) == 4:
        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
    elif len(tens.shape) == 2:
        min_weight_size = tens.shape[0] * arch.OFMSplitDepth

    compress_weights(arch, nng, tens, op.type.npu_block_type, 16, 16, op.get_dilation_h_w())

    # The weights need to fit into SRAM as a double buffer
    worst_buffer_size = tens.compression_scale_for_worst_weight_stream * min_weight_size * 2
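    # Worked example with illustrative numbers (not taken from any real
    # target): for a 4D weight tensor of shape [3, 3, 64, 128] with
    # arch.OFMSplitDepth == 16, min_weight_size is 3 * 3 * 64 * 16 = 9216.
    # With a worst-case compression scale of 0.5, the double buffer then
    # needs 0.5 * 9216 * 2 = 9216 bytes of SRAM.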
    if worst_buffer_size > arch.sram_size:
        print(
            "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
                tens.name, worst_buffer_size
            )
        )
        return False
    return True


def insert_dma_cmd(op, arch, nng):
    if op.type == Op.DMA or not op.run_on_npu:
        return op

    is_lut_used = any(inp.purpose == TensorPurpose.LUT for inp in op.inputs)
    max_ifm_shram_avail = (
        (arch.available_shram_banks(is_lut_used) - arch.shram_reserved_output_banks) * arch.shram_bank_size // 2
    )
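    # Example with hypothetical numbers (the real values come from the
    # architecture config): with 48 SHRAM banks available, 2 reserved for the
    # OFM and 1024-byte banks, this gives (48 - 2) * 1024 // 2 = 23552 bytes;
    # the division by two leaves room for both inputs of a binary
    # elementwise op.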

    for idx, tens in enumerate(op.inputs):
        if tens.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
            # Tensor is in permanent storage.
            # Moving the data is only worthwhile when permanent storage differs from fast storage.
            if (
                tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash)
                and arch.permanent_storage_mem_area != arch.fast_storage_mem_area
            ) or tens.purpose == TensorPurpose.LUT:
                if tens.purpose in (TensorPurpose.Weights, TensorPurpose.LUT) or (
                    tens.purpose == TensorPurpose.FeatureMap
                    and op.type.is_binary_elementwise_op()
                    and tens.shape != []
                    and op.ifm_shapes[0] != op.ofm_shapes[0]
                    and tens.storage_size() > max_ifm_shram_avail
                ):
                    only_vector_product_consumers = True
                    for oper in tens.consumers():
                        if oper is None or oper.type.npu_block_type != NpuBlockType.VectorProduct:
                            only_vector_product_consumers = False
                            break

                    # Vector products have no need for DMA: their tensors are read only once and can stay in flash.
                    # Other operations re-read tensors, so those are better served from SRAM.
                    # LUTs must be placed in the last 2 blocks of SHRAM.
                    if (
                        not only_vector_product_consumers and weights_fit_sram(arch, op, tens, nng)
                    ) or tens.purpose == TensorPurpose.LUT:
                        # Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
                        new_tens = tens.clone_into_fast_storage(arch)
                        dma_cmd = Operation(Op.DMA, tens.ops[0].name + "_dma")
                        dma_cmd.inputs = [tens]
                        dma_cmd.set_output_tensor(new_tens)
                        dma_cmd.attrs["source"] = tens.mem_area
                        dma_cmd.attrs["destination"] = new_tens.mem_area
                        dma_cmd.run_on_npu = True
                        if tens.purpose == TensorPurpose.LUT:
                            new_tens.mem_area = MemArea.Shram
                        op.inputs[idx] = new_tens
    return op


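# Illustration of the rewrite performed by insert_dma_cmd (tensor names are
# made up for this sketch): a direct read from permanent storage, e.g.
#
#     weights (Flash) --> Conv2D
#
# becomes a copy staged through fast storage:
#
#     weights (Flash) --> DMA --> weights_dma (SRAM) --> Conv2D

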
def insert_dma_commands(nng, arch, verbose_graph=False):
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(nng, sg, arch, [], [insert_dma_cmd])
    if verbose_graph:
        nng.print_graph()
    return nng
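

# Minimal usage sketch (hypothetical driver code; in the real compiler the
# graph `nng` and the architecture config `arch` are produced by earlier
# passes):
#
#     nng = insert_dma_commands(nng, arch, verbose_graph=True)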