Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 1 | # Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. |
| 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 16 | # Description: |
| 17 | # Insert DMA operations into the graph for transferring weights. |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 18 | from . import rewrite_graph |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 19 | from .operation import NpuBlockType |
Louis Verhaard | aee5d75 | 2020-09-30 09:01:52 +0200 | [diff] [blame] | 20 | from .operation import Op |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 21 | from .operation import Operation |
| 22 | from .tensor import MemArea |
Patrik Gustavsson | eca2e95 | 2020-05-27 09:15:11 +0200 | [diff] [blame] | 23 | from .tensor import MemType |
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 24 | from .tensor import TensorPurpose |
Patrik Gustavsson | 3010d9b | 2020-10-01 08:22:10 +0200 | [diff] [blame] | 25 | from .weight_compressor import compress_weights |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 26 | |
Tim Hall | c30f495 | 2020-06-15 20:47:35 +0100 | [diff] [blame] | 27 | |
def weights_fit_sram(arch, op, tens, nng):
    """Compress the weights in ``tens`` and report whether they fit in SRAM.

    Tensors whose purpose is not ``TensorPurpose.Weights`` are reported as
    fitting and are left untouched. For weight tensors, ``compress_weights``
    is invoked (which updates ``tens``), and the worst-case double-buffered
    size is compared against ``arch.sram_size``.

    Args:
        arch: Architecture description; ``OFMSplitDepth`` and ``sram_size``
            are read from it.
        op: Operation consuming ``tens``; supplies the NPU block type and the
            dilation passed to ``compress_weights``.
        tens: Candidate tensor.
        nng: Graph object forwarded to ``compress_weights``.

    Returns:
        ``True`` if the tensor is not a weight tensor or the estimated buffer
        fits in SRAM, ``False`` otherwise (a message is printed in that case).
    """
    if tens.purpose != TensorPurpose.Weights:
        return True

    # Smallest amount of weight data that has to be resident at once; only
    # 4D and 2D weight shapes contribute, any other rank counts as zero.
    rank = len(tens.shape)
    if rank == 4:
        min_weight_size = tens.shape[0] * tens.shape[1] * tens.shape[2] * arch.OFMSplitDepth
    elif rank == 2:
        min_weight_size = tens.shape[0] * arch.OFMSplitDepth
    else:
        min_weight_size = 0

    # Compression has to run first: the worst-stream compression scale read
    # below is taken from the tensor afterwards.
    compress_weights(arch, nng, tens, op.type.npu_block_type, 16, 16, op.get_dilation_h_w())

    # The weights are double buffered, hence the factor of two.
    worst_buffer_size = tens.compression_scale_for_worst_weight_stream * min_weight_size * 2
    too_big = worst_buffer_size > arch.sram_size
    if too_big:
        print(
            "Weights, {}, are too big to be DMAed to SRAM, estimated minimum size is {} bytes".format(
                tens.name, worst_buffer_size
            )
        )
    return not too_big
| 51 | |
| 52 | |
def insert_dma_cmd(op, arch, nng):
    """Insert DMA operations in front of the inputs of ``op`` that need one.

    DMA operations themselves and operations that do not run on the NPU are
    returned untouched. Otherwise every input that is not already in scratch
    memory is examined, and where a copy is wanted the input is replaced by a
    clone placed in fast storage that is produced by a new ``Op.DMA``
    operation reading the original tensor.

    Args:
        op: Operation whose inputs are inspected; ``op.inputs`` is modified
            in place.
        arch: Architecture description (memory areas, SHRAM bank figures).
        nng: Graph object forwarded to ``weights_fit_sram``.

    Returns:
        The same ``op`` object.
    """
    if op.type == Op.DMA or not op.run_on_npu:
        return op

    lut_in_use = any(inp.purpose == TensorPurpose.LUT for inp in op.inputs)
    usable_banks = arch.available_shram_banks(lut_in_use) - arch.shram_reserved_output_banks
    max_ifm_shram_avail = usable_banks * arch.shram_bank_size // 2

    for idx, tens in enumerate(op.inputs):
        # Only tensors in permanent storage are candidates.
        if tens.mem_type in (MemType.Scratch, MemType.Scratch_fast):
            continue

        # Moving data only makes sense when permanent storage differs from
        # fast storage; LUTs are always considered.
        in_slower_storage = (
            tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash)
            and arch.permanent_storage_mem_area != arch.fast_storage_mem_area
        )
        if not (in_slower_storage or tens.purpose == TensorPurpose.LUT):
            continue

        # Weights and LUTs qualify directly. A feature map qualifies only as
        # a non-scalar input of a binary elementwise op whose shape differs
        # from the output and whose storage exceeds the available IFM SHRAM.
        if tens.purpose not in (TensorPurpose.Weights, TensorPurpose.LUT):
            ifm_qualifies = (
                tens.purpose == TensorPurpose.FeatureMap
                and op.type.is_binary_elementwise_op()
                and tens.shape != []
                and tens.shape != op.outputs[0].shape
                and tens.storage_size() > max_ifm_shram_avail
            )
            if not ifm_qualifies:
                continue

        # Vector products read their tensors only once, so those can stay in
        # flash. Other operations re-read tensors, which is better done from
        # SRAM. LUTs must be placed in the last 2 blocks of SHRAM.
        has_other_consumer = any(
            oper is None or oper.type.npu_block_type != NpuBlockType.VectorProduct for oper in tens.consumers()
        )
        wants_sram_copy = has_other_consumer and weights_fit_sram(arch, op, tens, nng)
        if not (wants_sram_copy or tens.purpose == TensorPurpose.LUT):
            continue

        # Create the DMA operation together with a same-sized tensor in fast
        # storage that becomes the new input of op.
        new_tens = tens.clone_into_fast_storage(arch)
        dma_cmd = Operation(Op.DMA, tens.ops[0].name + "_dma")
        dma_cmd.inputs = [tens]
        dma_cmd.set_output_tensor(new_tens)
        dma_cmd.attrs["source"] = tens.mem_area
        # The destination attribute is recorded before the LUT override below.
        dma_cmd.attrs["destination"] = new_tens.mem_area
        dma_cmd.run_on_npu = True
        if tens.purpose == TensorPurpose.LUT:
            new_tens.mem_area = MemArea.Shram
        op.inputs[idx] = new_tens

    return op
| 102 | |
| 103 | |
def insert_dma_commands(nng, arch, verbose_graph=False):
    """Run the DMA insertion rewrite over every subgraph of ``nng``.

    Each entry of ``nng.subgraphs`` is replaced in place by the result of
    ``rewrite_graph.rewrite_graph_pre_order`` with ``insert_dma_cmd`` as the
    only operation rewrite and no tensor rewrites.

    Args:
        nng: Graph object holding the ``subgraphs`` list.
        arch: Architecture description forwarded to the rewrite.
        verbose_graph: When true, ``nng.print_graph()`` is called afterwards.

    Returns:
        The same ``nng`` object.
    """
    for position, subgraph in enumerate(nng.subgraphs):
        # Fresh lists are built for every call, as in a plain literal call.
        tensor_rewrites, op_rewrites = [], [insert_dma_cmd]
        rewritten = rewrite_graph.rewrite_graph_pre_order(nng, subgraph, arch, tensor_rewrites, op_rewrites)
        nng.subgraphs[position] = rewritten

    if verbose_graph:
        nng.print_graph()
    return nng