blob: c868672f98e0ed0d78174fa54999436925a3713e [file] [log] [blame]
# SPDX-FileCopyrightText: Copyright 2020-2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Serialises and packs an NPU subgraph into tensors.
Diego Russoea6111a2020-04-14 18:41:58 +010019import numpy as np
20
21from . import driver_actions
Tim Hall79d07d22020-04-27 18:20:16 +010022from .data_type import DataType
Diego Russoe8a10452020-04-21 17:39:10 +010023from .nn_graph import PassPlacement
Louis Verhaardaee5d752020-09-30 09:01:52 +020024from .operation import Op
Diego Russoe8a10452020-04-21 17:39:10 +010025from .operation import Operation
26from .tensor import MemArea
Patrik Gustavssoneca2e952020-05-27 09:15:11 +020027from .tensor import MemType
Diego Russoe8a10452020-04-21 17:39:10 +010028from .tensor import Tensor
29from .tensor import TensorFormat
30from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010031
32
def make_memory_tensor(name, mem_area, mem_type, sz, want_values, arch):
    """Create a one-dimensional uint8 tensor that stands in for a block of memory.

    The tensor has shape ``[sz]``, is tagged with ``mem_area`` and ``mem_type``,
    is given the FeatureMap purpose and is set to the NHWC format for ``arch``.
    When ``want_values`` is truthy, ``values`` is a zero-filled uint8 array of
    the tensor's shape; otherwise ``values`` is not assigned by this function.
    """
    memory_tensor = Tensor([sz], DataType.uint8, name)
    memory_tensor.mem_area = mem_area
    memory_tensor.mem_type = mem_type
    memory_tensor.purpose = TensorPurpose.FeatureMap
    memory_tensor.set_format(TensorFormat.NHWC, arch)

    if not want_values:
        return memory_tensor

    # Backing store requested: start from an all-zero byte array.
    memory_tensor.values = np.zeros(memory_tensor.shape, dtype=np.uint8)
    return memory_tensor
42
43
def copy_compressed_values_to_memory_tensor(memory_tensor, src_tensor):
    """Copy ``src_tensor.buffer`` into ``memory_tensor.values`` at the source's address.

    The destination range starts at ``src_tensor.address`` and is
    ``src_tensor.storage_size()`` elements long.
    """
    offset = src_tensor.address
    region = slice(offset, src_tensor.address + src_tensor.storage_size())
    memory_tensor.values[region] = src_tensor.buffer.copy()
Tim Hall79d07d22020-04-27 18:20:16 +010048
Tim Hallc30f4952020-06-15 20:47:35 +010049
def copy_ifm_values_to_memory_tensor(memory_tensor, src_tensor):
    """Copy a tensor's values, as raw bytes, into ``memory_tensor.values``.

    The flattened values of ``src_tensor`` are written starting at
    ``src_tensor.address``. If the source data type is wider than one byte
    the values are first reinterpreted as a uint8 byte sequence, so the
    number of elements written equals the byte length of the data.
    """
    offset = src_tensor.address
    flat = src_tensor.values.flatten()
    if src_tensor.dtype.size_in_bytes() > 1:
        # Multi-byte elements: view the underlying bytes as individual uint8 entries.
        flat = np.frombuffer(flat.tobytes(), dtype=np.uint8)
    memory_tensor.values[offset : offset + flat.size] = flat
Tim Hall79d07d22020-04-27 18:20:16 +010057
Tim Hallc30f4952020-06-15 20:47:35 +010058
def serialise_npu_subgraph_into_tensors(sg, arch, scratch_tens, scratch_fast_tens, flash_tens):
    """Pack an NPU subgraph's command stream and constant data into memory tensors.

    A subgraph whose placement is not ``PassPlacement.Npu`` is left untouched
    and the three tensors passed in are returned unchanged.

    For an NPU subgraph this sets ``sg.scratch_tensor``,
    ``sg.scratch_fast_tensor``, ``sg.flash_tensor`` and
    ``sg.command_stream_tensor``, and copies weights, scales, non-scratch
    input feature maps and activation LUTs into the flash tensor's values.

    Args:
        sg: subgraph to serialise.
        arch: architecture object; supplies the memory areas and is passed on
            to driver payload creation and tensor creation.
        scratch_tens: scratch tensor returned by a previous call, or None.
        scratch_fast_tens: fast scratch tensor returned by a previous call, or None.
        flash_tens: flash tensor returned by a previous call, or None.

    Returns:
        Tuple ``(scratch_tensor, scratch_fast_tensor, flash_tensor)`` to be
        passed to the next call.
    """
    if sg.placement != PassPlacement.Npu:
        return scratch_tens, scratch_fast_tens, flash_tens

    flash_area = arch.permanent_storage_mem_area
    scratch_area = arch.feature_map_storage_mem_area
    scratch_fast_area = arch.fast_storage_mem_area

    flash_size = sg.memory_used.get(flash_area, 0)
    scratch_size = sg.memory_used.get(scratch_area, 0)

    payload_bytes = driver_actions.create_driver_payload(sg.register_command_stream, arch)

    command_stream_size_bytes = len(payload_bytes)

    # Explicit identity checks replace the chained `flash_tens == scratch_tens is None`,
    # which mixed an equality test with a None comparison.
    if flash_tens is None and scratch_tens is None:
        # First Npu subgraph, create scratch and flash tensors
        sg.scratch_tensor = make_memory_tensor(
            sg.name + "_scratch", scratch_area, MemType.Scratch, scratch_size, False, arch
        )
        sg.scratch_tensor.purpose = TensorPurpose.Scratch
        sg.flash_tensor = make_memory_tensor(
            sg.name + "_flash", flash_area, MemType.Permanent_CPU, flash_size, True, arch
        )
        sg.scratch_fast_tensor = make_memory_tensor(
            sg.name + "_scratch_fast", scratch_fast_area, MemType.Scratch_fast, 0, False, arch
        )
        sg.scratch_fast_tensor.purpose = TensorPurpose.ScratchFast
    else:
        # Later subgraph: reuse the tensors handed in and grow their declared sizes.
        sg.scratch_tensor = scratch_tens
        sg.scratch_tensor.shape[0] += scratch_size
        sg.flash_tensor = flash_tens
        sg.flash_tensor.shape[0] += flash_size

        sg.scratch_fast_tensor = scratch_fast_tens
        sg.scratch_fast_tensor.shape[0] = 0

    for sched_op in sg.sched_ops:
        ifm_tensor, ifm2_tensor, _, _, _ = sched_op.parent_op.get_ifm_ifm2_weights_biases_ofm()

        op_info = sg.schedule.cost_map[sched_op]
        if op_info.npu_weights_tensor:
            copy_compressed_values_to_memory_tensor(sg.flash_tensor, op_info.npu_weights_tensor)
        if op_info.npu_scales_tensor:
            copy_compressed_values_to_memory_tensor(sg.flash_tensor, op_info.npu_scales_tensor)

        # Input feature maps that do not live in a scratch memory type are copied into flash.
        if ifm_tensor and ifm_tensor.mem_type not in (MemType.Scratch, MemType.Scratch_fast):
            copy_ifm_values_to_memory_tensor(sg.flash_tensor, ifm_tensor)
        if ifm2_tensor and (ifm2_tensor.mem_type not in (MemType.Scratch, MemType.Scratch_fast)):
            copy_ifm_values_to_memory_tensor(sg.flash_tensor, ifm2_tensor)

        if sched_op.parent_op.activation_lut:
            copy_ifm_values_to_memory_tensor(sg.flash_tensor, sched_op.parent_ps.lut_tensor)

    sg.command_stream_tensor = make_memory_tensor(
        sg.name + "_command_stream", flash_area, MemType.Permanent_CPU, command_stream_size_bytes, True, arch
    )
    # Replace the zero-filled placeholder with the actual driver payload bytes.
    sg.command_stream_tensor.values = np.frombuffer(payload_bytes, dtype=np.uint8)

    return sg.scratch_tensor, sg.scratch_fast_tensor, sg.flash_tensor
Tim Hall79d07d22020-04-27 18:20:16 +0100119
120
def add_const_tens_to_startup_cascaded_pass(startup_cps, tens):
    """Register ``tens`` as the output of a new Const operation in the start-up pass.

    A Const operation named ``<tens.name>_const`` is created with ``tens`` as
    its output tensor. The operation is inserted at the front of the first
    pass's ops, and ``tens`` at the front of both that pass's outputs and the
    cascaded pass's outputs.
    """
    const_op = Operation(Op.Const, tens.name + "_const")
    const_op.set_output_tensor(tens)

    first_pass = startup_cps.passes[0]
    first_pass.ops.insert(0, const_op)
    first_pass.outputs.insert(0, tens)
    startup_cps.outputs.insert(0, tens)
127
128
def rewrite_npu_call_ops(sg, arch):
    """Attach each NPU callee's memory tensors as inputs of its calling op.

    Only subgraphs placed on the CPU are processed. For every op of type
    ``Op.CustomNpuOp`` the callee subgraph is read from ``op.attrs["subgraph"]``
    and its scratch-fast, scratch, flash and command stream tensors are
    inserted at the front of the op's, pass's and cascaded pass's inputs.
    The flash and command stream tensors are also added as constants to the
    first cascaded pass. ``sram_used`` is then increased on the cascaded
    passes up to and including the current one, and again on the current one
    for scratch tensors located in SRAM.

    ``arch`` is accepted but not used by this function.
    """
    if sg.placement != PassPlacement.Cpu:
        return

    startup_cps = sg.cascaded_passes[0]

    for cps_index, cps in enumerate(sg.cascaded_passes):
        for ps in cps.passes:
            # NOTE(review): adding constants inserts into startup_cps.passes[0].ops; if an
            # NPU call op ever sits in that same pass, this list is mutated while being
            # iterated - confirm that cannot happen.
            for op in ps.ops:
                if op.type != Op.CustomNpuOp:
                    continue

                callee = op.attrs["subgraph"]
                callee_tensors = (
                    callee.scratch_fast_tensor,
                    callee.scratch_tensor,
                    callee.flash_tensor,
                    callee.command_stream_tensor,
                )

                total_size = 0
                # Each tensor is inserted at index 0, so the final input order is the
                # reverse of the order listed above.
                for tens in callee_tensors:
                    op.inputs.insert(0, tens)
                    ps.inputs.insert(0, tens)
                    cps.inputs.insert(0, tens)
                    if tens != callee.scratch_tensor and tens != callee.scratch_fast_tensor:
                        add_const_tens_to_startup_cascaded_pass(startup_cps, tens)
                    total_size += tens.storage_size()

                # Every cascaded pass up to and including this one is charged the total size.
                for earlier_cps in sg.cascaded_passes[: cps_index + 1]:
                    earlier_cps.sram_used += total_size

                # Scratch tensors that live in SRAM are additionally charged to this pass.
                for scratch in (callee.scratch_tensor, callee.scratch_fast_tensor):
                    if scratch is not None and scratch.mem_area == MemArea.Sram:
                        cps.sram_used += scratch.storage_size()