# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Generate a high-level command stream from a schedule
Diego Russo | e8a1045 | 2020-04-21 17:39:10 +0100 | [diff] [blame] | 18 | from .high_level_command_stream import Box |
| 19 | from .high_level_command_stream import DMA |
| 20 | from .high_level_command_stream import NpuStripe |
Charles Xu | 89a6bbf | 2020-08-11 12:31:58 +0200 | [diff] [blame] | 21 | from .numeric_util import round_up_divide |
Louis Verhaard | e8a5a78 | 2020-11-02 18:04:27 +0100 | [diff] [blame] | 22 | from .operation import create_activation_function |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 23 | from .operation import NpuBlockType |
Louis Verhaard | aee5d75 | 2020-09-30 09:01:52 +0200 | [diff] [blame] | 24 | from .operation import Op |
patrik.gustavsson | eeb8515 | 2020-12-21 17:10:40 +0000 | [diff] [blame] | 25 | from .shape4d import Shape4D |
Charles Xu | 7879222 | 2020-05-13 10:15:26 +0200 | [diff] [blame] | 26 | from .tensor import TensorPurpose |
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 27 | |
| 28 | |
def dma_if_necessary(ps, box, tensor):
    """Yield a DMA command for `tensor` when its source lives in another memory area.

    Nothing is yielded when `tensor.src_tensor` is falsy or when the source
    and `tensor` report the same `mem_area`. Otherwise a single
    `DMA(ps, src_tensor, tensor, box)` command is produced.
    """
    source = tensor.src_tensor
    if not source:
        # No source tensor registered - nothing to copy
        return
    if tensor.mem_area != source.mem_area:
        yield DMA(ps, source, tensor, box)
Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame] | 33 | |
Tim Hall | c30f495 | 2020-06-15 20:47:35 +0100 | [diff] [blame] | 34 | |
def generate_high_level_command_stream_for_schedule(nng, sg, arch, verbose_high_level_command_stream):
    """Build the high-level command stream of subgraph `sg` from its schedule.

    Iterates `sg.sched_ops` (ordered by execution). An Op whose cost-map entry
    has cascade 0 is generated on its own; for any other cascade the commands
    are generated once, starting from the Op at the cascade's `end` index, and
    the remaining Ops of that cascade are skipped.

    The resulting list is stored in `sg.high_level_command_stream`, and
    `sg.print_high_level_command_stream()` is called when
    `verbose_high_level_command_stream` is truthy. `nng` and `arch` are not
    used by this function.
    """
    commands = []
    handled_cascades = set()

    # sg.sched_ops are ordered by execution
    for sched_op in sg.sched_ops:
        schedule = sg.schedule
        cascade_id = schedule.cost_map[sched_op].cascade
        if cascade_id in handled_cascades:
            # Every Op of this cascade was emitted together with its last Op
            continue

        if cascade_id == 0:
            # Stand-alone Op: generate its commands in isolation
            commands.extend(generate_high_level_commands_for_sched_op(sched_op, schedule))
            continue

        # Cascaded Op: generate the whole cascade, starting from its last Op
        last_in_cascade = sg.sched_ops[schedule.cascades[cascade_id].end]
        commands.extend(generate_high_level_commands_for_sched_op(last_in_cascade, schedule))
        handled_cascades.add(cascade_id)

    sg.high_level_command_stream = commands
    if verbose_high_level_command_stream:
        sg.print_high_level_command_stream()
| 58 | |
| 59 | |
def generate_high_level_commands_for_sched_op(sched_op, schedule):
    """Generate the high-level commands for `sched_op`, one OFM stripe at a time.

    This is a generator. The OFM region is walked in steps of
    `op_info.stripe` over height and width, and over the depth slices in
    `op_info.ofm_depth_slices`. For every resulting OFM box it yields, in order:

    * commands of the Op producing this Op's IFM (only when `sched_op` belongs
      to a cascade and is not the cascade's first Op) until the produced data
      covers the IFM box required by the current stripe,
    * a DMA command for the buffered weight tensor, if one is needed
      (first height stripe only),
    * a DMA command for the activation LUT, if one is needed (at most once per
      height/width position),
    * the `NpuStripe` command itself.

    Side effect: when an op of the pass is a relu-type, Tanh or Sigmoid op,
    `ps.primary_op.activation` is overwritten with a newly created activation
    function.

    Args:
        sched_op: The scheduled Op to generate commands for.
        schedule: The schedule providing `cost_map` (per-Op scheduling info)
            and `cascades` (cascade info looked up by cascade id).

    Yields:
        The commands produced by `dma_if_necessary`, `NpuStripe` commands, and
        the commands of producer Ops inside the same cascade.
    """
    op_info = schedule.cost_map[sched_op]
    cascade_info = schedule.cascades.get(op_info.cascade)
    ps = sched_op.parent_ps
    npu_block_type = ps.npu_block_type
    block_config = op_info.block_config
    parent_op = sched_op.parent_op
    ofm_tensor = ps.ofm_tensor

    # Get Tensors and Full Shapes
    (
        ifm_tensor,
        ifm2_tensor,
        uncomp_weight_tensor,
        _,
        _,
    ) = parent_op.get_ifm_ifm2_weights_biases_ofm()
    if sched_op.reversed_operands:
        ifm2_tensor, ifm_tensor = ifm_tensor, ifm2_tensor
    ifm = sched_op.ifm
    ifm2 = sched_op.ifm2
    ofm_shape = sched_op.ofm.shape

    # Get Kernel strides and upscaling factor
    kernel_stride = sched_op.kernel.stride
    strides = [1, kernel_stride.y, kernel_stride.x, 1]
    skirt = parent_op.attrs.get("skirt", None)
    upscaling = 1
    if sched_op.op_type == Op.Conv2DBackpropInputSwitchedBias:
        upscaling = ofm_shape.height // ifm.shape.height
    elif sched_op.op_type.is_resize_op():
        upscaling = round_up_divide(ofm_shape.height, ifm.shape.height)

    # Get kernel height and height dilation
    # (parent_op has already been dereferenced above, so it cannot be None here)
    k_height = 1
    if npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
        k_height = parent_op.attrs["ksize"][1]
    elif uncomp_weight_tensor is not None:
        k_height = uncomp_weight_tensor.shape[0]

    # Only the element at index -3 is read, so the default is a plain tuple of
    # ones rather than one built from unrelated placeholder values
    k_height_dilation = parent_op.attrs.get("dilation", (1, 1, 1, 1))[-3]

    # Calculate dilated kernel height
    k_dilated_height = k_height_dilation * (k_height - 1) + 1

    # Define Start and End coordinates for the OFM
    ofm_start = Shape4D(0, 0, 0, op_info.ofm_depth_slices[0])
    ofm_end = ofm_shape

    ofm_depth_slices = op_info.ofm_depth_slices

    # Read/Write offsets
    read_offsets = list(parent_op.read_offsets)  # offset for [ifm, ifm2]
    read_shapes = list(parent_op.read_shapes)  # read shapes for [ifm, ifm2]
    write_offset = Shape4D(0, 0, 0, 0)
    if parent_op.write_offset is not None:
        write_offset = parent_op.write_offset
        ofm_start = write_offset
        ofm_end = parent_op.write_offset + parent_op.write_shape

    # Create activation function if needed.
    # The box transforms below receive the type of the LAST op in the pass;
    # it is tracked explicitly here instead of relying on the loop variable
    # staying bound after the loop. If the pass has no ops the scheduled Op's
    # own type is used.
    box_op_type = sched_op.op_type
    for op in ps.ops:
        box_op_type = op.type
        if op.type.is_relu_op() or op.type in (Op.Tanh, Op.Sigmoid):
            ps.primary_op.activation = create_activation_function(
                op.type, min=op.attrs.get("min", None), max=op.attrs.get("max", None)
            )

    # Generate commands for the Op that produces this Op's IFM, if applicable
    if cascade_info is None or cascade_info.start == sched_op.index:
        # Lone Op or First Op in cascade - all IFM data is present
        ifm_present = Box([0, 0, 0, 0], ifm.shape.as_list())
        producer_op = None
        prev_cmd_gen = []
    else:
        # No IFM data has been produced yet; the producer's generator is
        # advanced lazily from inside the stripe loop below
        ifm_present = Box([0, 0, 0, 0], [0, 0, 0, 0])
        producer_op = sched_op.ifm.connection.producers[0]
        prev_cmd_gen = generate_high_level_commands_for_sched_op(producer_op, schedule)

    ofm_step = op_info.stripe
    for start_height in range(ofm_start.height, ofm_end.height, ofm_step.height):
        end_height = min(start_height + ofm_step.height, ofm_end.height)
        for start_width in range(ofm_start.width, ofm_end.width, ofm_step.width):
            end_width = min(start_width + ofm_step.width, ofm_end.width)

            lut_dma_done = False
            for depth_idx, start_channel in enumerate(ofm_depth_slices[:-1]):
                start_channel = max(start_channel, ofm_start.depth)
                end_channel = min(ofm_depth_slices[depth_idx + 1], ofm_end.depth)

                # Construct the OFM box for the current stripe
                ofm_box_start = Shape4D(ofm_start.batch, start_height, start_width, start_channel)
                ofm_box_end = Shape4D(ofm_end.batch, end_height, end_width, end_channel)
                ofm_box = Box(ofm_box_start.as_list(), ofm_box_end.as_list())
                ifm_box = Box([], [])
                ifm2_box = Box([], [])

                # NOTE(review): pad_top/pad_bottom are only bound when ifm or
                # ifm2 is truthy, and when both are set the values computed
                # for ifm2 are the ones passed on to NpuStripe - confirm this
                # is intended.

                # Calculate IFM input box based on the OFM box
                if ifm:
                    ifm_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                        strides,
                        skirt,
                        ifm.shape,
                        npu_block_type,
                        write_offset.as_list(),
                        k_dilated_height,
                        read_offsets[0],
                        read_shapes[0],
                        upscaling,
                        box_op_type,
                    )
                # Calculate IFM2 input box based on the OFM box
                if ifm2:
                    ifm2_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                        strides,
                        skirt,
                        ifm2.shape,
                        npu_block_type,
                        write_offset.as_list(),
                        k_dilated_height,
                        read_offsets[1],
                        read_shapes[1],
                        upscaling,
                        box_op_type,
                    )

                ifm_required = ifm_box
                # Get the Op that produces this Op's IFM data - only applicable within cascades
                if producer_op:
                    assert op_info.cascade != 0
                    assert op_info.cascade == schedule.cost_map[producer_op].cascade
                    if not ifm_required.is_subbox_of(ifm_present):
                        # Resume the producer's generator where it was left;
                        # its commands are forwarded until enough IFM exists
                        for prev_cmd in prev_cmd_gen:
                            yield prev_cmd
                            if prev_cmd.is_npu_pass_command() and prev_cmd.ps == producer_op.parent_ps:
                                ifm_present.end_coord = prev_cmd.ofm_box.end_coord
                                if ifm_required.is_subbox_of(ifm_present):
                                    # There is enough IFM data - exit loop
                                    break

                # Information about the current stripe's location in the cascade
                is_first_h_stripe = ofm_box_start.height == ofm_start.height
                is_last_h_stripe = ofm_box_end.height >= ofm_end.height

                # Calculate the weight box - i.e. the subshape of weights needed for this NpuStripe command
                weight_tensor = op_info.npu_weights_tensor
                scale_tensor = op_info.npu_scales_tensor
                if op_info.npu_weights_tensor:
                    weight_box = Box([0, 0, 0, start_channel], [1, 1, 1, end_channel])

                    if op_info.buffered_weight_tensors and is_first_h_stripe:
                        # The buffered weight tensors are used in rotation,
                        # selected by the depth slice index
                        idx = depth_idx % len(op_info.buffered_weight_tensors)
                        yield from dma_if_necessary(ps, weight_box, op_info.buffered_weight_tensors[idx])
                        weight_tensor = op_info.buffered_weight_tensors[idx]
                else:
                    weight_box = None

                # Should only be done once per loop but not before weights above
                if parent_op.activation_lut and not lut_dma_done:
                    lut_tensor = [tens for tens in parent_op.inputs if tens.purpose == TensorPurpose.LUT][0]
                    lut_box = Box([0] * len(lut_tensor.shape), list(lut_tensor.shape))
                    lut_dma_done = True
                    yield from dma_if_necessary(ps, lut_box, lut_tensor)

                yield NpuStripe(
                    ps,
                    block_config.old_style_representation(),
                    is_first_h_stripe,
                    is_last_h_stripe,
                    ifm_tensor,
                    ifm_box,
                    ofm_tensor,
                    ofm_box,
                    weight_tensor,
                    weight_box,
                    scale_tensor,
                    ifm2_tensor=ifm2_tensor,
                    ifm2_box=ifm2_box,
                    pad_top=pad_top,
                    pad_bottom=pad_bottom,
                    reversed_operands=sched_op.reversed_operands,
                )