blob: c01790a4b79604579d941b20c73fb75b0f595ed5 [file] [log] [blame]
# Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Generate a high-level command stream from a scheduled subgraph with CascadedPasses.
#
# Also used during scheduling to work out the allowable IFM/OFM overlap; this functionality can be accessed using
# calc_allowed_ofm_ifm_overlap_for_cascaded_pass().
Diego Russoe8a10452020-04-21 17:39:10 +010021from .high_level_command_stream import Box
22from .high_level_command_stream import DMA
23from .high_level_command_stream import NpuStripe
24from .nn_graph import PassPlacement
25from .nn_graph import SchedulingStrategy
Charles Xu89a6bbf2020-08-11 12:31:58 +020026from .numeric_util import round_up_divide
Louis Verhaarde8a5a782020-11-02 18:04:27 +010027from .operation import create_activation_function
Tim Hall79d07d22020-04-27 18:20:16 +010028from .operation import NpuBlockType
Louis Verhaardaee5d752020-09-30 09:01:52 +020029from .operation import Op
patrik.gustavssoneeb85152020-12-21 17:10:40 +000030from .shape4d import Shape4D
Charles Xu78792222020-05-13 10:15:26 +020031from .tensor import TensorPurpose
Tim Hall79d07d22020-04-27 18:20:16 +010032
33
def dma_if_necessary(ps, box, tensor):
    """Yield a single DMA command for `tensor` when it reports that it needs one.

    Nothing is yielded when `tensor.needs_dma()` is false. Otherwise the DMA
    source is the first input of the first op attached to `tensor`, and the
    command is built as `DMA(ps, source, tensor, box)`.
    """
    if not tensor.needs_dma():
        return
    source_tensor = tensor.ops[0].inputs[0]
    yield DMA(ps, source_tensor, tensor, box)
Tim Hall79d07d22020-04-27 18:20:16 +010039
Tim Hallc30f4952020-06-15 20:47:35 +010040
def _dma_intermediates_if_necessary(
    ps, ofm_box, strides, skirt, npu_block_type, concat_offset, split_offsets, upscaling
):
    """Yield the DMA commands needed by the intermediate tensors of `ps` for one OFM box.

    Only non-None intermediates with a non-empty shape and a FeatureMap or LUT
    purpose are considered. A FeatureMap intermediate gets a box derived from
    `ofm_box`; a LUT intermediate gets a box covering its whole shape.
    """
    for intermediate in ps.intermediates:
        if (
            intermediate is not None
            and intermediate.shape != []
            and intermediate.purpose in (TensorPurpose.FeatureMap, TensorPurpose.LUT)
        ):
            if intermediate.purpose is TensorPurpose.FeatureMap:
                intermediate_box, _, _ = ofm_box.transform_with_strides_and_skirt(
                    strides,
                    skirt,
                    Shape4D(intermediate.shape),
                    npu_block_type,
                    concat_offset,
                    split_offsets[0],
                    upscaling,
                )
            else:
                intermediate_box = Box([0] * len(intermediate.shape), list(intermediate.shape))
            yield from dma_if_necessary(ps, intermediate_box, intermediate)


def generate_high_level_command_stream_for_pass(strat, passes, block_configs, idx):
    """Generate the high-level commands for pass `passes[idx]`.

    Args:
        strat: scheduling strategy; SchedulingStrategy.WeightStream and
            SchedulingStrategy.IfmStream are handled.
        passes: the list of passes; `passes[idx]` is the pass being processed.
        block_configs: block configurations, indexed like `passes`.
        idx: index of the pass to generate commands for.

    Yields:
        DMA commands (through dma_if_necessary) and NpuStripe commands. With
        IfmStream and idx > 0 the commands of the preceding pass are pulled in
        lazily, just far enough for the IFM rows each stripe needs.

    Raises:
        AssertionError: if `strat` is neither WeightStream nor IfmStream.

    Side effects:
        May swap `ps.ifm_tensor`/`ps.ifm2_tensor` and the first two entries of
        `ps.ifm_shapes`, and may set `ps.primary_op.activation`.
    """
    is_first = idx == 0
    is_last = idx == len(passes) - 1
    ps = passes[idx]
    block_config = block_configs[idx]
    npu_block_type = ps.npu_block_type
    # NOTE(review): ps.primary_op is dereferenced here unconditionally although it is
    # checked against None further down - confirm whether it can actually be None.
    split_offsets = list(ps.primary_op.read_offsets)  # offset for [ifm, ifm2]

    if (
        len(ps.inputs) == 2
        and ps.ifm_tensor is not None
        and ps.ifm2_tensor is not None
        and npu_block_type == NpuBlockType.ElementWise
    ):
        # Ensure correct ifm and ifm2 order
        if ps.inputs[0] == ps.primary_op.inputs[1] and ps.inputs[1] == ps.primary_op.inputs[0]:
            ps.ifm_tensor, ps.ifm2_tensor = ps.ifm2_tensor, ps.ifm_tensor
            ps.ifm_shapes[0], ps.ifm_shapes[1] = ps.ifm_shapes[1], ps.ifm_shapes[0]

    # A tensor whose shape is [] gets no shape here and an empty Box later on
    ifm_tensor = ps.ifm_tensor
    ifm_shape = None
    if ifm_tensor.shape != []:
        ifm_shape = ps.ifm_shapes[0]
    ifm2_tensor = ps.ifm2_tensor
    ifm2_shape = None
    if ifm2_tensor is not None and ifm2_tensor.shape != []:
        ifm2_shape = ps.ifm_shapes[1]
    ofm_tensor = ps.ofm_tensor
    ofm_shape = ps.ofm_shapes[0]
    weight_tensor = ps.weight_tensor
    scale_tensor = ps.scale_tensor

    ofm_start = [0, 0, 0, 0]
    ofm_end = ofm_shape.as_list()

    strides = None
    skirt = None
    upscaling = 1
    if ps.primary_op is not None:
        strides = ps.primary_op.attrs.get("strides", None)
        skirt = ps.primary_op.attrs.get("skirt", None)
        if ps.primary_op.type == Op.Conv2DBackpropInputSwitchedBias:
            upscaling = ofm_shape.height // ifm_shape.height
        elif ps.primary_op.type == Op.ResizeBilinear:
            upscaling = round_up_divide(ofm_shape.height, ifm_shape.height)

    concat_offset = [0, 0, 0, 0]

    for op in ps.ops:
        if op.write_offset is not None:
            # The op writes into a sub-region of the OFM: restrict the OFM range to it
            concat_offset = op.write_offset.as_list()
            ofm_start = concat_offset[:]
            ofm_end = (op.write_offset + op.write_shape).as_list()
        if op.type.is_relu_op() or op.type in (Op.Tanh, Op.Sigmoid):
            ps.primary_op.activation = create_activation_function(op.type)

    if strat == SchedulingStrategy.WeightStream:
        # Step along the last OFM axis; a single step covers everything when there
        # are no weights or the weights do not need a DMA.
        ofm_step = block_config[-1]
        ofm_stop = ofm_end[-1]
        if weight_tensor is None or not weight_tensor.needs_dma():
            ofm_step = ofm_stop
        for start in range(ofm_start[-1], ofm_stop, ofm_step):
            end = min(start + ofm_step, ofm_stop)
            ofm_start[-1] = start
            ofm_end[-1] = end
            ofm_box = Box(ofm_start, ofm_end)

            # NOTE(review): the IfmStream branch passes (..., k_height, upscaling) to
            # transform_with_strides_and_skirt while the calls here pass upscaling one
            # positional slot earlier - confirm against the callee's signature that
            # upscaling is not being received as the kernel height.
            if ifm_shape is not None:
                ifm_box, _, _ = ofm_box.transform_with_strides_and_skirt(
                    strides, skirt, ifm_shape, npu_block_type, concat_offset, split_offsets[0], upscaling,
                )
            else:
                ifm_box = Box([], [])
            if ifm2_shape is not None:
                ifm2_box, _, _ = ofm_box.transform_with_strides_and_skirt(
                    strides, skirt, ifm2_shape, npu_block_type, concat_offset, split_offsets[1], upscaling,
                )
            else:
                ifm2_box = Box([], [])

            yield from _dma_intermediates_if_necessary(
                ps, ofm_box, strides, skirt, npu_block_type, concat_offset, split_offsets, upscaling
            )

            weight_box = None
            if weight_tensor is not None:
                # Weight range is expressed relative to the concat offset of the OFM
                weight_offset = concat_offset[len(weight_tensor.shape) - 1]
                weight_oc_start = start - weight_offset
                weight_oc_end = end - weight_offset

                weight_box = Box.make_weight_box(
                    weight_tensor.shape,
                    npu_block_type,
                    weight_oc_start,
                    weight_oc_end,
                    weight_tensor.weight_transpose_depthwise,
                )
                yield from dma_if_necessary(ps, weight_box, weight_tensor)

            yield NpuStripe(
                ps,
                block_config,
                is_first,
                is_last,
                True,
                True,
                ifm_tensor,
                ifm_box,
                ofm_tensor,
                ofm_box,
                weight_tensor,
                weight_box,
                scale_tensor,
                ifm2_tensor=ifm2_tensor,
                ifm2_box=ifm2_box,
            )

    elif strat == SchedulingStrategy.IfmStream:
        assert ifm_shape is not None
        y_step = block_config[0]
        y_start = ofm_start[-3]
        y_dim = ofm_end[-3]

        if idx > 0:
            # IFM rows are produced by the previous pass, whose commands are pulled on demand
            ifm_y_present = 0
            prev_pass = passes[idx - 1]
            prev_pass_gen = generate_high_level_command_stream_for_pass(strat, passes, block_configs, idx - 1)
        else:
            # First pass of the list: the whole IFM is already present
            ifm_y_present = ifm_shape.height
            prev_pass_gen = []
            prev_pass = None

        if len(passes) == 1:
            # no cascading, can just issue one big stripe
            # but only if we've done allocation and OFM does not overlap IFM
            if ifm_tensor.address is not None and ofm_tensor.address is not None:
                if (
                    ifm_tensor.address + ifm_tensor.storage_size() <= ofm_tensor.address
                    or ofm_tensor.address + ofm_tensor.storage_size() <= ifm_tensor.address
                ):
                    y_step = y_dim

        # Created once on the first stripe that needs them, then reused for later stripes
        weight_box = None
        scale_box = None

        for start in range(y_start, y_dim, y_step):
            end = min(start + y_step, y_dim)
            ofm_start[-3] = start
            ofm_end[-3] = end
            ofm_box = Box(ofm_start, ofm_end)

            k_height = 1
            if npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
                if ps.primary_op is not None:
                    k_height = ps.primary_op.attrs["ksize"][1]
            else:
                if weight_tensor is not None:
                    k_height = weight_tensor.shape[0]

            ifm_box, pad_top, pad_bottom = ofm_box.transform_with_strides_and_skirt(
                strides, skirt, ifm_shape, npu_block_type, concat_offset, split_offsets[0], k_height, upscaling,
            )

            ifm_y_needed = 1
            if len(ifm_box.end_coord) >= 3:
                ifm_y_needed = ifm_box.end_coord[-3]
            if ifm_y_present < ifm_y_needed:
                # Emit commands of the previous pass until it has produced enough rows
                for prev_cmd in prev_pass_gen:
                    yield prev_cmd
                    rng = prev_cmd.get_ofm_y_range_for_pass(prev_pass)
                    if rng is not None:
                        ifm_y_present = max(ifm_y_present, rng[1])
                        if ifm_y_present >= ifm_y_needed:
                            break

            yield from _dma_intermediates_if_necessary(
                ps, ofm_box, strides, skirt, npu_block_type, concat_offset, split_offsets, upscaling
            )

            if scale_tensor is not None and scale_tensor.purpose == TensorPurpose.FSBias and scale_box is None:
                scale_box = Box([0] * len(scale_tensor.shape), list(scale_tensor.shape))
                yield from dma_if_necessary(ps, scale_box, scale_tensor)

            if weight_tensor is not None and weight_box is None:
                weight_box = Box.make_weight_box(
                    weight_tensor.shape, npu_block_type, weights_transposed=weight_tensor.weight_transpose_depthwise
                )
                yield from dma_if_necessary(ps, weight_box, weight_tensor)

            # Check if first/last stripe in pass
            is_first_h_stripe = start == y_start
            is_last_h_stripe = (start + y_step) >= y_dim

            stripe = NpuStripe(
                ps,
                block_config,
                is_first,
                is_last,
                is_first_h_stripe,
                is_last_h_stripe,
                ifm_tensor,
                ifm_box,
                ofm_tensor,
                ofm_box,
                weight_tensor,
                weight_box,
                scale_tensor,
                None,
                None,
                pad_top,
                pad_bottom,
            )
            yield stripe
    else:
        # Raised explicitly so the check is not stripped when running with -O
        raise AssertionError("unknown scheduling strategy")
291
292
def generate_high_level_command_stream_for_pass_list(strat, passes, block_configs):
    """Generate the high-level commands for a list of passes under one strategy.

    Args:
        strat: SchedulingStrategy.WeightStream or SchedulingStrategy.IfmStream.
        passes: the passes to generate commands for.
        block_configs: block configurations, indexed like `passes`.

    Yields:
        The commands produced by generate_high_level_command_stream_for_pass.
        With WeightStream every pass is generated in order. With IfmStream only
        the last pass is requested, and it is given the whole pass list.

    Raises:
        AssertionError: if `strat` is neither WeightStream nor IfmStream.
    """
    if strat == SchedulingStrategy.WeightStream:
        for idx in range(len(passes)):
            yield from generate_high_level_command_stream_for_pass(strat, passes, block_configs, idx)
    elif strat == SchedulingStrategy.IfmStream:
        yield from generate_high_level_command_stream_for_pass(strat, passes, block_configs, len(passes) - 1)
    else:
        # Raised explicitly: `assert 0` is stripped under -O, which would silently
        # produce an empty command stream for an unknown strategy.
        raise AssertionError("Unknown streaming strategy")
301
302
def generate_high_level_command_stream_for_cascaded_pass(cps):
    """Yield the high-level commands for every pass of the cascaded pass `cps`.

    The strategy, the pass list and each pass's own `block_config` are taken
    from `cps` and handed to generate_high_level_command_stream_for_pass_list.
    """
    strategy = cps.strategy
    passes = cps.passes
    block_configs = [ps.block_config for ps in cps.passes]
    yield from generate_high_level_command_stream_for_pass_list(strategy, passes, block_configs)
307
308
def generate_high_level_command_stream(nng, sg, arch, verbose_high_level_command_stream):
    """Build the high-level command stream of subgraph `sg` and store it on `sg`.

    Commands are collected, in order, from every cascaded pass of `sg` whose
    placement is PassPlacement.Npu, and the resulting list is assigned to
    `sg.high_level_command_stream`. When `verbose_high_level_command_stream`
    is truthy the stream is printed through `sg.print_high_level_command_stream()`.

    `nng` and `arch` are accepted but not used by this function.
    """
    commands = []
    for cps in sg.cascaded_passes:
        if cps.placement == PassPlacement.Npu:
            commands.extend(generate_high_level_command_stream_for_cascaded_pass(cps))

    sg.high_level_command_stream = commands

    if verbose_high_level_command_stream:
        sg.print_high_level_command_stream()
318
319
def calc_allowed_ofm_ifm_overlap_for_pass_list(strat, passes, block_configs):
    """Work out how much of the OFM may overlap the IFM for a list of passes.

    Returns 0 when the first pass has no IFM tensor, the last pass has no OFM
    tensor, the strategy is WeightStream, or an address offset needed at the
    start of a first-pass command or the end of a last-pass command is None.

    Otherwise the generated command stream is walked. For each NPU pass command
    flagged first or last, the highest OFM offset written so far is compared with
    the lowest IFM offset still to be read. The smallest resulting
    `ofm_size - overlap_required` is returned, clamped to be non-negative.
    """
    first_pass = passes[0]
    last_pass = passes[-1]
    if not first_pass.ifm_tensor or not last_pass.ofm_tensor:
        return 0

    # Until the first command is seen, the IFM read position is the full IFM size
    ifm_read_offset = first_pass.ifm_tensor.storage_size()
    ofm_bytes = last_pass.ofm_tensor.storage_size()
    if strat == SchedulingStrategy.WeightStream:
        return 0

    highest_write_offset = 0
    smallest_overlap = 999999999999999999999  # sentinel larger than any real size

    for cmd in generate_high_level_command_stream_for_pass_list(strat, passes, block_configs):
        if not cmd.is_npu_pass_command():
            continue

        if cmd.is_first:
            # Lowest IFM offset this command reads
            ifm_read_offset = cmd.ifm_tensor.address_offset_for_coordinate(
                cmd.ifm_box.start_coord, cmd.ps.ifm_shapes[0], is_top_box=False
            )
            if ifm_read_offset is None:
                return 0

        if cmd.is_last:
            # Highest OFM offset this command writes
            ofm_write_offset = cmd.ofm_tensor.address_offset_for_coordinate(
                cmd.ofm_box.end_coord, cmd.ps.ofm_shapes[0], is_top_box=True
            )
            if ofm_write_offset is None:
                return 0
            highest_write_offset = max(ofm_write_offset, highest_write_offset)

        if cmd.is_first or cmd.is_last:
            required = max(highest_write_offset - min(ifm_read_offset, ofm_bytes), 0)
            overwritable = ofm_bytes - required
            smallest_overlap = min(smallest_overlap, overwritable)

        if cmd.is_first:
            # Once the command has run, the IFM has been read up to the end of its box
            ifm_read_offset = cmd.ifm_tensor.address_offset_for_coordinate(
                cmd.ifm_box.end_coord, cmd.ps.ifm_shapes[0], is_top_box=True
            )

    return max(smallest_overlap, 0)