# SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Common functions and definitions used during the graph optimization.
from typing import Tuple

import numpy as np

from .architecture_features import Accelerator
from .data_type import DataType
from .debug_database import DebugDatabase
from .errors import UnsupportedFeatureError
from .errors import VelaError
from .operation import Op
from .operation import Operation
from .operation_util import create_avgpool_nop
from .shape4d import Shape4D
from .tensor import Tensor

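# Operators that only move or alias data in memory and perform no computation;
# the graph optimiser either bypasses them or converts them into Memcpy operations.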
memory_only_ops = (
    Op.Reshape,
    Op.QuantizedReshape,
    Op.Squeeze,
    Op.ExpandDims,
    Op.Identity,
)


def _avoid_nhcwb16_for_concat(tens):
    # If axis corresponds to C-dimension, NHCWB16 can only be used in the output if all the concat_start's are a
    # multiple of 16. This is because only then will the address offset for the OFM be 16 byte aligned for all
    # operations. For other values of axis the address offsets will be 16 byte aligned, as they are all based on
    # c = 0 and those addresses are always 16 byte aligned due to the NHCWB16 format.
    return any(op.write_offset.depth % 16 != 0 for op in tens.ops if op.write_offset is not None)


def _avoid_nhcwb16_for_split(tens):
    # If the read offset is not a multiple of 16 in the C-dimension, NHCWB16 needs to be avoided in the input

    # Return True if NHCWB16 needs to be avoided
    def offset_not_aligned(read_offset):
        return read_offset is not None and (read_offset.depth % 16) != 0

    for cons_op in tens.consumer_list:
        if cons_op.ifm == tens:
            if offset_not_aligned(cons_op.read_offsets[0]):
                return True
        if cons_op.ifm2 is not None and cons_op.ifm2 == tens:
            if offset_not_aligned(cons_op.read_offsets[1]):
                return True
    return False


def _avoid_nhcwb16_for_shapes(tens):
    # check all producers/consumers to see if any op shape is preventing NHCWB16
    for cons_op in tens.consumer_list:
        if cons_op.ifm == tens:
            cons_op_shape = cons_op.ifm_shapes[0]
        elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == tens:
            cons_op_shape = cons_op.ifm_shapes[1]
        else:
            assert False
        if Shape4D(tens.shape) != cons_op_shape:
            return True

    for prod_op in tens.ops:
        if Shape4D(tens.shape) != prod_op.ofm_shapes[0]:
            return True

    return False


def _avoid_nhcwb16_for_memory_only(tens):
    # check all producers/consumers to see if any op is preventing NHCWB16
    return any(op.type == Op.Memcpy for op in (tens.consumer_list + tens.ops))


# Check if non linear format can be used
def check_format_restrictions(tens: Tensor, arch):
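    """Determine whether the non-linear NHCWB16 format can be used for tens.

    Every early return below leaves the tensor's current format selection untouched;
    only when all checks pass is force_linear_format cleared at the end.
    """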
    if tens.force_linear_format:
        return
    if len(tens.ops) < 1:
        return
    if tens.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const) or any(
        cons is None for cons in tens.consumer_list
    ):
        return

    # Writing to the buffer of a variable tensor needs to be linear format
    if tens.ops[0].memory_function == Op.VariableTensorWrite:
        return

    # Check if any of the producers/consumers is run on CPU
    if not all(cons.run_on_npu for cons in tens.consumer_list):
        return
    if not all(prod.run_on_npu for prod in tens.ops):
        return

    # "Concat" ofm exception:
    if _avoid_nhcwb16_for_concat(tens):
        return

    # "Split" ifm exception:
    if _avoid_nhcwb16_for_split(tens):
        return

    # Shapes checking: check all producers/consumers are NHCWB16 compatible with tens.shape
    if _avoid_nhcwb16_for_shapes(tens):
        return

    # Memory only ifm/ofm exception: DMA ops must use NHCW
    if _avoid_nhcwb16_for_memory_only(tens):
        return

    # Resize bilinear half pixel center implementation requires OFM with linear format to
    # allow stride modification in H/W dimensions.
    for op in tens.ops:
        if op.original_type == Op.ResizeBilinear and op.type == Op.DepthwiseConv2DBias:
            return

    for op in tens.consumer_list:
        if op.type == Op.ReduceSum and (
            tens.dtype == DataType.int32 or arch.accelerator_config == Accelerator.Ethos_U65_512
        ):
            # ReduceSum requires NHWC input
            return
        if op.type == Op.Reshape:
            # Using NHCWB16 format for a no-op reshape is only an option if subsequent
            # consumers do not also need to perform a reshape or if the OFM is going to
            # be processed by CPU operations. No-op reshape consumers with empty lists
            # (those that have no consumers, or null-consumers used as list terminators)
            # must use normal NHWC output.

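            # Walks the chain of Reshape consumers; yields True for any consumer that
            # is missing or not run on the NPU (checked with any() below)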
            def incompatible_consumers(oper):
                if oper and oper.type == Op.Reshape:
                    for consumer in oper.outputs[0].consumer_list:
                        yield from incompatible_consumers(consumer)
                yield not oper or not oper.run_on_npu

            if not any(incompatible_consumers(op)):

                def get_rewrites(oper):
                    if oper and oper.type == Op.Reshape:
                        for consumer in oper.outputs[0].consumer_list:
                            yield from get_rewrites(consumer)
                        yield oper

                # Detect no-op reshapes by comparing their full input and output tensor shapes.
                inshape = op.ifm_shapes[0]
                compatible_shape = [(inshape == oper.ofm_shapes[0]) for oper in get_rewrites(op)]
                if not (compatible_shape and all(compatible_shape)):
                    return
            else:
                return

    tens.force_linear_format = False


def calc_explicit_padding(input_size, stride, filter_size, pad_before, pad_after) -> Tuple[int, int]:
    """
    Based on explicit padding provided in a PAD operation, returns the corresponding hardware padding
    that provides equivalent results.
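
    Example (illustrative values): input_size=10, stride=2, filter_size=3 with an
    explicit padding of (pad_before=1, pad_after=1) gives a total padding of 1, so
    the bottom/right padding is adjusted down and (1, 0) is returned.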
    """
    total_padding = needed_total_padding(input_size, stride, filter_size)

    # The bottom/right padding might need downward adjustment depending on stride/input size
    total_minus_before = total_padding - pad_before
    output_pad_after = pad_after
    while output_pad_after > 0 and output_pad_after % stride != total_minus_before % stride:
        output_pad_after -= 1
    return pad_before, output_pad_after


def needed_total_padding(input_size, stride, filter_size):
    """Compute hardware padding."""
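    # e.g. input_size=10, stride=2, filter_size=3 -> max(3 - 2, 0) = 1
    #      input_size=9,  stride=2, filter_size=3 -> max(3 - (9 % 2), 0) = 2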
    if input_size % stride == 0:
        return max(filter_size - stride, 0)

    return max(filter_size - (input_size % stride), 0)


def set_tensor_equivalence(op: Operation, arch, nng) -> Operation:
    """Set input/output tensor equivalence to the same id for memory operations."""
    if op.type in memory_only_ops:
        eid = op.outputs[0].equivalence_id
        for inp in op.inputs:
            inp.equivalence_id = eid
    return op


def set_ifm_ofm_op_shapes(op, arch, nng):
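    """Set the op's ifm_shapes/ofm_shapes for NPU ops that need shape information, unless already set."""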
    if op.run_on_npu and op.type.needs_shapes():
        if op.ifm_shapes or op.ofm_shapes:
            # Shapes already set
            return op
        op.set_ifm_ofm_shapes()
    return op


def check_splitsliceread_to_consumer_shape(op, cons_op):
    assert op.type == Op.SplitSliceRead
    # SplitSliceRead ofm shape must fit within the consumer ifm shape
    if cons_op.ifm == op.ofm:
        cons_shape = cons_op.ifm_shapes[0].as_list()
        read_shape = op.ofm_shapes[0].as_list()
    elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == op.ofm:
        cons_shape = cons_op.ifm_shapes[1].as_list()
        read_shape = op.ofm_shapes[0].as_list()
    else:
        return False

    # All read shape values <= consumer shape values
    return all(read_shape[idx] <= x for idx, x in enumerate(cons_shape))


def move_splitsliceread_to_consumer(op, cons_op):
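    """Fold the SplitSliceRead into cons_op by moving its read offset/shape onto the consumer's ifm (or ifm2)."""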
    assert op.type == Op.SplitSliceRead

    if cons_op.ifm == op.ofm:
        cons_op.read_offsets[0] = op.read_offsets[0]
        cons_op.read_shapes[0] = op.read_shapes[0]
        cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[0])
        cons_op.ifm_shapes[0] = op.ifm_shapes[0]
    elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == op.ofm:
        cons_op.read_offsets[1] = op.read_offsets[0]
        cons_op.read_shapes[1] = op.read_shapes[0]
        cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[1])
        cons_op.ifm_shapes[1] = op.ifm_shapes[0]
    op.ofm.consumer_list.remove(cons_op)
    op.ofm.ops = []
    if op in op.ifm.consumer_list:
        op.ifm.consumer_list.remove(op)


def check_memory_only_removed(op, arch):
    if op.run_on_npu and op.type in memory_only_ops:
        # Memory only operators should have been removed
        raise VelaError(f"Memory only {op.type} op {op} expected to have been removed, still remains")


def record_optimised(op, arch):
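    """Record the op in the debug database (constants and placeholders are skipped)."""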
    if op.type not in (Op.Const, Op.Placeholder):
        DebugDatabase.add_optimised(op, op)


def bypass_memory_only_ops(op, arch, nng):
    if not op.run_on_npu or op.type not in memory_only_ops:
        return op

    # Memory only operators can be completely removed if there is a one to one
    # connection. The reshape OFM can be connected to the previous op.
    #
    #                 Bypassed to
    #                    --->
    #   1x6x6x10                  1x6x6x10
    #     ADD                       ADD
    #      |        ------->         |
    #   1x6x6x10    |              1x20x3x6
    #    RESHAPE    |                MEAN
    #      |    ----+
    #   1x20x3x6
    #     MEAN
    #
    # In the above the ADD OFM = RESHAPE IFM is removed and replaced by
    # the RESHAPE OFM.
    #
    # Then there are two cases when bypassing is not possible. One is when
    # the IFM is produced by the CPU. This tensor must be preserved. It
    # cannot be removed from the graph. The other case is when the IFM has
    # multiple consumers; then it is not possible to just bypass the op and
    # there is a need for a DMA (nop).
    #
    #                 Converts to
    #                    --->
    #    1x6x6x10                   1x6x6x10
    #  -----ADD-----              -----ADD-----
    #  |           |              |           |
    # 1x6x6x10  1x6x6x10        1x6x6x10  1x6x6x10
    #  RESHAPE     MEAN          DMA OP      MEAN
    #    |                         |
    # 1x20x3x6                  1x20x3x6
    #   MEAN                      MEAN
    #
    # If the DMA IFM and DMA OFM end up in the same memory area
    # the DMA op will be removed when the cmd stream is generated.

    ifm_has_multiple_cons = len(op.ifm.consumer_list) > 1
    ifm_is_cpu_produced = any(ifm_prod is not None and not ifm_prod.run_on_npu for ifm_prod in op.ifm.ops)

    if ifm_has_multiple_cons or ifm_is_cpu_produced:
        # Convert to a memcpy op
        op.type = Op.Memcpy
        DebugDatabase.add_optimised(op, op)
    else:
        # Bypass op
        ofm = op.ofm
        ifm = op.ifm
        ofm.ops = []
        for prev_op in ifm.ops:
            prev_op.outputs = [ofm]
            ofm.ops.append(prev_op)

    return op


def convert_depthwise_to_conv(op: Operation, arch, nng) -> Operation:
    """Convert DepthwiseConv2DBias to Conv2D to allow support for DepthwiseConv2DBias ops with 'depth multiplier' > 1,
    as long as IFM depth = 1 and OFM depth is equal to the depth multiplier.
    """
    if op.type == Op.DepthwiseConv2DBias and (op.attrs["depth_multiplier"] != 1):
        ifm_shape = op.ifm_shapes[0]
        weight_tensor = op.inputs[1]
        ofm_shape = op.ofm_shapes[0]
        # Depthwise is equivalent to a single conv2d if the ifm depth is 1 and
        # the ofm depth equals the depth multiplier.
        if (ifm_shape.depth == 1) and (ofm_shape.depth == op.attrs["depth_multiplier"]):
            # Change op type to Conv2d
            op.type = Op.Conv2DBias
            del op.attrs["channel_multiplier"]
            del op.attrs["depth_multiplier"]

            weight_tensor.values = np.transpose(weight_tensor.values, (0, 1, 3, 2))
            weight_tensor.set_all_shapes(list(weight_tensor.values.shape))
            DebugDatabase.add_optimised(op, op)
        else:
            raise UnsupportedFeatureError(
                f"Unsupported 'DEPTHWISE_CONV_2D' with depth_multiplier = {op.attrs['depth_multiplier']},"
                f" ifm channels = {ifm_shape.depth}, ofm channels = {ofm_shape.depth}"
            )
    return op


def create_avg_pool_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """Creates an average pool for the given concat op/input feature map"""
    ofm = concat_op.ofm
    avgpool_op = create_avgpool_nop(name)
    # Enforce original type since this is used in pass packing to group concat ops
    avgpool_op._original_type = concat_op.type
    avgpool_op.inputs = [ifm]
    avgpool_op.outputs = [ofm]

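    # Write this input feature map into the concat OFM at the given offset (ConcatSliceWrite)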
    avgpool_op.write_offset = write_offset
    avgpool_op.write_shape = ifm_shape
    ofm.ops.append(avgpool_op)
    avgpool_op.ifm_shapes.append(ifm_shape)
    avgpool_op.ofm_shapes.append(concat_op.ofm_shapes[0])
    avgpool_op.memory_function = Op.ConcatSliceWrite
    DebugDatabase.add_optimised(concat_op, avgpool_op)
    return avgpool_op