Johan Alfven | 7647b0f | 2024-04-02 20:56:09 +0200 | [diff] [blame] | 1 | # SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com> |
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Rickard Bolin | bc6ee58 | 2022-11-04 08:24:29 +0000 | [diff] [blame] | 16 | # |
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 17 | # Description: |
| 18 | # Common functions and definitions used during the graph optimization. |
Patrik Gustavsson | c74682c | 2021-08-17 14:26:38 +0200 | [diff] [blame] | 19 | from typing import Tuple |
| 20 | |
Patrik Gustavsson | df99510 | 2021-08-23 15:33:59 +0200 | [diff] [blame] | 21 | import numpy as np |
| 22 | |
Tim Hall | d6efcd3 | 2022-09-02 15:01:01 +0100 | [diff] [blame] | 23 | from .architecture_features import Accelerator |
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 24 | from .data_type import DataType |
| 25 | from .debug_database import DebugDatabase |
Patrik Gustavsson | df99510 | 2021-08-23 15:33:59 +0200 | [diff] [blame] | 26 | from .errors import UnsupportedFeatureError |
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 27 | from .errors import VelaError |
| 28 | from .operation import Op |
Raul Farkas | 6620714 | 2023-05-25 11:15:20 +0100 | [diff] [blame] | 29 | from .operation import Operation |
Fredrik Svedberg | 0ac0804 | 2023-04-11 22:35:04 +0200 | [diff] [blame] | 30 | from .operation_util import create_avgpool_nop |
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 31 | from .shape4d import Shape4D |
Raul Farkas | 72c6a24 | 2023-03-16 16:38:05 +0000 | [diff] [blame] | 32 | from .tensor import Tensor |
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 33 | |
# Op types that only rearrange or alias data in memory (no arithmetic).
# Used throughout graph optimization to decide which ops can be bypassed,
# converted to DMA, or must have been removed (see bypass_memory_only_ops
# and check_memory_only_removed in this module).
memory_only_ops = (
    Op.Reshape,
    Op.QuantizedReshape,
    Op.Squeeze,
    Op.ExpandDims,
    Op.Identity,
)
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 41 | |
| 42 | |
def _avoid_nhcwb16_for_concat(tens):
    # NHCWB16 can only be used for a concat ofm when every producing op that
    # specifies a write offset is 16-aligned in the C-dimension; only then is
    # the ofm address offset of each contributing operation 16 byte aligned.
    # Concatenation along any other axis leaves all offsets based on c = 0,
    # which is always 16 byte aligned in the NHCWB16 format.
    for producer in tens.ops:
        offset = producer.write_offset
        if offset is not None and offset.depth % 16 != 0:
            return True
    return False
| 49 | |
| 50 | |
def _avoid_nhcwb16_for_split(tens):
    # NHCWB16 must be avoided for the input if any consumer reads it with a
    # C-dimension offset that is not a multiple of 16.

    def _misaligned(offset):
        # A read offset prevents NHCWB16 when present and not 16-aligned in depth
        return offset is not None and offset.depth % 16 != 0

    for consumer in tens.consumer_list:
        if consumer.ifm == tens and _misaligned(consumer.read_offsets[0]):
            return True
        if consumer.ifm2 is not None and consumer.ifm2 == tens and _misaligned(consumer.read_offsets[1]):
            return True
    return False
| 66 | |
| 67 | |
def _avoid_nhcwb16_for_shapes(tens):
    # NHCWB16 must be avoided if any producer/consumer works on a shape that
    # differs from the tensor's own shape.
    tens_shape = Shape4D(tens.shape)

    for consumer in tens.consumer_list:
        if consumer.ifm == tens:
            consumer_shape = consumer.ifm_shapes[0]
        elif consumer.type.is_binary_elementwise_op() and consumer.ifm2 == tens:
            consumer_shape = consumer.ifm_shapes[1]
        else:
            assert False
        if tens_shape != consumer_shape:
            return True

    return any(tens_shape != producer.ofm_shapes[0] for producer in tens.ops)
| 85 | |
| 86 | |
def _avoid_nhcwb16_for_memory_only(tens):
    # NHCWB16 must be avoided if any producer or consumer is a DMA (Memcpy) op
    for related_op in tens.consumer_list + tens.ops:
        if related_op.type == Op.Memcpy:
            return True
    return False
| 90 | |
| 91 | |
def check_format_restrictions(tens: Tensor, arch) -> None:
    """Enable the non-linear (NHCWB16) format for tens if no restriction applies.

    Runs a series of early-return checks; only when all of them pass is
    tens.force_linear_format cleared, allowing NHCWB16 to be used.
    """
    if tens.force_linear_format:
        return
    # A tensor with no producing ops cannot be considered
    if len(tens.ops) < 1:
        return
    # Graph inputs/constants, or tensors with null consumers (CPU boundary
    # list terminators), must stay in linear format
    if tens.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const) or any(
        cons is None for cons in tens.consumer_list
    ):
        return

    # Writing to the buffer of a variable tensor needs to be linear format
    if tens.ops[0].memory_function == Op.VariableTensorWrite:
        return

    # Check if any of the producers/consumers is run on CPU
    if not all(cons.run_on_npu for cons in tens.consumer_list):
        return
    if not all(prod.run_on_npu for prod in tens.ops):
        return

    # "Concat" ofm exception: write offsets must be 16-aligned in depth
    if _avoid_nhcwb16_for_concat(tens):
        return

    # "Split" ifm exception: read offsets must be 16-aligned in depth
    if _avoid_nhcwb16_for_split(tens):
        return

    # Shapes checking: check all producers/consumers are NHCWB16 compatible with tens.shape
    if _avoid_nhcwb16_for_shapes(tens):
        return

    # Memory only ifm/ofm exception: DMA ops must use NHCW
    if _avoid_nhcwb16_for_memory_only(tens):
        return

    # Resize bilinear half pixel center implementation requires OFM with linear format to
    # allow stride modification in H/W dimensions.
    for op in tens.ops:
        if op.original_type == Op.ResizeBilinear and op.type == Op.DepthwiseConv2DBias:
            return

    for op in tens.consumer_list:
        if op.type == Op.ReduceSum and (
            tens.dtype == DataType.int32 or arch.accelerator_config == Accelerator.Ethos_U65_512
        ):
            # ReduceSum requires NHWC input
            return
        if op.type == Op.Reshape:
            # Using NHCWB16 format for a no-op reshape is only an option if subsequent
            # consumers do not also need to perform a reshape or if the OFM is going to
            # be processed by CPU operations. No-op reshape consumers with empty lists
            # (those that have no consumers, or null-consumers used as list terminators)
            # must use normal NHWC output.

            # Recursively yields one flag per transitive consumer; a True flag
            # means a null or CPU consumer was found down the reshape chain
            def incompatible_consumers(oper):
                if oper and oper.type == Op.Reshape:
                    for consumer in oper.outputs[0].consumer_list:
                        yield from incompatible_consumers(consumer)
                yield not oper or not oper.run_on_npu

            if not any(incompatible_consumers(op)):

                # Recursively yields every reshape op in the consumer chain
                def get_rewrites(oper):
                    if oper and oper.type == Op.Reshape:
                        for consumer in oper.outputs[0].consumer_list:
                            yield from get_rewrites(consumer)
                        yield oper

                # Detect no-op reshapes by comparing their full input and output tensor shapes.
                inshape = op.ifm_shapes[0]
                compatible_shape = [(inshape == oper.ofm_shapes[0]) for oper in get_rewrites(op)]
                # An empty list (no reshapes found) also forces linear format
                if not (compatible_shape and all(compatible_shape)):
                    return
            else:
                return

    # All checks passed: NHCWB16 may be used for this tensor
    tens.force_linear_format = False
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 171 | |
| 172 | |
def calc_explicit_padding(input_size, stride, filter_size, pad_before, pad_after) -> Tuple[int, int]:
    """
    Translate the explicit padding of a PAD operation into the hardware padding
    that gives equivalent results.

    Returns (pad_before, pad_after) where the after-padding may have been
    reduced to stay consistent with the stride and input size.
    """
    total_padding = needed_total_padding(input_size, stride, filter_size)

    # Reduce the bottom/right padding until its remainder modulo the stride
    # matches that of the padding remaining after the before-padding, or it
    # reaches zero.
    target_remainder = (total_padding - pad_before) % stride
    adjusted_after = pad_after
    while adjusted_after > 0 and adjusted_after % stride != target_remainder:
        adjusted_after -= 1
    return pad_before, adjusted_after
| 186 | |
| 187 | |
def needed_total_padding(input_size, stride, filter_size):
    """Return the total padding the hardware needs for the given geometry."""
    remainder = input_size % stride
    # A zero remainder means the filter overhangs by filter_size - stride;
    # otherwise only the part beyond the remainder needs padding.
    overlap = stride if remainder == 0 else remainder
    return max(filter_size - overlap, 0)
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 194 | |
| 195 | |
def set_tensor_equivalence(op: Operation, arch, nng) -> Operation:
    """Give the inputs of a memory only op the same equivalence id as its output."""
    if op.type in memory_only_ops:
        out_eid = op.outputs[0].equivalence_id
        for ifm_tens in op.inputs:
            ifm_tens.equivalence_id = out_eid
    return op
| 203 | |
| 204 | |
def set_ifm_ofm_op_shapes(op, arch, nng):
    """Populate ifm/ofm shapes on NPU ops that require them, unless already set."""
    if not op.run_on_npu or not op.type.needs_shapes():
        return op
    if not (op.ifm_shapes or op.ofm_shapes):
        # Shapes not yet set
        op.set_ifm_ofm_shapes()
    return op
| 212 | |
| 213 | |
def check_splitsliceread_to_consumer_shape(op, cons_op):
    """Return True if the SplitSliceRead ofm shape fits within the consumer ifm shape."""
    assert op.type == Op.SplitSliceRead
    # Pick the consumer ifm that is connected to the SplitSliceRead ofm
    if cons_op.ifm == op.ofm:
        cons_shape = cons_op.ifm_shapes[0].as_list()
    elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == op.ofm:
        cons_shape = cons_op.ifm_shapes[1].as_list()
    else:
        # Not connected to this consumer at all
        return False

    # Every read shape value must be <= the corresponding consumer shape value
    read_shape = op.ofm_shapes[0].as_list()
    return all(read_shape[i] <= dim for i, dim in enumerate(cons_shape))
Johan Alfven | 7647b0f | 2024-04-02 20:56:09 +0200 | [diff] [blame] | 228 | |
| 229 | |
def move_splitsliceread_to_consumer(op, cons_op):
    """Fold a SplitSliceRead into its consumer and disconnect it from the graph.

    The consumer takes over the read offset/shape and is rewired to read
    directly from the SplitSliceRead's ifm.
    """
    assert op.type == Op.SplitSliceRead

    # Determine which consumer input (ifm or ifm2) is fed by the read
    if cons_op.ifm == op.ofm:
        idx = 0
    elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == op.ofm:
        idx = 1
    else:
        idx = None

    if idx is not None:
        cons_op.read_offsets[idx] = op.read_offsets[0]
        cons_op.read_shapes[idx] = op.read_shapes[0]
        cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[idx])
        cons_op.ifm_shapes[idx] = op.ifm_shapes[0]

    # Detach the SplitSliceRead from its ofm and ifm
    op.ofm.consumer_list.remove(cons_op)
    op.ofm.ops = []
    if op in op.ifm.consumer_list:
        op.ifm.consumer_list.remove(op)
Patrik Gustavsson | f1580f0 | 2021-09-01 12:43:02 +0200 | [diff] [blame] | 247 | |
| 248 | |
def check_memory_only_removed(op, arch):
    """Raise VelaError if a memory only op still remains on the NPU path."""
    if not op.run_on_npu:
        return
    if op.type in memory_only_ops:
        # Memory only operators should have been removed by earlier passes
        raise VelaError(f"Memory only {op.type} op {op} expected to have been removed, still remains")
Patrik Gustavsson | 8f1f9aa | 2021-06-28 07:41:58 +0200 | [diff] [blame] | 253 | |
| 254 | |
def record_optimised(op, arch):
    """Register op in the debug database, skipping constants and placeholders."""
    if op.type in (Op.Const, Op.Placeholder):
        return
    DebugDatabase.add_optimised(op, op)
Patrik Gustavsson | df99510 | 2021-08-23 15:33:59 +0200 | [diff] [blame] | 258 | |
| 259 | |
def bypass_memory_only_ops(op, arch, nng):
    """Remove or convert a memory only op (Reshape, Squeeze, ...) on the NPU path.

    When the op's IFM is NPU-produced and has a single consumer, the op can be
    bypassed completely: the IFM producers are rewired to write straight into
    the op's OFM, so the intermediate tensor disappears.

    Bypassing is not possible in two cases:
    - the IFM is produced by the CPU, so the tensor must be preserved and
      cannot be removed from the graph, or
    - the IFM has multiple consumers, so the other consumers still need it.
    In both cases the op is instead converted into a Memcpy (DMA nop). If the
    DMA IFM and OFM end up in the same memory area, the DMA op is removed
    later when the command stream is generated.
    """
    if not op.run_on_npu or op.type not in memory_only_ops:
        return op

    shared_ifm = len(op.ifm.consumer_list) > 1
    cpu_produced_ifm = any(producer is not None and not producer.run_on_npu for producer in op.ifm.ops)

    if shared_ifm or cpu_produced_ifm:
        # Cannot bypass: keep the data movement as an explicit DMA op
        op.type = Op.Memcpy
        DebugDatabase.add_optimised(op, op)
    else:
        # One-to-one connection: connect the IFM producers directly to the OFM
        ifm, ofm = op.ifm, op.ofm
        ofm.ops = []
        for producer in ifm.ops:
            producer.outputs = [ofm]
            ofm.ops.append(producer)

    return op
| 318 | |
| 319 | |
def convert_depthwise_to_conv(op: Operation, arch, nng) -> Operation:
    """Rewrite DepthwiseConv2DBias with 'depth multiplier' > 1 as Conv2DBias.

    The rewrite is only valid when the IFM depth is 1 and the OFM depth equals
    the depth multiplier; any other combination raises UnsupportedFeatureError.
    """
    if op.type != Op.DepthwiseConv2DBias or op.attrs["depth_multiplier"] == 1:
        return op

    ifm_shape = op.ifm_shapes[0]
    ofm_shape = op.ofm_shapes[0]
    weight_tensor = op.inputs[1]

    if ifm_shape.depth != 1 or ofm_shape.depth != op.attrs["depth_multiplier"]:
        raise UnsupportedFeatureError(
            f"Unsupported 'DEPTHWISE_CONV_2D' with depth_multiplier = {op.attrs['depth_multiplier']},"
            f" ifm channels = {ifm_shape.depth}, ofm channels = {ofm_shape.depth}"
        )

    # Change op type to Conv2d and drop the multiplier attributes
    op.type = Op.Conv2DBias
    del op.attrs["channel_multiplier"]
    del op.attrs["depth_multiplier"]

    # Swap the last two weight axes so the multiplier axis becomes the
    # output-channel axis expected by a regular convolution
    weight_tensor.values = np.transpose(weight_tensor.values, (0, 1, 3, 2))
    weight_tensor.set_all_shapes(list(weight_tensor.values.shape))
    DebugDatabase.add_optimised(op, op)
    return op
Patrik Gustavsson | f436ada | 2021-09-14 14:56:48 +0200 | [diff] [blame] | 345 | |
| 346 | |
def create_avg_pool_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """Create an average pool nop that writes one concat input into the concat ofm."""
    ofm = concat_op.ofm
    pool_op = create_avgpool_nop(name)
    # Enforce original type since this is used in pass packing to group concat ops
    pool_op._original_type = concat_op.type

    # Wire the pool between the concat input and the shared ofm
    pool_op.inputs = [ifm]
    pool_op.outputs = [ofm]
    ofm.ops.append(pool_op)

    # The pool writes the whole ifm at the given offset inside the ofm
    pool_op.write_offset = write_offset
    pool_op.write_shape = ifm_shape
    pool_op.ifm_shapes.append(ifm_shape)
    pool_op.ofm_shapes.append(concat_op.ofm_shapes[0])
    pool_op.memory_function = Op.ConcatSliceWrite

    DebugDatabase.add_optimised(concat_op, pool_op)
    return pool_op