# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Early optimisation of a TensorFlow Lite based network graph, using the rewrite_graph module
# to do the traversal of the graph.
import math
import uuid

import numpy as np

from . import fp_math
from . import rewrite_graph
from . import scaling
from .api import NpuRoundingMode
from .data_type import BaseType
from .data_type import DataType
from .debug_database import DebugDatabase
from .errors import UnsupportedFeatureError
from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .graph_optimiser_util import bypass_memory_only_ops
from .graph_optimiser_util import calc_explicit_padding
from .graph_optimiser_util import convert_depthwise_to_conv
from .graph_optimiser_util import convert_to_lut
from .graph_optimiser_util import memory_only_ops
from .graph_optimiser_util import move_splitsliceread_to_consumer
from .graph_optimiser_util import needed_total_padding
from .graph_optimiser_util import set_ifm_ofm_op_shapes
from .graph_optimiser_util import set_tensor_equivalence
from .numeric_util import clamp_sigmoid
from .numeric_util import round_away_zero
from .operation import create_activation_function
from .operation import ExplicitScaling
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
from .operation import Padding
from .operation_util import create_add_nop
from .operation_util import create_avgpool_nop
from .operation_util import get_pad_values_from_input
from .scaling import quantise_scale
from .shape4d import Shape4D
from .softmax import SoftMax
from .tensor import check_quantized_tens_scaling_equal
from .tensor import create_const_tensor
from .tensor import create_equivalence_id
from .tensor import QuantizationParameters
from .tensor import Tensor
from .tensor import TensorPurpose
from .tflite_mapping import optype_to_builtintype

passthrough_nodes = (Op.Identity,)


def create_avg_pool_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """Creates an average pool for the given concat op/input feature map"""
    ofm = concat_op.ofm
    avgpool_op = create_avgpool_nop(name)
    avgpool_op.inputs = [ifm]
    avgpool_op.outputs = [ofm]

    avgpool_op.write_offset = write_offset
    avgpool_op.write_shape = ifm_shape
    ofm.ops.append(avgpool_op)
    avgpool_op.ifm_shapes.append(ifm_shape)
    avgpool_op.ofm_shapes.append(concat_op.ofm_shapes[0])
    avgpool_op.memory_function = Op.ConcatSliceWrite
    DebugDatabase.add_optimised(concat_op, avgpool_op)
    return avgpool_op


def remove_passthrough_tensor(tens, arch, nng):
    if len(tens.ops) == 1 and tens.ops[0].type in passthrough_nodes:
        assert len(tens.ops[0].inputs) == 1
        tens = tens.ops[0].inputs[0]
    return tens


def rewrite_concat_ops(op, arch):
    if not op.run_on_npu or not op.type.is_concat_op():
        return

    axis_4D = 0
    ofm = op.ofm
    ofm.ops = []
    offset = 0

    unfuse_activation_function(op)

    if op.type == Op.Pack:
        # Pack is also referred to as Stack
        axis = int(op.attrs["axis"])
        if axis < 0:  # Convert to positive axis
            axis = len(op.inputs[0].shape) + 1 + axis

        desired_shape = op.inputs[0].shape[:axis] + [1] + op.inputs[0].shape[axis:]

        axis_4D = axis + (4 - len(desired_shape))

        for idx, inp in enumerate(op.inputs):
            op.ifm_shapes[idx] = Shape4D(desired_shape)
        op.type = Op.PackReshaped

    inputs, axis = op.get_concat_inputs_axis()
    for idx, inp in enumerate(inputs):
        if op.type != Op.PackReshaped:
            op.ifm_shapes[idx] = Shape4D(inp.shape)
            if axis >= 0:
                axis_4D = axis + (4 - len(inp.shape))
            else:
                axis_4D = axis
        write_offset = [0, 0, 0, 0]
        write_offset[axis_4D] = offset
        concat_end = offset + op.ifm_shapes[idx][axis_4D]
        create_avg_pool_for_concat(
            op, op.name + str(idx) + "_avgpool", inp, op.ifm_shapes[idx], Shape4D.from_list(write_offset)
        )
        offset = concat_end
    assert ofm.shape[axis] == offset

    return op


def rewrite_split_ops(tens, arch, nng):

    if len(tens.ops) == 1 and tens.ops[0].type.is_split_op() and tens.ops[0].type != Op.Unpack:
        split_op = tens.ops[0]

        # Not supported so leave it and run on CPU
        if not split_op.run_on_npu:
            return tens

        inp, outputs, axis, offset_start, offset_end = split_op.get_split_inputs_axis()

        tens.ops = []
        new_op = Operation(Op.SplitSliceRead, split_op.name)
        new_op.inputs = [inp]
        ofm_shape_idx = 0
        if None in (offset_end, offset_start):
            read_shape = None
        else:
            # the read shape is relative to each start offset
            read_shape = [oe - os for oe, os in zip(offset_end, offset_start)]

        # For Split the offset cannot be extracted from the tensor so it has to
        # be calculated from the index of the output tensor
        if axis is not None:
            # Get the start and end of the split
            offset_start = [0] * 4
            axis_4D_list = split_op.attrs.get("split_axis_4D", None)  # Present for UnpackReshaped and some StridedSlice
            for idx, out in enumerate(outputs):
                if axis_4D_list is not None:
                    axis_4D = axis_4D_list[idx]
                else:
                    split_op.ofm_shapes[idx] = Shape4D(out.shape)
                    if axis >= 0:
                        axis_4D = axis + (4 - len(out.shape))
                    else:
                        axis_4D = axis

                if out == tens:
                    ofm_shape_idx = idx
                    read_shape = split_op.ofm_shapes[idx]
                    break

                offset_start[axis_4D] += split_op.ofm_shapes[idx][axis_4D]

        new_op.read_offsets[0] = Shape4D.from_list(offset_start, 0)
        new_op.read_shapes[0] = read_shape
        new_op.run_on_npu = True
        new_op.set_output_tensor(tens)
        new_op.ifm_shapes.append(Shape4D(inp.shape))
        new_op.ofm_shapes.append(split_op.ofm_shapes[ofm_shape_idx])
        DebugDatabase.add_optimised(split_op, new_op)

    return tens

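# Illustrative example (added note, not executed): splitting a [1, 8, 8, 16] tensor into two
# outputs along the depth axis with rewrite_split_ops above gives each output its own
# SplitSliceRead; the first reads at offset [0, 0, 0, 0] with read shape [1, 8, 8, 8] and the
# second at offset [0, 0, 0, 8], because offset_start accumulates the size of every preceding
# output along axis_4D.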

def remove_SplitSliceRead(op, arch):

    if op.type == Op.SplitSliceRead:
        # Check if it is possible to put the SplitSliceRead on the tensor consumer, or if an avgpool needs to be inserted
        if (
            len(op.ofm.consumer_list) == 1
            and op.ofm.consumer_list[0] is not None
            and op.ofm.consumer_list[0].run_on_npu
            and op.ofm.consumer_list[0].type not in memory_only_ops
            and op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape)
        ):
            # SplitSliceRead can be performed by tensor consumer
            cons_op = op.ofm.consumer_list[0]
            move_splitsliceread_to_consumer(op, cons_op)
        else:
            avgpool_op = create_avgpool_nop(op.name + "_avgpool")
            avgpool_op.add_input_tensor(op.ifm)
            avgpool_op.outputs = [op.ofm]
            op.ofm.ops.remove(op)
            op.ofm.ops.append(avgpool_op)
            avgpool_op.ifm_shapes.append(op.ifm_shapes[0])
            avgpool_op.ofm_shapes.append(op.ofm_shapes[0])
            avgpool_op.read_offsets[0] = op.read_offsets[0]
            avgpool_op.read_shapes[0] = op.read_shapes[0]

            op.ifm.consumer_list.remove(op)
            DebugDatabase.add_optimised(op, avgpool_op)


def calc_padding_and_skirt(padding_type, kernel, input_shape, explicit_padding):
    k_w, k_h = kernel.dilated_wh()
    s_x, s_y = kernel.stride
    ypad = needed_total_padding(int(input_shape.height), int(s_y), int(k_h))
    xpad = needed_total_padding(int(input_shape.width), int(s_x), int(k_w))
    if padding_type == Padding.SAME:
        left_pad = (xpad + 0) // 2
        right_pad = (xpad + 1) // 2
        top_pad = (ypad + 0) // 2
        bottom_pad = (ypad + 1) // 2
    elif padding_type == Padding.VALID:
        left_pad = 0
        right_pad = 0
        top_pad = 0
        bottom_pad = 0
    elif padding_type == Padding.EXPLICIT:
        # Padding is specified in a PAD operator which has been bypassed.
        top, left, bottom, right = explicit_padding
        top_pad, bottom_pad = calc_explicit_padding(int(input_shape.height), int(s_y), int(k_h), int(top), int(bottom))
        left_pad, right_pad = calc_explicit_padding(int(input_shape.width), int(s_x), int(k_w), int(left), int(right))
    elif padding_type == Padding.TILE:
        # The values in the explicit padding only represent the "direction" in which to pad
        top_pad, left_pad, bottom_pad, right_pad = explicit_padding
    else:
        raise UnsupportedFeatureError(f"Unsupported padding = {padding_type} for padding calculation")
    padding = (top_pad, left_pad, bottom_pad, right_pad)
    skirt = (top_pad, left_pad, ypad - top_pad, xpad - left_pad)
    return padding, skirt

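# Illustrative example (added note, not executed): a 3x3 kernel with stride 2 over a 224x224
# SAME padded IFM needs a total padding of 1 in each dimension (assuming needed_total_padding
# in graph_optimiser_util returns the usual max(needed_input - input_size, 0)), which
# calc_padding_and_skirt above splits as top/left = 0 and bottom/right = 1, giving a
# padding of (0, 0, 1, 1) and a skirt of (0, 0, 1, 1).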

def calc_upscaled_padding_and_skirt(padding_type, kernel_size, stride, input_shape, upscaling_factor):
    kernel_height, kernel_width = kernel_size[0], kernel_size[1]
    if padding_type == Padding.SAME:
        ypad = needed_total_padding(int(input_shape.height) * upscaling_factor, int(stride[1]), int(kernel_height))
        xpad = needed_total_padding(int(input_shape.width) * upscaling_factor, int(stride[2]), int(kernel_width))
        right_pad = max(((xpad + 1) // upscaling_factor) - 1, 0)
        bottom_pad = max(((ypad + 1) // upscaling_factor) - 1, 0)
        left_pad = max(kernel_width - 1 - right_pad, 0)
        top_pad = max(kernel_height - 1 - bottom_pad, 0)
    elif padding_type == Padding.VALID:
        right_pad = max(kernel_width - 2, 0)
        bottom_pad = max(kernel_height - 2, 0)
        left_pad = kernel_width - 1
        top_pad = kernel_height - 1
    else:
        raise UnsupportedFeatureError(f"Unsupported padding = {padding_type} for up-scaled padding calculation")
    padding = (top_pad, left_pad, bottom_pad, right_pad)
    skirt = padding
    return padding, skirt


def fixup_conv2d_backprop(op, arch, nng):
    if op.type == Op.Conv2DBackpropInput:
        # flip the inputs
        op.inputs[0], op.inputs[2] = op.inputs[2], op.inputs[0]
        op.type = Op.Conv2DBackpropInputSwitchedBias
        op.ifm_resampling_mode = resampling_mode.TRANSPOSE

        # Update strides
        op.attrs.update({"stride_w": 1, "stride_h": 1, "strides": (1, 1, 1, 1)})
        DebugDatabase.add_optimised(op, op)

    return op


# Convert the op to an elementwise add
def convert_resize_1x1_to_add(op):
    op.type = Op.Add  # original_type will stay as Op.ResizeBilinear or Op.ResizeNearestNeighbor
    op.name = op.name + "_add"
    # Create an input tensor filled with zeros
    name = op.inputs[1].name + "_add"
    dtype = op.inputs[0].dtype
    shape = op.ofm_shapes[0].as_list()
    values = np.zeros(shape, dtype.as_numpy_type())
    quantization = QuantizationParameters(0.0, 255.0)
    quantization.scale_f32 = 1.0
    quantization.zero_point = 0
    op.inputs[1] = op.inputs[0]
    op.set_input_tensor(create_const_tensor(name, shape, dtype, values, quantization=quantization), 0)
    op.set_ifm_ofm_shapes()
    DebugDatabase.add_optimised(op, op)

    return op


# Convert ResizeNearestNeighbor with align corners to a depthwise convolution. The IFM will already have been upscaled
# apart from the final x2 scaling, which will be done as part of this operation. The kernel contains a single
# coefficient to select the appropriate nearest neighbor value
def convert_resizenn_ac_to_depthwise_conv(op, upscale_factor):
    ifm = op.ifm
    ofm = op.ofm
    output_depth = ofm.shape[-1]
    dw_op_attrs = {
        "padding": Padding.VALID,
        "stride_h": 1,
        "stride_w": 1,
        "strides": (1, 1, 1, 1),
        "depth_multiplier": 1,
        "channel_multiplier": 1,
        "dilation_h_factor": 1,
        "dilation_w_factor": 1,
        "dilation": (1, 1, 1, 1),
    }

    # change resizebilinear to depthwise
    op.type = Op.DepthwiseConv2DBias
    op.attrs.update(dw_op_attrs)
    op.set_input_tensor(ifm, 0)  # ifm tensor index
    op.activation = None

    # add input resample to resize by x2
    op.ifm_resampling_mode = resampling_mode.NEAREST

    # don't care about the rounding mode as it is nearest neighbor

    # setup weight tensor
    weight_quant = QuantizationParameters()
    weight_quant.scale_f32 = 1.0  # no scaling as only a single non-zero coeff to select the desired value
    weight_quant.zero_point = 0
    weight_quant.quant_dim = 0
    ofm_dtype = ofm.dtype
    if ofm_dtype.type == BaseType.UnsignedInt:
        weight_quant.quant_min = 0
        weight_quant.quant_max = (1 << ofm_dtype.bits) - 1
    else:
        weight_quant.quant_min = -(1 << (ofm_dtype.bits - 1))
        weight_quant.quant_max = (1 << (ofm_dtype.bits - 1)) - 1

    weight_shape = [upscale_factor, upscale_factor, output_depth, output_depth]  # HWIO

    # the single non-zero coefficient used to select the desired value needs to be placed in the 'centre value', which
    # is calculated by finding the 'centre position' ('*' in the diagram below) and then choosing the 'value' that is
    # below-and-right (i.e. next) to it (D).
    # 0---1---2
    # | A | B |
    # 1---*---+
    # | C | D |
    # 2---+---+
    weight_values = [0] * (upscale_factor * upscale_factor)
    centre_coeff = (upscale_factor // 2) * upscale_factor + (upscale_factor // 2)
    weight_values[centre_coeff] = 1

    # add weight tensor, this will discard the size tensor of the resize op
    op.set_input_tensor(
        create_const_tensor(
            "weights",
            weight_shape,
            ofm_dtype,
            np.array(weight_values).reshape(weight_shape),
            quantization=weight_quant,
        ),
        1,  # inputs tensor weight index
    )

    # setup bias tensor by assigning None and then calling the fix-up function to create a suitable tensor.
    # need to append the bias tensor as resize ops only have 2 inputs
    assert len(op.inputs) == 2
    op.inputs.append(None)
    fixup_bias_tensors(op, None, None, DataType.int32)

    # finally update the shape in case we've changed the tensor shapes or connections
    op.set_ifm_ofm_shapes()
    DebugDatabase.add_optimised(op, op)

    return op

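# Illustrative example (added note, not executed): for upscale_factor = 2 the weight tensor built
# in convert_resizenn_ac_to_depthwise_conv above is 2x2 per channel with
# centre_coeff = (2 // 2) * 2 + (2 // 2) = 3, i.e. the single 1 lands on the bottom-right
# position (D in the diagram), which is the value selected by nearest-neighbour with align corners.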

# Convert ResizeBilinear/NearestNeighbor to a number of 1x1 average pools with nearest neighbor x2 upscaling and one
# final average pool with a kernel size that depends upon the resize op's upscaling factor (x2, x4 or x8). The maximum
# upscale factor is limited to x8 because of the 8x8 kernel size limit for average pool with padding.
def convert_resize_to_upscale_and_average_pool(op):
    pre_op = op
    outputs = op.outputs
    dtype = op.ifm.dtype

    op.attrs.update({"strides": (1, 1, 1, 1), "ksize": (1, 1, 1, 1)})
    op.attrs["padding"] = Padding.SAME  # doesn't really matter as the kernel is 1x1
    op.ifm_resampling_mode = resampling_mode.NEAREST

    upscaled_shape = np.array(op.ifm_shapes[0].get_hw_as_list())

    # Get upscale factor that was calculated in the supported operators check
    upscale_factor = op.attrs["upscale_factor"]

    # Calculate how many times 2x2 upscaling needs to be performed
    # Force the result of round to be an integer. This is because the behaviour of rounding numpy.float64 values changed
    # between different versions of numpy. This consistency ensures that the kernel dimensions are kept integral
    n = int(np.log2(upscale_factor))

    # Perform x2 upscaling n-1 times
    scaled_op = pre_op
    for count in range(n - 1):
        if count > 0:
            scaled_op = op.clone(f"_{count}")
            scaled_op.inputs[0] = pre_op.outputs[0]

        # Nearest neighbor x2 upscaling
        upscaled_shape = upscaled_shape * 2
        shape = op.ofm_shapes[0].as_list()
        shape[1:3] = upscaled_shape
        out_tens = Tensor(shape, dtype, f"{op.outputs[0].name}_{count}")
        out_tens.quantization = op.outputs[0].quantization.clone()
        scaled_op.set_output_tensor(out_tens)
        pre_op = scaled_op

        scaled_op.set_ifm_ofm_shapes()
        DebugDatabase.add_optimised(op, scaled_op)

    # Last x2 upscaling
    if n > 1:
        scaled_op = op.clone(f"_{n-1}")
        scaled_op.inputs[0] = pre_op.outputs[0]

    if scaled_op.original_type == Op.ResizeBilinear:
        if scaled_op.attrs["align_corners"]:
            # no padding
            scaled_op.attrs["padding"] = Padding.VALID
        else:
            # padding to the right and bottom (limits average pool to 8x8 kernel)
            scaled_op.attrs["padding"] = Padding.EXPLICIT
            scaled_op.attrs["explicit_padding"] = [0, 0, upscale_factor - 1, upscale_factor - 1]

        # kernel size dependent on the upscaling factor
        scaled_op.attrs.update({"ksize": (1, upscale_factor, upscale_factor, 1)})
    else:  # Op.ResizeNearestNeighbor
        if scaled_op.attrs["align_corners"]:
            # use depthwise conv to select the correct value
            scaled_op = convert_resizenn_ac_to_depthwise_conv(scaled_op, upscale_factor)
        else:
            # Keep 1x1 kernel and average pool, this applies both when
            # half-pixel-centers is True and False. Calculations are the
            # same in the reference.
            pass

    scaled_op.outputs = outputs
    scaled_op.outputs[0].ops = [scaled_op]
    scaled_op.set_ifm_ofm_shapes()
    DebugDatabase.add_optimised(op, scaled_op)

    return op

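# Illustrative example (added note, not executed): an x8 resize gives n = log2(8) = 3 in
# convert_resize_to_upscale_and_average_pool above, so the original op plus two clones each
# perform a nearest-neighbour x2 upscale, and the last stage also applies the final average
# pool with an 8x8 kernel (ResizeBilinear) or keeps the 1x1 kernel (ResizeNearestNeighbor
# without align corners).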

def convert_resizebilinear_to_depthwise_convolutions(op, half_pixel_centers=True):
    def _compute_interpolation_values(index, input_size, output_size):
        scale = input_size / output_size
        scaled_value = (index + 0.5 * half_pixel_centers) * scale - 0.5 * half_pixel_centers
        lower_bound = max(np.floor(scaled_value), 0)

        return scaled_value, lower_bound

    def _compute_kernels(input_height, input_width, output_height, output_width):
        kernels = []
        for y in (1, 2):
            for x in (1, 2):
                sv_h, lb_h = _compute_interpolation_values(y, input_height, output_height)
                sv_w, lb_w = _compute_interpolation_values(x, input_width, output_width)

                # Interpolation values calculated for (x, y) = ([1, 2], [1, 2]) will always generalize to the whole
                # input for upscale = 2 and input sizes >= 2x2 and be in the correct order for going left-to-right,
                # top-to-bottom - same as the depthwise convolution strides across each tile
                kernel = np.zeros((2, 2))
                kernel[1, 1] = (1 - (sv_h - lb_h)) * (1 - (sv_w - lb_w))
                kernel[0, 1] = (sv_h - lb_h) * (1 - (sv_w - lb_w))
                kernel[1, 0] = (1 - (sv_h - lb_h)) * (sv_w - lb_w)
                kernel[0, 0] = (sv_h - lb_h) * (sv_w - lb_w)
                kernel *= 16
                kernels.append(kernel)

        return kernels

    def _build_convolutions(op, kernels):
        dw_op_attrs = {
            "padding": Padding.TILE,
            "stride_h": 1,
            "stride_w": 1,
            "strides": (1, 1, 1, 1),
            "depth_multiplier": 1,
            "channel_multiplier": 1,
            "dilation_h_factor": 1,
            "dilation_w_factor": 1,
            "dilation": (1, 1, 1, 1),
        }
        ifm = op.ifm
        ofm = op.ofm
        ofm.ops = []
        elem_size = 2 if ofm.dtype == DataType.int16 else 1

        n, h, w, c = ifm.shape
        _, _, ow, _ = ofm.shape

        intermediate_tens = Tensor(ifm.shape, ifm.dtype, "intermediate_tens")
        intermediate_tens.quantization = op.outputs[0].quantization.clone()
        avgpool_op = op
        avgpool_op.name = "rb_init_avgpool"
        avgpool_op.type = Op.AvgPool
        avgpool_op.attrs["padding"] = Padding.VALID
        avgpool_op.attrs["stride_w"] = 1
        avgpool_op.attrs["stride_h"] = 1
        avgpool_op.attrs["filter_width"] = 1
        avgpool_op.attrs["filter_height"] = 1
        avgpool_op.attrs["strides"] = [1, 1, 1, 1]
        avgpool_op.attrs["ksize"] = [1, 1, 1, 1]

        avgpool_op.add_input_tensor(ifm)
        avgpool_op.set_output_tensor(intermediate_tens)
        avgpool_op.set_ifm_ofm_shapes()
        DebugDatabase.add_optimised(op, op)

        dw_conv = Operation(Op.DepthwiseConv2DBias, "depthwise_conv")
        dw_conv._original_type = Op.ResizeBilinear
        dw_conv.write_shape = Shape4D(n, h, w, c)
        dw_conv.write_offset = Shape4D(0, 0, 0, 0)

        # Set the output rounding mode. Resize bilinear requires rounding away from zero. Therefore, we need to
        # adjust the accumulated value by a "small" amount before applying natural rounding. The "small" amount
        # should be big enough to cause a x.5 to be rounded correctly but small enough not to cause smaller
        # values to be incorrectly rounded
        ofm.quantization.next_after = True
        dw_conv.rounding_mode = NpuRoundingMode.NATURAL

        # Double height and width stride to write the output of each of the four depthwise convolutions below
        # interleaved with each other when combined with OFM tile base offsets.
        dw_conv.ofm_stride_multiplier = [1, 2, 2]  # C/H/W

        # Choose tile padding direction - pad by 1 with edge values in two directions.
        # For example, TL (top left) will pad top and left in H/W-plane in all channels.
        directions = [[1, 1, 0, 0], [1, 0, 0, 1], [0, 1, 1, 0], [0, 0, 1, 1]]  # TL, TR, BL, BR
        for i in (0, 1):
            for j in (0, 1):
                index = i * 2 + j
                dw_conv.name = f"depthwise_conv_{index}"
                dw_op_attrs["explicit_padding"] = directions[index]
                dw_conv.attrs.update(dw_op_attrs)

                # This will offset the start of the write by modifying the Tile 0 base address
                dw_conv.tile_base_offsets_ofm[0] = (i * ow + j) * c * elem_size

                ofm.ops.append(dw_conv)
                dw_conv.outputs = [ofm]

                kernel = kernels[index]
                shape = [2, 2, 1, c]
                kernel = np.dstack([kernel] * c)

                quant = QuantizationParameters()
                quant.zero_point = 0
                quant.scale_f32 = 1.0 / 16

                dw_conv.inputs = []
                dw_conv.add_input_tensor(intermediate_tens)
                dw_conv.add_input_tensor(
                    create_const_tensor(
                        "weights",
                        shape,
                        intermediate_tens.dtype,
                        np.array(kernel).reshape(shape),
                        quantization=quant,
                    ),
                )

                # setup bias tensor by assigning None and then calling the fix-up function to create a suitable tensor.
                # need to append the bias tensor as resize ops only have 2 inputs
                assert len(dw_conv.inputs) == 2
                dw_conv.inputs.append(None)
                fixup_bias_tensors(dw_conv, None, None, dtype=DataType.int32)

                dw_conv.set_ifm_ofm_shapes()
                DebugDatabase.add_optimised(op, dw_conv)

                dw_conv = dw_conv.clone(f"_{index}")
        return op

    _, input_height, input_width, _ = op.ifm.shape
    _, output_height, output_width, _ = op.ofm.shape

    kernels = _compute_kernels(input_height, input_width, output_height, output_width)
    op = _build_convolutions(op, kernels)

    return op

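# Illustrative example (added note, not executed): for an x2 upscale with half_pixel_centers=True
# the fractional offsets computed in convert_resizebilinear_to_depthwise_convolutions above are
# 0.25 and 0.75, so each 2x2 kernel holds a permutation of the bilinear weights
# {0.5625, 0.1875, 0.1875, 0.0625}, scaled by 16 to the integers {9, 3, 3, 1}; the factor of
# 1/16 is restored through the weight quantisation (quant.scale_f32 = 1.0 / 16).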

def fixup_resize(op, arch, nng):
    if op.type.is_resize_op() and op.run_on_npu:
        if op.ifm_shapes[0] == op.ofm_shapes[0]:
            # Bypass the resize op which is essentially a NOP
            op.inputs = op.inputs[:1]
            op.type = Op.Identity
        elif op.ifm_shapes[0].height == 1 and op.ifm_shapes[0].width == 1:
            convert_resize_1x1_to_add(op)
        elif op.type == Op.ResizeBilinear and op.attrs.get("half_pixel_centers", False):
            convert_resizebilinear_to_depthwise_convolutions(op)
        else:
            convert_resize_to_upscale_and_average_pool(op)

    return op


def convert_nop_split_to_identity(op, arch, nng):
    if op.type == Op.Split and op.attrs.get("num_splits") == 1:
        # the list comprehension should return a list with a single tensor
        # if it shouldn't, remove_passthrough_tensor will fail appropriately
        op.inputs = [i for i in op.inputs if i.shape == op.outputs[0].shape]
        op.type = Op.Identity
    return op


def rewrite_fully_connected_input(op: Operation, arch, nng):

    if op.type == Op.FullyConnected:
        new_shape = op.ifm.get_shape_as_2d(op.weights.shape[-2])
        assert new_shape is not None, "Tensor can not be reshaped to 2D"
        op.ifm_shapes[0] = new_shape

        if op.ifm_shapes[0].batch > 1 and op.ofm_shapes[0].batch == 1:
            # If IFM is batching then also make sure OFM is batching
            h, w = op.ofm_shapes[0].height, op.ofm_shapes[0].width
            op.ofm_shapes[0] = Shape4D([h * w, 1, 1, op.ofm_shapes[0].depth])

    return op


def convert_batched_fc_shape(op, arch, nng):
    if op.type == Op.FullyConnected:
        # Check if the first dimension indicates batching
        if op.ifm_shapes[0].batch > 1:
            batching_split = {4: (2, 2), 8: (2, 4), 16: (4, 4)}
            n = op.ifm_shapes[0].batch
            h, w = batching_split.get(n, (1, n))
            op.ifm_shapes[0] = Shape4D([1, h, w, op.ifm_shapes[0].depth])

            # Reshape Weights to be 4D. IO becomes HWIO
            weight_tensor = op.inputs[1]
            weight_tensor.values = np.expand_dims(np.expand_dims(weight_tensor.values, axis=0), axis=0)
            weight_tensor.set_all_shapes(list(weight_tensor.values.shape))

            n = op.ofm_shapes[0].batch
            h, w = batching_split.get(n, (1, n))
            op.ofm_shapes[0] = Shape4D([1, h, w, op.ofm_shapes[0].depth])
    return op

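# Illustrative example (added note, not executed): a FullyConnected with IFM shape [8, 1, 1, 64]
# is remapped by convert_batched_fc_shape above to [1, 2, 4, 64] using batching_split[8] = (2, 4),
# so the batch is processed as a 2x4 spatial plane; batch sizes without a table entry fall back
# to (1, n).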

def unfuse_activation_function(op):
    if op.type == Op.ConcatTFLite and op.run_on_npu and op.activation is not None:
        act_op = Operation(op.activation.op_type, op.name + op.activation.op_type.name)
        op.activation = None
        out_tens = op.outputs[0]
        intermediate_tens = out_tens.clone("_act_intermediate")
        act_op.set_output_tensor(out_tens)
        act_op.add_input_tensor(intermediate_tens)
        op.set_output_tensor(intermediate_tens)
        act_op.set_ifm_ofm_shapes()
        DebugDatabase.add_optimised(op, act_op)


def rewrite_stridedslice_output(op, arch, nng):
    if not op.run_on_npu or op.type != Op.StridedSlice:
        return op

    new_axis_mask = op.attrs["new_axis_mask"]
    shrink_axis_mask = op.attrs["shrink_axis_mask"]

    if shrink_axis_mask == 0 and new_axis_mask == 0:
        return op

    axis_4D = [0] * len(op.outputs)
    for idx, out_tens in enumerate(op.outputs):
        output_shape = list(out_tens.shape)

        if shrink_axis_mask != 0:
            n = 0
            axis = 0
            while shrink_axis_mask:
                prev_mask = shrink_axis_mask
                n += 1
                shrink_axis_mask &= shrink_axis_mask - 1
                axis = int(math.log2(prev_mask - shrink_axis_mask))
                output_shape = output_shape[:axis] + [1] + output_shape[axis:]

            assert len(out_tens.shape) == (len(op.inputs[0].shape) - n)
            op.attrs["shrink_axis_mask"] = 0
            if axis >= 0:
                axis_4D[idx] = axis + (4 - len(output_shape))
            else:
                axis_4D[idx] = axis
            op.ofm_shapes[idx] = Shape4D(output_shape)

        elif new_axis_mask != 0:
            n = 0
            axis = 0
            while new_axis_mask:
                prev_mask = new_axis_mask
                n += 1
                new_axis_mask &= new_axis_mask - 1
                axis = int(math.log2(prev_mask - new_axis_mask))
                output_shape = output_shape[:axis] + output_shape[(axis + 1) :]
                new_axis_mask >>= 1

            assert len(out_tens.shape) == (len(op.inputs[0].shape) + n)
            op.attrs["new_axis_mask"] = 0
            if axis >= 0:
                axis_4D[idx] = axis + (4 - len(output_shape))
            else:
                axis_4D[idx] = axis
            op.ofm_shapes[idx] = Shape4D(output_shape)

    op.attrs["split_axis_4D"] = axis_4D
    return op

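# Illustrative example (added note, not executed): a StridedSlice with shrink_axis_mask = 0b100
# removed axis 2 from its output, so the loop in rewrite_stridedslice_output above clears one
# set bit per iteration and takes log2 of the cleared bit to recover axis = 2, then re-inserts
# a 1 at that position before forming the 4D output shape.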

def rewrite_unpack_output(op, arch, nng):
    tens = op.outputs[0]
    if op.run_on_npu and op.type == Op.Unpack:
        # Unpack is also referred to as Unstack
        axis = int(op.attrs["axis"])
        if axis < 0:  # Convert to positive axis
            axis = len(op.inputs[0].shape) + 1 + axis
        op.type = Op.UnpackReshaped
        desired_output_shape = tens.shape[:axis] + [1] + tens.shape[axis:]

        axis_4D = axis + (4 - len(desired_output_shape))
        op.attrs["split_axis_4D"] = [axis_4D] * len(op.outputs)

        for idx, out_tens in enumerate(op.outputs):
            op.ofm_shapes[idx] = Shape4D(desired_output_shape)
    return op


def add_padding_fields(op, arch, nng):
    if op.run_on_npu:
        if "padding" in op.attrs:
            input_shape = op.ifm_shapes[0]
            output_shape = op.ofm_shapes[0]
            if op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op():
                kernel_size = op.inputs[1].shape[:2]
            elif op.type.is_pool_op() or op.type.npu_block_type == NpuBlockType.ReduceSum:
                kernel_size = op.attrs["ksize"][1:3]
            else:
                raise UnsupportedFeatureError(f"Unknown operation that uses padding: {optype_to_builtintype(op.type)}")

            if op.type == Op.Conv2DBackpropInputSwitchedBias:
                upscaling_factor = output_shape.height // input_shape.height
                padding, skirt = calc_upscaled_padding_and_skirt(
                    op.attrs["padding"], kernel_size, op.attrs["strides"], input_shape, upscaling_factor
                )
            else:
                padding, skirt = calc_padding_and_skirt(
                    op.attrs["padding"],
                    op.kernel,
                    input_shape,
                    op.attrs.get("explicit_padding"),
                )

            op.attrs["explicit_padding"] = padding
            op.attrs["skirt"] = skirt

    return op


def reorder_depthwise_weights(op, arch, nng):
    if op.type.is_depthwise_conv2d_op():
        weight_tensor = op.inputs[1]
        weight_tensor.values = np.transpose(weight_tensor.values, (0, 1, 3, 2))
        weight_tensor.set_all_shapes(list(weight_tensor.values.shape))
        weight_tensor.weight_transpose_depthwise = True

    return op


def fixup_strided_conv(op, arch, nng):
    if op.type != Op.Conv2DBias:
        return op
    stride_x, stride_y = op.get_kernel_stride()
    weight_tensor = op.weights
    ifm_shape = op.ifm_shapes[0]

    # Do not optimize if op is not the first in the network and stride is
    # supported by the hardware
    if op.op_index != 0 and stride_x < 4:
        return op
    op.ifm.needs_linear_format = True

    if (
        (stride_x == 2 or stride_x == 4)
        and ifm_shape.depth <= 4
        and ifm_shape.width % 2 == 0
        and weight_tensor is not None
        and weight_tensor.shape[1] >= 2
    ):
        k_w, _ = op.get_kernel_size()
        curr_padding_x = needed_total_padding(ifm_shape.width, stride_x, k_w)
        optimised_padding_x = needed_total_padding(ifm_shape.width // stride_x, 1, (k_w + 1) // stride_x)
        padding_type = op.attrs.get("padding", None)

        # If padding is enabled, check if current padding matches optimised padding
        if not padding_type or (padding_type != Padding.VALID and curr_padding_x != optimised_padding_x):
            # Horizontal padding would become different after optimisation; this would not work
            return op
        # IFM
        op.ifm_shapes[0] = Shape4D(
            [ifm_shape.batch, ifm_shape.height, ifm_shape.width // stride_x, ifm_shape.depth * stride_x]
        )

        # Weights
        weight_shape = weight_tensor.shape
        if weight_shape[1] % 2 != 0:
            weight_shape[1] = weight_shape[1] + 1
            padded_array = np.zeros(weight_shape)
            for i in range(weight_shape[0]):
                padded_array[i] = np.vstack(
                    [
                        weight_tensor.values[i],
                        np.full((1, weight_shape[2], weight_shape[3]), weight_tensor.quantization.zero_point),
                    ]
                )
            weight_tensor.values = padded_array

        # Change weight shape based on stride_x
        weight_shape[1] //= stride_x
        weight_shape[2] *= stride_x

        weight_tensor.values = np.reshape(weight_tensor.values, weight_shape)
        weight_tensor.set_all_shapes(weight_shape)
        # If multiple copies of the weights are used, we could avoid
        # them having the same address by changing the value_id
        weight_tensor.value_id = uuid.uuid4()

        # Strides
        stride_x = 1
        op.attrs.update({"stride_w": stride_x, "stride_h": stride_y, "strides": (1, stride_y, stride_x, 1)})

    return op

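# Illustrative example (added note, not executed): with stride_x = 2 an IFM of [1, 224, 224, 3]
# is viewed by fixup_strided_conv above as [1, 224, 112, 6] (pairs of width positions folded
# into the depth dimension), the kernel width is halved while its input-channel dimension
# doubles, and the convolution then runs with stride_w = 1 on the hardware.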

def convert_conv_to_fc(op, arch, nng):
    # Conv 1x1 can be equivalent to Fully Connected.
    # By representing certain convs as fully connected layers, Vela can better determine whether or not to use
    # caching/double buffering for the weights.
    # (Weights don't need to be reloaded for convs when IFM H and W are 1)
    if op.type == Op.Conv2DBias:
        h = op.ifm_shapes[0].height
        w = op.ifm_shapes[0].width
        kh, kw, _, _ = op.inputs[1].shape
        if h == 1 and w == 1 and kh == 1 and kw == 1:
            # Overwrite this op as a Fully Connected Op
            op.name += "_fc"
            op.type = Op.FullyConnected
            op.attrs = {
                "weights_format": 0,
            }
            # Reshape Weights to be 2D. HWIO becomes just IO (as H and W are 1, they can just be dropped)
            weight_tensor = op.inputs[1]
            weight_tensor.values = weight_tensor.values.squeeze(axis=(0, 1))
            weight_tensor.set_all_shapes(list(weight_tensor.values.shape))

            DebugDatabase.add_optimised(op, op)
    return op


def fixup_relus_with_differing_ifm_ofm_scaling(op, arch, nng):
    if op.run_on_npu and op.type.is_relu_op():
        ifm = op.inputs[0]
        ofm = op.outputs[0]
        # Relu with differing IFM and OFM scaling cannot be fused with another primary op
        # and requires its own to be inserted
        if not check_quantized_tens_scaling_equal(ifm, ofm):
            # Override this op with its own primary op (avgpool)
            relu_fused_op = create_avgpool_nop(op.name + "_avgpool")
            # And fuse the original activation function to it
            relu_fused_op.activation = create_activation_function(op.type)
            # Add explicit rescaling
            rescale = ifm.quantization.scale_f32 / ofm.quantization.scale_f32
            multiplier, shift = scaling.quantise_scale(rescale)
            relu_fused_op.explicit_scaling = ExplicitScaling(False, [shift], [multiplier])
            # Tidy up and assign the ifm and ofm to the new op
            ifm.consumer_list.remove(op)

            relu_fused_op.add_input_tensor(ifm)
            relu_fused_op.set_output_tensor(ofm)
            relu_fused_op.set_ifm_ofm_shapes()
            op = relu_fused_op
    return op


def convert_softmax(op, arch, nng):
    if op.type == Op.Softmax and op.run_on_npu:
        softmax = SoftMax(op)
        op = softmax.get_graph()
    return op


def convert_prelu(op, arch, nng):
    if op.type == Op.Prelu:
        ifm, alpha, ofm = op.get_ifm_ifm2_ofm()
        if None in (ifm, alpha, ofm):
            return op

        if alpha.values is not None:
            # If const alpha check for possible optimisations
            alpha_zp = alpha.quantization.zero_point
            alpha_scale = alpha.quantization.scale_f32
            # If all alpha values are the same the PReLU can be converted to LeakyRelu
            alpha_min = (alpha.values.min().astype(int) - alpha_zp) * alpha_scale
            alpha_max = (alpha.values.max().astype(int) - alpha_zp) * alpha_scale
            if alpha_min == alpha_max:
                # or even a Relu
                if alpha_min == 0:
                    new_op = Op.Relu
                else:
                    new_op = Op.LeakyRelu
                    op.attrs["alpha"] = alpha_min
                    # setup alpha_scaling for bit exact result
                    ifm_scale = ifm.quantization.scale_f32
                    ofm_scale = ofm.quantization.scale_f32
                    alpha_scale, alpha_shift = scaling.elementwise_mul_scale(ifm_scale, alpha_scale, ofm_scale)
                    op.attrs["alpha_scaling"] = (alpha.values.min() - alpha_zp, alpha_scale, alpha_shift)
                # Change op type
                op.type = new_op
                op.name = op.name.replace("Prelu", new_op.name)
                del op.inputs[1]  # Remove alpha tensor
                return op
            elif alpha_max < 1:
                # If alpha_max is less than 1 convert PReLU to Max(alpha * IFM, identity * IFM)
                # Multiply with alpha tensor
                mul_alpha = Operation(Op.Mul, op.name + "_mul_alpha")
                mul_alpha.add_input_tensor(ifm)
                mul_alpha.add_input_tensor(alpha)
                fm_alpha = ofm.clone(op.name + "_alpha", set_unique=True)
                mul_alpha.set_output_tensor(fm_alpha)
                mul_alpha.set_ifm_ofm_shapes()
                DebugDatabase.add_optimised(op, mul_alpha)
                if check_quantized_tens_scaling_equal(ifm, ofm):
                    # No scaling is needed
                    fm_id = ifm
                else:
                    # Add multiplication with identity
                    mul_identity = Operation(Op.Mul, op.name + "_mul_identity")
                    mul_identity.add_input_tensor(ifm)
                    # Create const tensor containing identity as scalar
                    quantization = ifm.quantization.clone()
                    quantization.scale_f32 = np.float32(1)
                    quantization.zero_point = 0
                    one = create_const_tensor("one_const", [], ifm.dtype, [1], quantization=quantization)
                    mul_identity.add_input_tensor(one)
                    # Make sure that fm_id is allocated to a different address than fm_alpha
                    fm_id = ofm.clone(op.name + "_id", set_unique=True)
                    mul_identity.set_output_tensor(fm_id)
                    mul_identity.set_ifm_ofm_shapes()
                    DebugDatabase.add_optimised(op, mul_identity)

                # Combine scaled and alpha multiplied values
                max_op = Operation(Op.Maximum, op.name + "_max")
                max_op.add_input_tensor(fm_alpha)
                max_op.add_input_tensor(fm_id)
                max_op.set_output_tensor(ofm)
                max_op.set_ifm_ofm_shapes()

                DebugDatabase.add_optimised(op, max_op)
                ifm.consumer_list.remove(op)
                return max_op

        # Catch all PReLU conversion for the cases that could not be optimised above
        no_scale_quant = ifm.quantization.clone()
        no_scale_quant.scale_f32 = None
        no_scale_quant.zero_point = 0
        zero = create_const_tensor("zero_const", [], ifm.dtype, [0], quantization=no_scale_quant)

        # Select values < 0
        min_op = Operation(Op.Minimum, op.name + "_min")
        min_op.add_input_tensor(ifm)
        min_op.add_input_tensor(zero)
        fm_negative = ifm.clone(op.name + "_negative", set_unique=True)
        min_op.set_output_tensor(fm_negative)
        min_op.set_ifm_ofm_shapes()
        DebugDatabase.add_optimised(op, min_op)

        # and multiply with alpha tensor
        mul_alpha = Operation(Op.Mul, op.name + "_mul_alpha")
        mul_alpha.add_input_tensor(fm_negative)
        mul_alpha.add_input_tensor(alpha)
        fm_alpha = ofm.clone(op.name + "_negative_alpha", set_unique=True)
        mul_alpha.set_output_tensor(fm_alpha)
        mul_alpha.set_ifm_ofm_shapes()
        DebugDatabase.add_optimised(op, mul_alpha)

        # Select (and scale) values > 0
        relu_op = Operation(Op.Relu, op.name + "_relu")
        relu_op.add_input_tensor(ifm)
        fm_scaled = ofm.clone(op.name + "_positive_scaled", set_unique=True)
        relu_op.set_output_tensor(fm_scaled)
        relu_op.set_ifm_ofm_shapes()
        DebugDatabase.add_optimised(op, relu_op)

        # Add scaled and alpha multiplied values (without scaling)
        add_op = Operation(Op.Add, op.name + "_add")
        add_op.explicit_scaling = ExplicitScaling(False, shift=[0], multiplier=[1])  # No scaling
        add_op.add_input_tensor(fm_alpha)
        add_op.add_input_tensor(fm_scaled)
        add_op.set_output_tensor(ofm)
        add_op.set_ifm_ofm_shapes()

        DebugDatabase.add_optimised(op, add_op)
        ifm.consumer_list.remove(op)
        op = add_op

    return op


def convert_mul_max_to_abs_or_lrelu(op, arch, nng):
    r"""Whenever there is a subgraph with this topology:

    Input    X    For X = -1 or X > 0
    |   \   /     This subgraph can be replaced with either
    |    Mul      an Abs (if X = -1) or a LeakyReLU (if X > 0)
    |   /
    Max
    """

    if op.type == Op.Maximum:
        # finds the Mul input(s) to the Max
        muls = [i for i in op.inputs if i.ops[0].type == Op.Mul]
        if len(muls) == 1:
            mul = muls[0].ops[0]
        elif len(muls) == 2:
            # In the case both inputs are Muls, find the one with the same input as the Max
            mul_ifms = [m for m in muls if len(set(op.inputs + m.ops[0].inputs)) == 1]
            if len(mul_ifms):
                mul = mul_ifms[0].ops[0]
            else:
                # Not using same input
                return op
        else:
            # No Mul inputs
            return op

        # make sure the Mul doesn't have any other consumers
        mul_ofm = mul.outputs[0]
        if len(mul_ofm.consumers()) != 1:
            return op
        # make sure the Mul doesn't have a fused activation function
        if mul.activation:
            return op
        ifm, ofm = op.get_ifm_ofm()
        if ifm is None or ofm is None:
            return op

        if ifm.dtype not in (DataType.uint8, DataType.int8) or ifm.dtype != ofm.dtype:
            return op
        if not check_quantized_tens_scaling_equal(ifm, ofm) or not check_quantized_tens_scaling_equal(ifm, mul_ofm):
            # rewrite to LeakyRelu currently only makes sense if the quantization is identical
            return op

        # finds the branched input that goes to both the Max and the Mul
        shared = set(op.inputs) & set(mul.inputs)
        if len(shared) == 1:
            shared_in = shared.pop()
            # find the constant scalar input to the Mul
            const_tens = (set(mul.inputs) - {shared_in}).pop()
            # check that it is a scalar
            if const_tens.shape != []:
                return op
            const = const_tens.ops[0]
            # check that it is a constant
            if const.type != Op.Const:
                return op
            # Remove the Mul from the shared input's consumers
            shared_in.consumer_list.remove(mul)
        else:
            return op

        val = const.outputs[0].values
        if val >= 0:
            new_op = Op.LeakyRelu
            op.attrs["alpha"] = val
            # to produce bit exact results, the alpha is not enough;
            # save additional scaling info in attr "alpha_scale", to be used as input
            # to the LUT construction
            alpha_scalar = const_tens.values - const_tens.quantization.zero_point
            mul_ifm_scale = np.double(ifm.quantization.scale_f32)
            mul_ifm2_scale = np.double(const_tens.quantization.scale_f32)
            mul_ofm_scale = np.double(mul_ofm.quantization.scale_f32)
            alpha_scale, alpha_shift = scaling.elementwise_mul_scale(mul_ifm_scale, mul_ifm2_scale, mul_ofm_scale)
            op.attrs["alpha_scaling"] = (alpha_scalar, alpha_scale, alpha_shift)
        elif val == -1:
            new_op = Op.Abs
        else:
            return op

        op.type = new_op
        op.name = op.name.replace("Maximum", new_op.name)
        op.outputs[0].name = op.outputs[0].name.replace("Maximum", new_op.name)
        op.inputs = [shared_in]
        op.set_ifm_ofm_shapes()

        # Record optimisation in debug database
        DebugDatabase.add_optimised(op, op)

    return op

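# Illustrative example (added note, not executed): a graph computing Maximum(x, 0.1 * x), where
# 0.1 is a constant scalar and the IFM, OFM and Mul output share the same int8/uint8
# quantisation, matches the pattern in convert_mul_max_to_abs_or_lrelu above and collapses to
# a single LeakyRelu with alpha = 0.1; Maximum(x, -1 * x) collapses to Abs.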
1119
1120def convert_hardswish_to_lut(op, arch, nng):
1121 if op.type == Op.HardSwish:
1122 ifm, ofm = op.get_ifm_ofm()
1123 # Generate the LUT
1124 ifm_scale = np.double(ifm.quantization.scale_f32)
1125 ofm_scale = np.double(ofm.quantization.scale_f32)
1126 zp_in = ifm.quantization.zero_point
1127 zp_out = ofm.quantization.zero_point
1128 ifm_scale_hires = (1 / 128) * ifm_scale
1129 relu_multiplier = np.double(3 / 32768)
1130 out_scale, out_shift = scaling.quantise_scale(ifm_scale_hires / ofm_scale)
1131 relu_scale, relu_shift = scaling.quantise_scale(ifm_scale_hires / relu_multiplier)
1132 # Use 16bit scale
1133 out_scale_16 = fp_math.downscale_multiplier_int32_to_int16(out_scale)
1134 relu_scale_16 = fp_math.downscale_multiplier_int32_to_int16(relu_scale)
1135
1136 values = []
1137 ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
1138 quantized_min = min(ix)
1139 quantized_max = max(ix)
1140 for x in ix:
1141 input_value = x - zp_in
1142 input_value_hires = input_value * 128
1143 # Compute the input value on essentially the output scale, not shifted yet
1144 input_value_preshift = fp_math.saturating_rounding_mul16(input_value_hires, out_scale_16)
1145 # Compute the "relu-ish multiplier". This matches the code in TensorFlow Lite Micro kernel
1146 relu_value = np.int16(input_value_hires)
1147 if relu_shift < 31:
1148 relu_value = fp_math.shift_left16(relu_value, 30 - relu_shift)
1149
1150 relu_value = fp_math.saturating_rounding_mul16(relu_value, relu_scale_16)
1151
1152 if relu_shift < 31:
1153 relu_value = fp_math.shift_left16(relu_value, 1)
1154
1155 if relu_shift > 31:
1156 relu_value = fp_math.rounding_divide_by_pot(relu_value, relu_shift - 31)
1157
1158 # Rescaled the value into a 16bit fixedpoint relu_value in [-1, 1]
1159 # Now convert that to a 16bit fixedpoint value in [0, 1]
1160 relu_value = (relu_value + (1 << 15)) >> 1
1161 lut_result = fp_math.saturating_mul16(relu_value, input_value_preshift)
1162 shift = 31 - out_shift
1163 shift = -shift if shift < 0 else 0
1164 # Finally apply the output shift
1165 lut_result = fp_math.rounding_divide_by_pot(lut_result, shift) + zp_out
1166 lut_result = min(quantized_max, max(quantized_min, lut_result))
1167 values.append(lut_result)
1168 return convert_to_lut(op, values, "hardswish")
1169 return op
1170
1171
1172def convert_lrelu_to_mul_max(op, arch):
1173 # Converts LeakyRelu to Max(alpha * IFM, identity * IFM)
1174 # (the opposite of convert_mul_max_to_abs_or_lrelu)
1175 ifm, ofm = op.get_ifm_ofm()
1176 if ifm is None or ofm is None:
1177 return op
1178
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001179 alpha = np.float32(op.attrs["alpha"])
1180 use_mul_max = 0 < alpha < 1
Fredrik Svedberg36424312022-09-16 09:39:26 +02001181 is_converted_prelu = "alpha_scaling" in op.attrs
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001182 if use_mul_max:
1183 mul_ifm = ifm
1184 new_op = Op.Maximum
1185 else:
Fredrik Svedberg36424312022-09-16 09:39:26 +02001186 # Need to use a different approach for alpha < 0 or alpha > 1
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001187 no_scale_quant = ifm.quantization.clone()
1188 no_scale_quant.scale_f32 = None
1189 no_scale_quant.zero_point = 0
1190 zero = create_const_tensor("zero_const", [], ifm.dtype, [0], quantization=no_scale_quant)
1191
1192 # Select values < 0
1193 min_op = Operation(Op.Minimum, op.name + "_min")
1194 min_op.add_input_tensor(ifm)
1195 min_op.add_input_tensor(zero)
1196 mul_ifm = ifm.clone(op.name + "_negative", set_unique=True)
Fredrik Svedberg36424312022-09-16 09:39:26 +02001197 if alpha < 0 and not is_converted_prelu:
1198 # For negative alpha that is not from a converted PReLU we need to use
1199 # int32 Mul below to perform the (negative) alpha scaling
1200 mul_ifm.dtype = DataType.int32
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001201 min_op.set_output_tensor(mul_ifm)
1202 min_op.set_ifm_ofm_shapes()
Fredrik Svedberg4a434cb2022-09-27 14:13:01 +02001203 new_op = Op.Add
1204 op.explicit_scaling = ExplicitScaling(False, shift=[0], multiplier=[1]) # No scaling
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001205 DebugDatabase.add_optimised(op, min_op)
1206
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001207 # Add multiplication with alpha
1208 mul_alpha = Operation(Op.Mul, op.name + "_mul_alpha")
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001209 mul_alpha.add_input_tensor(mul_ifm)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001210 # Create const tensor containing alpha as scalar
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001211 quantization = ifm.quantization.clone()
1212 quantization.min = 0
1213 quantization.max = alpha * (quantization.quant_max - quantization.quant_min)
1214 quantization.zero_point = 0
Fredrik Svedberg7f3ccd52022-09-13 15:22:01 +02001215 alpha_dtype = mul_ifm.dtype
Fredrik Svedberg36424312022-09-16 09:39:26 +02001216 if is_converted_prelu:
1217 # The LeakyRelu was the result from convert_prelu and the scaling is provided
Fredrik Svedberg66591652022-08-29 10:51:27 +02001218 scalar, alpha_scale, alpha_shift = op.attrs["alpha_scaling"]
Fredrik Svedberg4a434cb2022-09-27 14:13:01 +02001219 mul_alpha.explicit_scaling = ExplicitScaling(False, [alpha_shift], [alpha_scale])
Fredrik Svedberg7f3ccd52022-09-13 15:22:01 +02001220 elif alpha == 0 or np.isinf(1 / alpha):
1221 # Handling of alpha near or at zero
Fredrik Svedbergcce872b2021-09-02 15:20:52 +02001222 quantization.scale_f32 = np.float32(1)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001223 scalar = 0
1224 else:
1225 quantization.scale_f32 = alpha
Fredrik Svedberg7f3ccd52022-09-13 15:22:01 +02001226 if alpha_dtype == DataType.int32:
Fredrik Svedberg36424312022-09-16 09:39:26 +02001227 # When the datatype is int32 (alpha negative) we need to do the scaling with the multiplication
Fredrik Svedberg7f3ccd52022-09-13 15:22:01 +02001228 scalar, _ = scaling.elementwise_mul_scale(ifm.quantization.scale_f32, alpha, ofm.quantization.scale_f32)
1229 else:
1230 scalar = 1
Tim Hall3b1578e2023-01-13 17:57:25 +00001231 alpha_tens = create_const_tensor(op.name + "_alpha_scalar", [1], alpha_dtype, [scalar], quantization=quantization)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001232 mul_alpha.add_input_tensor(alpha_tens)
1233 fm_alpha = ofm.clone(op.name + "_alpha", set_unique=True)
1234 mul_alpha.set_output_tensor(fm_alpha)
1235 mul_alpha.set_ifm_ofm_shapes()
1236 DebugDatabase.add_optimised(op, mul_alpha)
1237
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001238 if not use_mul_max:
1239 relu_op = Operation(Op.Relu, op.name + "_relu")
1240 relu_op.add_input_tensor(ifm)
1241 fm_id = ofm.clone(op.name + "_positive_scaled", set_unique=True)
1242 relu_op.set_output_tensor(fm_id)
1243 relu_op.set_ifm_ofm_shapes()
1244 DebugDatabase.add_optimised(op, relu_op)
1245 elif check_quantized_tens_scaling_equal(ifm, ofm):
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001246 # No identity multiplication is needed
1247 fm_id = ifm
1248 else:
1249 # Add multiplication with identity
1250 mul_identity = Operation(Op.Mul, op.name + "_mul_identity")
1251 mul_identity.add_input_tensor(ifm)
1252 # Create const tensor containing identity as scalar
1253 quantization = ifm.quantization.clone()
1254 quantization.min = 0
1255 quantization.max = quantization.quant_max - quantization.quant_min
Fredrik Svedbergcce872b2021-09-02 15:20:52 +02001256 quantization.scale_f32 = np.float32(1)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001257 quantization.zero_point = 0
Tim Hall3b1578e2023-01-13 17:57:25 +00001258 identity_tens = create_const_tensor(op.name + "_id_scalar", [], ifm.dtype, [1], quantization=quantization)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001259 mul_identity.add_input_tensor(identity_tens)
1260 # Make sure that fm_id is allocated to a different address than fm_alpha
1261 fm_id = ofm.clone(op.name + "_id", set_unique=True)
1262 mul_identity.set_output_tensor(fm_id)
1263 mul_identity.set_ifm_ofm_shapes()
1264 DebugDatabase.add_optimised(op, mul_identity)
1265
1266 # Convert LeakyRelu to Max, add the results of the multiplication(s) as inputs
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001267 op.type = new_op
1268 op.name = op.name.replace("LeakyRelu", new_op.name)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001269 op.inputs = []
1270 ifm.consumer_list.remove(op)
1271 op.add_input_tensor(fm_alpha)
1272 op.add_input_tensor(fm_id)
1273 op.set_ifm_ofm_shapes()
1274
1275 DebugDatabase.add_optimised(op, op)
1276 return op
1277
1278
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001279def convert_to_lut8(op, fn, fn_name):
1280 # Converts op to a no-op + int8/uint8 LUT which is generated with the given function.
1281 # fn is a function(real) -> real
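    # Each LUT entry maps a quantized input x to round_away_zero(zp_out + fn(ifm_scale * (x - zp_in)) / ofm_scale),
    # clamped to the quantized range. E.g. with fn=abs, ifm_scale=0.5, ofm_scale=0.25 and both zero points 0,
    # the input value x = -3 (real -1.5) maps to LUT entry 6 (real 1.5).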
1282 ifm, ofm = op.get_ifm_ofm()
1283 if ifm.dtype not in (DataType.uint8, DataType.int8) or ifm.dtype != ofm.dtype:
1284 return op
1285 # Generate the LUT
1286 ifm_scale = np.double(ifm.quantization.scale_f32)
1287 ofm_scale = np.double(ofm.quantization.scale_f32)
1288 zp_in = ifm.quantization.zero_point
1289 zp_out = ofm.quantization.zero_point
1290 values = []
1291 ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
1292 quantized_min = min(ix)
1293 quantized_max = max(ix)
1294 for x in ix:
1295 x_real = ifm_scale * (x - zp_in)
1296 y_real = fn(x_real)
1297 lut_result = round_away_zero(zp_out + y_real / ofm_scale)
1298 lut_result = min(quantized_max, max(quantized_min, lut_result))
1299 values.append(lut_result)
1300 return convert_to_lut(op, values, fn_name)
1301
1302
1303def convert_lrelu_to_lut(op, arch):
1304 ifm, ofm = op.get_ifm_ofm()
1305 # Generate the LUT
1306 alpha = op.attrs["alpha"]
1307 ifm_scale = np.double(ifm.quantization.scale_f32)
1308 ofm_scale = np.double(ofm.quantization.scale_f32)
1309 zp_in = ifm.quantization.zero_point
1310 zp_out = ofm.quantization.zero_point
1311 identity_scale, identity_shift = scaling.elementwise_mul_scale(ifm_scale, 1, ofm_scale)
1312 alpha_scalar = 1
1313 alpha_scale, alpha_shift = scaling.elementwise_mul_scale(ifm_scale, alpha, ofm_scale)
1314 if "alpha_scaling" in op.attrs:
1315        # The LeakyRelu is the result of convert_mul_max_to_abs_or_lrelu
1316 alpha_scalar, alpha_scale, alpha_shift = op.attrs["alpha_scaling"]
1317 values = []
1318 ix = range(256) if ifm.dtype == DataType.uint8 else range(-128, 128)
1319 quantized_min = min(ix)
1320 quantized_max = max(ix)
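    # Build the LUT: entries below the input zero point are scaled by alpha, the remaining entries use the identity scale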
1321 for x in ix:
1322 if x < zp_in:
1323 lut_result = zp_out + fp_math.multiply_by_quantized_multiplier(
1324 alpha_scalar * (x - zp_in), alpha_scale, alpha_shift
1325 )
1326 else:
1327 lut_result = zp_out + fp_math.multiply_by_quantized_multiplier(x - zp_in, identity_scale, identity_shift)
1328 lut_result = min(quantized_max, max(quantized_min, lut_result))
1329 values.append(lut_result)
1330 return convert_to_lut(op, values, "lrelu")
1331
1332
1333def convert_lrelu(op, arch, nng):
1334 # Converts LeakyRelu to a LUT based solution if possible, otherwise a mul + max
1335 if op.type != Op.LeakyRelu:
1336 return op
1337 ifm, ofm = op.get_ifm_ofm()
1338 if ifm is None or ofm is None:
1339 return op
Fredrik Svedberg36424312022-09-16 09:39:26 +02001340 alpha = op.attrs["alpha"]
1341 if alpha == 0:
1342        # When alpha is 0 the operation can be converted to a ReLU
1343 op.type = Op.Relu
1344 op.name = op.name.replace("LeakyRelu", op.type.name)
1345 return op
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001346 if ifm.dtype in (DataType.uint8, DataType.int8) and ifm.dtype == ofm.dtype:
1347 # use LUT for int8/uint8
1348 return convert_lrelu_to_lut(op, arch)
Fredrik Svedberg36424312022-09-16 09:39:26 +02001349 if check_quantized_tens_scaling_equal(ifm, ofm) and ifm.dtype == ofm.dtype == DataType.int16 and alpha > 0:
Fredrik Svedberg701ba912022-09-07 16:01:15 +02001350 # use LeakyRelu unmodified for int16 with equal input/output scaling and positive alpha
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001351 return op
1352 return convert_lrelu_to_mul_max(op, arch)
1353
1354
1355def convert_tanh_sigmoid_to_lut(op, arch, nng):
1356 # Converts int8/uint8 Sigmoid and Tanh to a LUT based solution
1357 if op.type == Op.Sigmoid:
1358 return convert_to_lut8(op, clamp_sigmoid, "sigmoid")
1359 elif op.type == Op.Tanh:
1360 return convert_to_lut8(op, math.tanh, "tanh")
1361 return op
1362
1363
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001364def fuse_activation_function_with_prev(op, arch, nng):
1365 # if op is a no-op: attempts to move the activation function to the preceding op
1366 if not op.attrs.get("is_nop", False) or op.activation is None:
1367 return op
1368 ifm, ofm = op.get_ifm_ofm()
1369 if ifm is None or ofm is None:
1370 return op
1371    # find the operation that produces the ifm
1372 prev_op = ifm.ops[0]
1373 # Note: the below checks on prev_op require that a first optimize pass on the full graph has been performed
1374 fuse = (
1375 prev_op.run_on_npu
1376 and prev_op.type.npu_block_type != NpuBlockType.Default
1377 and len(ifm.ops) == 1
1378 and len(prev_op.outputs[0].consumers()) == 1
1379 and prev_op.activation is None
1380 )
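    # fuse is only true when prev_op runs on the NPU with a non-default block type, has no activation of its own,
    # and is the sole producer of the ifm with a single consumer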
1381 if op.activation_lut is not None and arch.shram_reserved_unused_banks == 0:
1382 # TODO: if SHRAM LUT space is shared with SHRAM ACC (32, 64 MAC),
1383 # LUT currently only works correctly for elementwise ops
1384 fuse = False
1385 if not fuse:
1386 return op
1387 # Move the fused activation function + corresponding info to prev_op
1388 prev_op.activation = op.activation
1389 prev_op.forced_output_quantization = op.forced_output_quantization
1390 if op.activation_lut is not None:
1391 prev_op.set_activation_lut(op.activation_lut)
1392 # Bypass op
1393 prev_op.set_output_tensor(ofm)
wilisa0179a89042022-11-02 17:18:43 +00001394 DebugDatabase.add_optimised(prev_op, prev_op)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001395 return op
1396
1397
1398def _leading_pad_ok(leading_pad, stride, kernel_size):
1399 # If kernel size // 2 > stride, then (left, top) padding must be a multiple of stride,
1400 # otherwise replacing PAD by hardware padding would iterate the wrong IFM rows/columns
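    # E.g. kernel_size=7, stride=2: max_size is 3, so a leading pad of 2 or 3 is accepted but a leading pad of 1 is rejected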
1401 max_size = kernel_size // 2
1402 return leading_pad == max_size or max_size <= stride or leading_pad % stride == 0
1403
1404
1405def replace_pad_by_hw_pad(op: Operation, arch, nng):
1406 """
1407 Tries to completely remove a PAD operator by using hardware padding.
1408 E.g. a PAD operation that pads 1, followed by a CONV with VALID padding and kernel size 3
1409 is rewritten such that the PAD is removed, and the CONV uses SAME padding.
1410 Converts tens1 -> PAD -> tens2 -> CONV to tens1 -> CONV
1411 if both operations can be run on the NPU.
1412 This is the most efficient way to implement PAD, but cannot be done for all pad sizes.
1413 """
1414 if (
1415 (op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op() or op.type.is_avgpool_op())
Tim Hall0ab2edc2022-02-23 17:58:02 +00001416 and op.type not in (Op.Conv2DBackpropInput, Op.Conv2DBackpropInputSwitchedBias)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001417 and op.run_on_npu
1418 and op.attrs["padding"] == Padding.VALID
1419 ):
1420 pad_op = op.ifm.ops[0]
1421 if pad_op.type != Op.Pad or not pad_op.run_on_npu:
1422 return op
1423 if pad_op.ifm.dtype != pad_op.ofm.dtype or not check_quantized_tens_scaling_equal(pad_op.ofm, pad_op.ifm):
1424 return op
1425 top, left, bottom, right = get_pad_values_from_input(pad_op.inputs[1].values)
1426 k = op.kernel
1427 k_w, k_h = k.dilated_wh()
1428
1429 # Check if the PAD operator can be replaced by hardware padding
1430 if left > k_w // 2 or right > k_w // 2 or top > k_h // 2 or bottom > k_h // 2:
1431 # Too much padding, it would require hardware padding to actually insert zeros
1432 return op
1433 if not _leading_pad_ok(top, k.stride.y, k_h) or not _leading_pad_ok(left, k.stride.x, k_w):
1434 return op
1435
1436 if op.type.is_avgpool_op():
1437 # For average pool, hardware padding can only be used if padding is 0 or kernel size / 2
1438 for pad, k_size in (
1439 (left, k_w),
1440 (right, k_w),
1441 (top, k_h),
1442 (bottom, k_h),
1443 ):
1444 if pad not in (0, k_size // 2):
1445 return op
1446 # Average pool is converted to depthwise, because NPU average pool + same padding
1447 # has a special implementation that is different from PAD followed by average pool with
1448 # valid padding.
1449 k_w, k_h = op.kernel.width, op.kernel.height
1450 ifm = op.ifm
1451 # Remember other inputs
1452 other_inputs = op.inputs[1:]
1453 # Create a weight tensor, all weights are set to 1/(kernel width * kernel height)
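            # (the weight values are 1 and the quantization scale is 1/(k_w * k_h), so each weight effectively equals 1/(k_w * k_h))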
1454 quantization = QuantizationParameters(0.0, 255.0)
1455 quantization.scale_f32 = 1.0 / (k_w * k_h)
1456 quantization.zero_point = 0
1457 shape = [k_h, k_w, 1, op.ofm.shape[-1]]
1458 weights = np.full(shape, 1)
1459
1460 weight_tens = create_const_tensor(
1461 op.name + "_weights",
1462 shape,
1463 op.ifm.dtype,
1464 weights,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001465 purpose=TensorPurpose.Weights,
1466 quantization=quantization,
1467 )
James Peet7519d502021-07-19 16:47:58 +01001468 weight_tens.values = weights
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001469 op.type = Op.DepthwiseConv2DBias
1470 op.inputs = []
1471 op.add_input_tensor(ifm)
1472 op.add_input_tensor(weight_tens)
1473 # Add bias tensor, all biases set to 0
1474 op.inputs.append(None)
Fredrik Svedbergcc219be2022-09-20 16:32:52 +02001475 fixup_bias_tensors(op, arch, nng, DataType.int32)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001476 # Add other inputs
1477 op.inputs.extend(other_inputs)
1478 op.rounding_mode = NpuRoundingMode.NATURAL
1479
1480 # Bypass the PAD operator
1481 op.set_input_tensor(pad_op.ifm, 0)
1482 # Adjust the padding attributes of the convolution operator
1483 op.attrs["padding"] = Padding.EXPLICIT
1484 op.attrs["explicit_padding"] = (top, left, bottom, right)
1485 op.set_ifm_ofm_shapes()
wilisa0179a89042022-11-02 17:18:43 +00001486 DebugDatabase.add_optimised(op, op)
1487
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001488 return op
1489
1490
1491def convert_pad(op: Operation, arch, nng):
1492 """
1493 Rewrites PAD operator to an average pool that copies the IFM to the OFM
1494 + up to 4 average pool operators that fill the OFM with zeros at the borders.
1495 This is done as fall-back for the PAD operators that remain after replace_pad_by_hw_pad
1496 """
1497 if op.type != Op.Pad or not op.run_on_npu:
1498 return op
1499 top, left, bottom, right = get_pad_values_from_input(op.inputs[1].values)
1500
1501 ifm = op.ifm
1502 assert ifm is not None
James Ward3e134342021-10-28 10:01:40 +01001503 ifm_shape = op.ifm_shapes[0]
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001504 ofm = op.ofm
1505 assert ofm is not None
1506 ofm.ops = []
1507 ofm_shape = op.ofm_shapes[0]
1508
1509 # Average pool op that copies IFM to the right place inside the OFM
1510 shp0 = Shape4D(0, 0, 0, 0)
1511 shp_top = shp0.with_height(top)
1512 avgpool_op = create_avg_pool_for_concat(op, op.name + "_main", ifm, ifm_shape, shp_top.with_width(left))
1513 avgpool_op.activation = op.activation
1514 quant = ofm.quantization
1515 pad_value = quant.zero_point
1516 # Add operations that fill the borders of the OFM
1517 if top > 0:
1518 shape = Shape4D(1, top, ofm_shape.width, ofm_shape.depth)
1519 zero_tens = create_const_tensor(
Tim Hall3b1578e2023-01-13 17:57:25 +00001520 op.name + "_top", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001521 )
1522 # If top/bottom or left/right are equal, the const tensors can be allocated to the same address
1523 zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
1524 create_avg_pool_for_concat(op, op.name + "_top", zero_tens, shape, shp0)
1525 if bottom > 0:
1526 shape = Shape4D(1, bottom, ofm_shape.width, ofm_shape.depth)
1527 zero_tens = create_const_tensor(
1528 op.name + "_bottom",
1529 shape.as_list(),
1530 ofm.dtype,
1531 shape.elements() * [pad_value],
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001532 quantization=quant,
1533 )
1534 zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
1535 create_avg_pool_for_concat(
1536 op, op.name + "_bottom", zero_tens, shape, shp0.with_height(ofm_shape.height - bottom)
1537 )
1538 if left > 0:
1539 shape = Shape4D(1, ifm_shape.height, left, ofm_shape.depth)
1540 zero_tens = create_const_tensor(
Tim Hall3b1578e2023-01-13 17:57:25 +00001541 op.name + "_left", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001542 )
1543 zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
1544 create_avg_pool_for_concat(op, op.name + "_left", zero_tens, shape, shp_top)
1545 if right > 0:
1546 shape = Shape4D(1, ifm_shape.height, right, ofm_shape.depth)
1547 zero_tens = create_const_tensor(
Tim Hall3b1578e2023-01-13 17:57:25 +00001548 op.name + "_right", shape.as_list(), ofm.dtype, shape.elements() * [pad_value], quantization=quant
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001549 )
1550 zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
1551 create_avg_pool_for_concat(
1552 op, op.name + "_right", zero_tens, shape, shp_top.with_width(ofm_shape.width - right)
1553 )
1554
1555 op.type = Op.ConcatTFLite
1556 return avgpool_op
1557
1558
Fredrik Svedbergcc219be2022-09-20 16:32:52 +02001559def fixup_bias_tensors(op, arch, nng, dtype=None):
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001560 if op.type.needs_bias() and op.bias is None:
1561 # Op has no bias, add bias tensor filled with zeros
1562 nr_biases = op.inputs[1].shape[-1]
1563 bias_values = [0] * nr_biases
Fredrik Svedbergcc219be2022-09-20 16:32:52 +02001564 # The DataType of the bias tensor can be explicitly provided or deduced from the ifm
1565 # DataType. Default is int32 bias for 8-bit ifms and int64 for int16 ifms.
1566 # For int16 the selected bias DataType will have an impact on the scaling
1567 # used when encoding the scales and biases later. The default mode will match the
1568        # reference with reduced scaling for int64 bias.
1569 # This means that in cases (in the graph optimiser) where DepthwiseConv2DBias
1570        # is used to emulate average pool, int32 bias should be selected for full precision
1571 # int16 scaling.
1572 if dtype is None:
1573 dtype = DataType.int64 if op.ifm.dtype == DataType.int16 else DataType.int32
1574 bias_tensor = create_const_tensor(op.name + "_bias", [nr_biases], dtype, bias_values)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001575 op.set_input_tensor(bias_tensor, op.type.info.indices.biases[0])
1576
1577 return op
1578
1579
wilisa0146c94772023-02-08 09:56:14 +00001580def detect_asymmetric_weights(op):
1581 # Check all ops (cpu and npu)
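    # Returns True (and prints a warning) when a convolution-type op with an int8/int16 ifm has weights with a non-zero zero point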
1582 if op.type.is_conv2d_op() or op.type.is_depthwise_conv2d_op():
1583 if op.ifm.dtype in (DataType.int8, DataType.int16):
Fredrik Svedbergcc8569f2021-11-01 14:25:29 +01001584 if not np.all(op.weights.quantization.zero_point == 0):
wilisa0146c94772023-02-08 09:56:14 +00001585 print(f"Warning: Op {op.type} '{op.name}' has asymmetric weights.", end=" ")
1586 return True
1587 return False
Fredrik Svedbergcc8569f2021-11-01 14:25:29 +01001588
wilisa0146c94772023-02-08 09:56:14 +00001589
1590def fixup_asymmetric_weights(op, arch, nng):
1591 if detect_asymmetric_weights(op):
1592 if op.run_on_npu:
1593 print("Zero points have been adjusted.")
1594 op.weights.quantization.zero_point *= 0
Fredrik Svedbergcc8569f2021-11-01 14:25:29 +01001595 return op
1596
1597
wilisa0146c94772023-02-08 09:56:14 +00001598def check_asymmetric_weights(op, arch, nng):
1599 # This function can modify the run_on_npu flag which causes an operator to be placed on the CPU. It is usually only
1600 # set by the supported operator checks. Therefore, it should be run immediately after those checks to avoid the
1601 # possibility of other graph optimiser functions modify the operator (that is later run on the CPU)
1602 if detect_asymmetric_weights(op):
1603 if op.run_on_npu:
1604 print("To run the operator on Ethos-U use the option --force-symmetric-int-weights")
1605 op.run_on_npu = False
1606 return op
1607
1608
1609def fixup_or_check_asymmetric_weights(force_symmetric_int_weights):
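    # Returns the graph optimiser pass to use: either force the weight zero points to zero or flag the op to run on the CPU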
1610 if force_symmetric_int_weights:
1611 return fixup_asymmetric_weights
1612 else:
1613 return check_asymmetric_weights
1614
1615
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001616def convert_mean_to_depthwise_conv_or_avgpool(op, arch, nng):
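    # Converts a Mean over the height/width axes to an AvgPool with truncating rounding when the input and output
    # scaling are equal, otherwise to a DepthwiseConv2DBias with all-ones weights scaled by 1/(h*w) and a bias that
    # compensates for the input zero point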
1617 if op.type == Op.Mean and op.run_on_npu:
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001618 inp, axis = op.inputs
1619 shape = inp.shape
Diqing Zhong1ddb2ed2022-03-09 12:23:47 +01001620 ofm_shape = op.ofm.shape
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001621 dims = len(shape)
Diqing Zhong1ddb2ed2022-03-09 12:23:47 +01001622 dims_ofm = len(ofm_shape)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001623
1624        # Height and width axes have different indices depending on the number of dimensions
1625 if axis.shape == [] or axis.shape[0] == 1: # single axis
1626 axis = int(axis.values) if len(axis.shape) == 0 else int(axis.values[0])
1627 if dims in (2, 3):
1628 if axis == 0:
1629 h, w = shape[axis], 1
1630 else:
1631 h, w = 1, shape[axis]
1632 else:
1633 if axis == 1:
1634 h, w = shape[axis], 1
1635 else:
1636 h, w = 1, shape[axis]
1637 else: # multiple axes
1638 axis = sorted(axis.values)
1639 h, w = [shape[i] for i in axis]
1640
1641 # Set necessary depthwise attributes
1642 op.attrs.update(
1643 {
1644 "padding": Padding.VALID,
1645 "stride_h": 1,
1646 "stride_w": 1,
1647 "strides": (1, 1, 1, 1),
1648 "depth_multiplier": 1,
1649 "channel_multiplier": 1,
1650 "dilation_h_factor": 1,
1651 "dilation_w_factor": 1,
1652 "dilation": (1, 1, 1, 1),
1653 }
1654 )
1655 # Change op type
1656 op.type = Op.DepthwiseConv2DBias
1657 # Set IFM/OFM shapes after changing op type
1658 op.set_ifm_ofm_shapes()
1659
Fredrik Svedberg1e5456f2022-09-23 15:25:17 +02001660 weight_scale, bias = 1, 0
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001661 ofmq, ifmq = op.ofm.quantization, inp.quantization
Johan Alfvén9d51ec42022-10-27 16:30:01 +02001662 if ifmq.is_scaling_equal(ofmq):
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001663 # Here we can just use a simple AvgPool with truncating rounding,
1664 # as we're emulating simple integer division.
1665 op.rounding_mode = NpuRoundingMode.TRUNCATE
1666 op.type = Op.AvgPool
1667 op.attrs.update({"ksize": (1, h, w, 1), "filter_height": h, "filter_width": w})
1668 else:
1669 op.rounding_mode = NpuRoundingMode.NATURAL
1670 weight_scale = 1 / (h * w)
1671 # Input zero point is adjusted after mean calculation, so we emulate that with a bias
1672 bias = -ifmq.zero_point * h * w
1673 fiq = ifmq.clone()
1674 fiq.zero_point = 0
1675 op.forced_input_quantization = fiq
1676
1677 # Change dimensions to 4
Diqing Zhong1ddb2ed2022-03-09 12:23:47 +01001678 def extend_dims(dim, in_shape):
1679 if dim < 4:
1680 in_shape = [1] + in_shape
1681 if dim == 2:
1682 in_shape += [1]
1683 return in_shape
1684
1685 if dims < 4 or dims_ofm < 4:
1686 # Fix the ofm dimension when keep_dims is false
1687 # e.g. IFM=1xHxWxC axis=2 OFM=1xHxC, the ofm_shape should be 1xHx1xC, not 1x1xHxC
1688 if isinstance(axis, int) and dims_ofm + 1 == dims:
1689 ofm_shape.insert(axis, 1)
1690 elif isinstance(axis, list) and (dims_ofm + len(axis) == dims):
1691 for i in axis:
1692 ofm_shape.insert(i, 1)
1693 shape = extend_dims(dims, shape)
1694 dims_ofm = len(ofm_shape)
1695 ofm_shape = extend_dims(dims_ofm, ofm_shape)
1696 op.set_ifm_ofm_shapes()
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001697
Rickard Bolin7d7cb672021-12-07 09:09:14 +00001698 # If height is greater than max kernel height, reshape from HxW to 1x(HxW)
Johan Alfvéne84ed6b2022-09-26 13:46:51 +02001699 weight_shape = None
Rickard Bolin7d7cb672021-12-07 09:09:14 +00001700 if (h > 64 and op.type == Op.DepthwiseConv2DBias) or (h > 256 and op.type == Op.AvgPool):
Johan Alfvéne84ed6b2022-09-26 13:46:51 +02001701            # This can only happen, and can only be done, for reductions over multiple axes, and
1702 # h * w <= 256 for DepthwiseConv2DBias
1703 # h * w <= 4096 for AvgPool
1704 # which is checked in supported ops
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001705 shape = [shape[0], 1, h * w, shape[3]]
1706 op.ifm_shapes[0] = Shape4D(shape)
Johan Alfvéne84ed6b2022-09-26 13:46:51 +02001707 weight_shape = [1, h * w, shape[3], shape[0]]
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001708 if h > 256 and op.type == Op.AvgPool:
1709 op.attrs.update({"ksize": (1, 1, h * w, 1), "filter_height": 1, "filter_width": h * w})
1710
1711 # If the AvgPool version is used, we don't need to do anything else
1712 if op.type == Op.AvgPool:
wilisa0179a89042022-11-02 17:18:43 +00001713 DebugDatabase.add_optimised(op, op)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001714 return op
1715
1716 # Make unit weight tensor quantization
1717 weight_quant = ifmq.clone()
1718 weight_quant.min = 0
1719 weight_quant.max = 255
1720 weight_quant.scale_f32 = weight_scale
1721 weight_quant.zero_point = 0
1722
Johan Alfvéne84ed6b2022-09-26 13:46:51 +02001723 if weight_shape is None:
1724 # Set weight shape to [H,W,C,B]
1725 weight_shape = [h, w, shape[3], shape[0]]
Diqing Zhong1ddb2ed2022-03-09 12:23:47 +01001726
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001727 # Add unit weight tensor
1728 op.set_input_tensor(
1729 create_const_tensor(
1730 "weights",
1731 weight_shape,
1732 inp.dtype,
1733 np.ones(weight_shape),
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001734 quantization=weight_quant,
1735 ),
1736 1,
1737 )
James Peet7519d502021-07-19 16:47:58 +01001738 op.weights.values = np.reshape(op.inputs[1].values, weight_shape)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001739
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001740 # Add bias tensor
Fredrik Svedberg1e5456f2022-09-23 15:25:17 +02001741 bias_shape = [shape[-1]]
1742 op.inputs.append(create_const_tensor("bias", bias_shape, DataType.int32, np.ones(bias_shape) * bias))
wilisa0179a89042022-11-02 17:18:43 +00001743 DebugDatabase.add_optimised(op, op)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001744
1745 return op
1746
1747
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001748def optimise_quantize(op: Operation, arch, nng):
1749
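    # Statically evaluates Quantize operators with constant input values and converts them to Const operators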
1750 if op.type == Op.Quantize and op.run_on_npu:
1751
1752 ifm, ofm = op.get_ifm_ofm()
1753 input_values = ifm.values
1754
1755 # Guard clause - input not const or no values to quantize
1756 if ifm.ops[0].type != Op.Const or input_values is None:
1757 return op
1758
1759 # Singular val in numpy array, convert to indexable array
1760 if input_values.ndim == 0:
1761 input_values = np.array([input_values])
1762
Fredrik Svedberg11563172022-07-06 14:54:12 +02001763 # requantized int8 to int8 or int16 to int16
1764 if ifm.dtype == ofm.dtype == DataType.int8 or ifm.dtype == ofm.dtype == DataType.int16:
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001765
1766 # scale needs to use double precision to match TFLite reference kernel
1767 effective_scale = np.float64(ifm.quantization.scale_f32) / np.float64(ofm.quantization.scale_f32)
1768 effective_multiplier, effective_shift = quantise_scale(effective_scale)
1769
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001770 requantized_vals = []
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001771 for val in input_values.flatten():
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001772 input_val = val - ifm.quantization.zero_point
1773
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001774 ofm_val = fp_math.multiply_by_quantized_multiplier(input_val, effective_multiplier, effective_shift)
1775 ofm_val += ofm.quantization.zero_point
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001776
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001777 clamped_ofm_value = max(min(ofm_val, ofm.quantization.quant_max), ofm.quantization.quant_min)
1778 requantized_vals.append(clamped_ofm_value)
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001779
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001780 ofm.values = np.array(requantized_vals, ofm.dtype.as_numpy_type())
1781 ofm.values.shape = input_values.shape
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001782
1783 # Case: Float input - quantize to int
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001784 elif ifm.dtype.type == BaseType.Float:
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001785
1786 quantized_vals = []
1787 for val in input_values:
1788
1789 # Derive quantized value
1790 quant_val = (val / ofm.quantization.scale_f32) + ofm.quantization.zero_point
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001791 clamped_quantized_val = np.clip(quant_val, ofm.quantization.quant_min, ofm.quantization.quant_max)
1792 quantized_vals.append(clamped_quantized_val)
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001793
1794 # Pass the statically calculated quant val to output tensor
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001795 ofm.values = np.array(quantized_vals, ofm.dtype.as_numpy_type())
1796
1797 # Unsupported data type
1798 else:
1799 return op
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001800
1801 # Make quantize op const and disconnect from parent node
1802
1803 # Remove reference of the current quant op from the parent tensor's consumer list
1804 ifm.consumer_list = [consumer for consumer in ifm.consumer_list if consumer.op_index != op.op_index]
1805
1806 # Clear any references to parent node
1807 op.inputs = []
1808
1809 # Convert this quantize op to const
1810 op.type = Op.Const
1811
1812 return op
1813
1814
Ayaan Masood4965fae2022-06-29 11:30:57 +01001815def convert_shape_op_to_constant_tensor(op: Operation, arch, nng):
1816 """Static optimisation for SHAPE operator output value known at compile time"""
1817
1818 # Disconnect SHAPE operator from its parent and transform SHAPE OP into constant
1819
1820 if op.type == Op.Shape and op.run_on_npu:
1821
1822 ifm, ofm = op.get_ifm_ofm()
1823
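        # Only fold the SHAPE op when its output holds exactly one element per input dimension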
1824 if len(ifm.shape) != ofm.shape[0]:
1825 return op
1826
1827 # Remove reference of the current shape op from the parent tensor's consumer list
1828 ifm.consumer_list = [consumer for consumer in ifm.consumer_list if consumer.op_index != op.op_index]
1829
1830 # Clear any references to parent node
1831 op.inputs = []
1832
1833 # Convert this SHAPE op to const
1834 op.type = Op.Const
1835
1836        # Set the output values to the statically known shape of the input tensor
1837 ofm.values = np.array(ifm.shape)
1838
1839 return op
1840
1841
Tim Hallea4ba662022-11-11 18:19:53 +00001842def fixup_dilation_gt2(op, arch, nng):
1843 assert op.run_on_npu
1844 if op.type == Op.Conv2DBias or op.type == Op.DepthwiseConv2DBias:
1845 dilation_w, dilation_h = op.get_kernel_dilation()
1846
1847 # if dilation in either axis is greater than that supported by the hardware then we must manually dilate the
1848 # kernel
1849 if dilation_w > 2 or dilation_h > 2:
1850 kernel_w, kernel_h = op.get_kernel_size()
1851 kernel_ic = op.weights.shape[-2]
1852 kernel_oc = op.weights.shape[-1]
1853
1854            # if the dilation is a multiple of 2 then the hardware dilation can be enabled to provide that multiple
1855            # of 2. This allows the kernel size to be reduced (via the scaled dilation) by half in that dimension.
1856 # odd = 1, even = 2
1857 hw_dilation_h = 1 if (dilation_h & 1) else 2
1858 hw_dilation_w = 1 if (dilation_w & 1) else 2
1859
1860 scale_dilation_h = dilation_h // hw_dilation_h
1861 scale_dilation_w = dilation_w // hw_dilation_w
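            # e.g. dilation 4 becomes hw dilation 2 with scale dilation 2; dilation 3 becomes hw dilation 1 with scale dilation 3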
1862
1863 # create new empty kernel (HWIO format)
1864 new_kernel_h = (kernel_h - 1) * scale_dilation_h + 1
1865 new_kernel_w = (kernel_w - 1) * scale_dilation_w + 1
1866
1867 new_kernel_shape = [new_kernel_h, new_kernel_w, kernel_ic, kernel_oc]
1868 new_kernel_values = np.zeros(new_kernel_shape, dtype=op.weights.values.dtype)
1869
1870 # copy the original kernel values into the new sparse kernel
1871 for h in range(0, kernel_h):
1872 for w in range(0, kernel_w):
1873 new_h = h * scale_dilation_h
1874 new_w = w * scale_dilation_w
1875 new_kernel_values[new_h, new_w, :, :] = op.weights.values[h, w, :, :]
1876
1877 # update the weight tensor with the new dilated kernel
1878 op.weights.shape = new_kernel_shape
1879 op.weights.values = new_kernel_values
1880
1881 # enable(=2) / disable(=1) hardware dilation
1882 op.attrs["dilation"] = (1, hw_dilation_h, hw_dilation_w, 1) # nhwc format
1883 op.attrs["dilation_h_factor"] = hw_dilation_h
1884 op.attrs["dilation_w_factor"] = hw_dilation_w
1885
1886 return op
1887
1888
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001889def supported_operator_check(op, arch, nng):
Jonas Ohlsson45e653d2021-07-26 16:13:12 +02001890 op.run_on_npu = arch.tflite_supported_operators.is_operator_supported(op)
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001891 return op
1892
1893
wilisa0146c94772023-02-08 09:56:14 +00001894def tflite_optimise_graph(nng, arch, force_symmetric_int_weights):
Fredrik Svedberg11563172022-07-06 14:54:12 +02001895 # Compile time static optimisations
wilisa0146c94772023-02-08 09:56:14 +00001896 optimisation_list = [
1897 optimise_quantize,
1898 convert_shape_op_to_constant_tensor,
1899 fixup_or_check_asymmetric_weights(force_symmetric_int_weights),
1900 ]
Ayaan Masood25f48dd2022-06-29 18:16:04 +01001901
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001902 for idx, sg in enumerate(nng.subgraphs):
1903 nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
Jonas Ohlssond8575072022-03-30 10:30:25 +02001904 nng,
1905 sg,
1906 arch,
1907 [],
Ayaan Masood4965fae2022-06-29 11:30:57 +01001908 optimisation_list,
1909 rewrite_unsupported=False,
1910 )
1911
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001912 # Pre-processing step
wilisa0146c94772023-02-08 09:56:14 +00001913 pre_process_list = [supported_operator_check, set_ifm_ofm_op_shapes]
Fredrik Svedberga04f2f72022-07-06 13:42:24 +02001914
Ayaan Masood4965fae2022-06-29 11:30:57 +01001915 for idx, sg in enumerate(nng.subgraphs):
1916 nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
1917 nng,
1918 sg,
1919 arch,
1920 [],
Jonas Ohlssond8575072022-03-30 10:30:25 +02001921 pre_process_list,
1922 rewrite_unsupported=False,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001923 )
1924
1925 # Handle Concat Ops
1926 for idx, sg in enumerate(nng.subgraphs):
1927 rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [rewrite_concat_ops])
1928 sg.refresh_after_modification()
1929
1930 # Handle Split Ops
1931 for idx, sg in enumerate(nng.subgraphs):
1932 nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
1933 nng,
1934 sg,
1935 arch,
1936 [],
1937 [rewrite_unpack_output, rewrite_stridedslice_output, convert_nop_split_to_identity],
1938 rewrite_unsupported=False,
1939 )
1940
1941 for idx, sg in enumerate(nng.subgraphs):
1942 nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
Jonas Ohlssond8575072022-03-30 10:30:25 +02001943 nng,
1944 sg,
1945 arch,
1946 [rewrite_split_ops],
1947 [],
1948 rewrite_unsupported=False,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001949 )
1950
Johan Alfvena5e1b622023-02-02 14:59:03 +01001951 # Bypass or rewrite memory only operators
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001952 for idx, sg in enumerate(nng.subgraphs):
1953 nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
Jonas Ohlssond8575072022-03-30 10:30:25 +02001954 nng,
1955 sg,
1956 arch,
1957 [],
Johan Alfvena5e1b622023-02-02 14:59:03 +01001958 [bypass_memory_only_ops],
Jonas Ohlssond8575072022-03-30 10:30:25 +02001959 rewrite_unsupported=False,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001960 )
1961
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001962 # Rewrite of operators
1963 op_rewrite_list = [
1964 set_tensor_equivalence,
1965 convert_mean_to_depthwise_conv_or_avgpool,
1966 convert_depthwise_to_conv,
1967 convert_conv_to_fc,
1968 convert_softmax,
Fredrik Svedberg8ddd4892022-08-19 16:06:04 +02001969 convert_prelu,
Fredrik Svedberg36424312022-09-16 09:39:26 +02001970 convert_mul_max_to_abs_or_lrelu,
1971 convert_lrelu,
Raul Farkas090f18a2023-01-24 16:29:06 +00001972 fixup_strided_conv,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001973 convert_hardswish_to_lut,
1974 rewrite_fully_connected_input,
1975 convert_batched_fc_shape,
1976 fixup_conv2d_backprop,
1977 fixup_relus_with_differing_ifm_ofm_scaling,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001978 reorder_depthwise_weights,
Tim Hall885033b2022-07-21 11:46:03 +01001979 fixup_resize,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001980 fixup_bias_tensors,
Fredrik Svedbergcc8569f2021-11-01 14:25:29 +01001981 fixup_asymmetric_weights,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001982 convert_tanh_sigmoid_to_lut,
1983 replace_pad_by_hw_pad,
Tim Hallea4ba662022-11-11 18:19:53 +00001984 fixup_dilation_gt2,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001985 ]
1986
1987 for idx, sg in enumerate(nng.subgraphs):
1988 nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
Jonas Ohlssond8575072022-03-30 10:30:25 +02001989 nng,
1990 sg,
1991 arch,
1992 [],
1993 op_rewrite_list,
1994 rewrite_unsupported=False,
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02001995 )
1996
1997 for idx, sg in enumerate(nng.subgraphs):
1998 # remove passthrough tensors and attempt further optimizations
1999 nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
2000 nng,
2001 sg,
2002 arch,
2003 [remove_passthrough_tensor],
2004 [fuse_activation_function_with_prev, convert_pad, add_padding_fields],
2005 )
2006
2007    # Removal of SplitSliceRead needs to be done after the optimisations have been performed,
2008    # since the ifm/ofm_shapes are of importance to this function
2009 for sg in nng.subgraphs:
2010 rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_SplitSliceRead])
2011 sg.refresh_after_modification()
2012
Fredrik Svedbergf3c7d552022-11-04 09:48:49 +01002013 # Make sure that const optimisations on subgraph outputs are handled correctly
2014 for sg in nng.subgraphs:
2015 for ofm in sg.output_tensors:
2016 if ofm.is_const and ofm.ops[0].type_changed:
2017 # Subgraph output cannot be const - insert a memory copy
2018 op = ofm.ops[0]
2019 ofm_clone = ofm.clone()
2020 ofm_clone.values = ofm.values
2021 ofm.values = None
Tim Hall3b1578e2023-01-13 17:57:25 +00002022 zero = create_const_tensor("zero", [1], ofm.dtype, [0], quantization=ofm.quantization)
Fredrik Svedbergf3c7d552022-11-04 09:48:49 +01002023 memcpy = create_add_nop(f"{ofm.name}_copy")
2024 memcpy.add_input_tensor(ofm_clone)
2025 memcpy.add_input_tensor(zero)
2026 memcpy.set_output_tensor(ofm)
2027 memcpy.set_ifm_ofm_shapes()
2028 op.set_output_tensor(ofm_clone)
2029 DebugDatabase.add_optimised(op, memcpy)
2030
Patrik Gustavsson8f1f9aa2021-06-28 07:41:58 +02002031 return nng