# SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Early optimisation of the TOSA based network graph, using the rewrite_graph module to do the traversal of the graph.
import numpy as np

from . import rewrite_graph
from .data_type import DataType
from .debug_database import DebugDatabase
from .graph_optimiser_util import bypass_memory_only_ops
from .graph_optimiser_util import calc_explicit_padding
from .graph_optimiser_util import convert_depthwise_to_conv
from .graph_optimiser_util import move_splitsliceread_to_consumer
from .graph_optimiser_util import needed_total_padding
from .graph_optimiser_util import set_ifm_ofm_op_shapes
from .graph_optimiser_util import set_tensor_equivalence
from .lut import convert_to_lut
from .operation import ExplicitScaling
from .operation import Op
from .operation import RoundingMode
from .operation_util import create_add_nop
from .operation_util import create_avgpool_nop
from .operation_util import create_pad_nop
from .shape4d import Shape4D
from .tensor import create_const_tensor
from .tensor import create_equivalence_id
from .tensor import shape_num_elements
from .tensor import Tensor


def replace_rescale_with_avg_pool(rescale_op):
    assert rescale_op.type == Op.Rescale

    avgpool_op = create_avgpool_nop(rescale_op.name + "_avgpool")
    rescale_op_clone = rescale_op.clone()
    op = rescale_op
    op.attrs = avgpool_op.attrs.copy()
    op.type = Op.AvgPool
    DebugDatabase.add_optimised(rescale_op_clone, op)

    return op


def calc_skirt(kernel, input_shape, explicit_padding):
    k_w, k_h = kernel.dilated_wh()
    s_x, s_y = kernel.stride
    ypad = needed_total_padding(int(input_shape.height), int(s_y), int(k_h))
    xpad = needed_total_padding(int(input_shape.width), int(s_x), int(k_w))
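    # needed_total_padding is assumed to return the total SAME-style padding for a
    # dimension, e.g. height 224, stride 2 and dilated kernel 3 would give ypad = 1.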

    top, left, bottom, right = explicit_padding
    top_pad, bottom_pad = calc_explicit_padding(int(input_shape.height), int(s_y), int(k_h), int(top), int(bottom))
    left_pad, right_pad = calc_explicit_padding(int(input_shape.width), int(s_x), int(k_w), int(left), int(right))

    padding = (top_pad, left_pad, bottom_pad, right_pad)
    skirt = (top_pad, left_pad, ypad - top_pad, xpad - left_pad)
    return padding, skirt


def add_padding_fields(op, arch, nng):
    if op.run_on_npu:
        if "explicit_padding" in op.attrs:
            input_shape = op.ifm_shapes[0]

            if op.type == Op.Conv2DBackpropInputSwitchedBias:
                # TODO not yet supported, but there will be a need for separate handling
                assert False
            else:
                padding, skirt = calc_skirt(op.kernel, input_shape, op.attrs.get("explicit_padding"))

            op.attrs["explicit_padding"] = padding
            op.attrs["skirt"] = skirt

    return op


# Counts the number of leading zeros in a (treated as int32)
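# A minimal illustration of the function below: count_leading_zeros(1) == 31,
# count_leading_zeros(1 << 31) == 0 and count_leading_zeros(0) == 32.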
def count_leading_zeros(a):
    lz = int(32)
    if a != 0:
        mask = 1 << (32 - 1)
        lz = 0
        while (mask & a) == 0:
            mask = mask >> 1
            lz = lz + 1
    return lz


def calc_scaling_avgpool(op, arch, nng):
    if op.type == Op.AvgPool:
        top, left, _, _ = op.attrs["explicit_padding"]
        # TODO Only supported when global scaling can be used,
        # that is, when there is no padding
        assert top == 0 and left == 0
        assert op.explicit_scaling is None
        multiplier = []
        shift = []

        kernel_wh = op.kernel.elements_wh()
        k = 32 - count_leading_zeros(kernel_wh - 1)
        numerator = np.int64(((1 << 30) + 1) << k)
        multiplier.append(numerator // kernel_wh)
        shift.append(30 + k)
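        # The multiplier/shift pair implements a fixed-point reciprocal of the kernel
        # area, so ofm ~= (sum * multiplier) >> shift ~= sum / kernel_wh.
        # Worked example (illustrative): kernel_wh = 9 gives k = 4,
        # multiplier = (((1 << 30) + 1) << 4) // 9 (roughly 2**34 // 9) and shift = 34.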

        op.rounding_mode = RoundingMode.HalfUp
        op.explicit_scaling = ExplicitScaling(False, shift, multiplier)
    return op


def remove_const_transpose(op, arch, nng):
    if op.type == Op.Transpose:
        removed = False
        if len(op.ifm.ops) == 1:
            prev_op = op.ifm.ops[0]
            if prev_op.type == Op.Const:
                # Transpose the Tensor and data and remove Transpose
                # TODO move to Tensor?
                reorder = op.attrs["perms"]
                shape = op.ifm.shape.copy()
                tens = op.ifm

                tens.shape = [shape[idx] for idx in reorder]
                tens.bandwidth_shape = tens.shape
                tens.storage_shape = tens.shape

                if tens.values is not None:
                    tens.values = tens.values.transpose(reorder)

                op.ofm.values = tens.values
                # Bypass the Transpose op
                prev_op.set_output_tensor(op.ofm)
                DebugDatabase.add_optimised(op, prev_op)
                removed = True

        if not removed:
            print("Warning: Cannot remove Transpose, and handling of Transpose is not supported")
            assert False

    return op


def insert_add_copy_for_const(op, ifm_ofm_shape):
    assert op.type == Op.Const
    ofm = op.ofm
    copy_tens = ofm.clone()
    op.set_output_tensor(copy_tens)

    name = ofm.name + "_add"
    ifm2 = create_const_tensor(
        name + "_zero_scalar",
        [1],
        copy_tens.dtype,
        [0],
        quantization=copy_tens.quantization,
    )
    copy_op = create_add_nop(name)
    copy_op.add_input_tensor(copy_tens)
    copy_op.add_input_tensor(ifm2)
    copy_op.set_output_tensor(ofm)
    copy_op.ifm_shapes.append(ifm_ofm_shape)
    copy_op.ifm_shapes.append(Shape4D(ifm2.shape))
    copy_op.ofm_shapes.append(ifm_ofm_shape)
    copy_op.run_on_npu = True

    DebugDatabase.add_optimised(op, copy_op)


# TODO can we change to add for both TFLite and TOSA?
def insert_add_copy_op_after_tens(tens, ifm_ofm_shape):
    tens_cons_list_copy = tens.consumer_list.copy()
    copy_tens = tens.clone()

    name = tens.name + "_add"
    ifm2 = create_const_tensor(
        name + "_zero_scalar",
        [1],
        copy_tens.dtype,
        [0],
        quantization=copy_tens.quantization,
    )
    copy_op = create_add_nop(name)
    copy_op.add_input_tensor(tens)
    copy_op.add_input_tensor(ifm2)
    copy_op.set_output_tensor(copy_tens)
    copy_op.ifm_shapes.append(ifm_ofm_shape)
    copy_op.ifm_shapes.append(Shape4D(ifm2.shape))
    copy_op.ofm_shapes.append(ifm_ofm_shape)
    copy_op.run_on_npu = True

    # Set copy_ifm consumers
    for tens_cons in tens_cons_list_copy:
        if tens_cons is not None:
            for ifm_idx, cons_inp in enumerate(tens_cons.inputs):
                if cons_inp == tens:
                    tens_cons.set_input_tensor(copy_tens, ifm_idx)

    DebugDatabase.add_optimised(tens.ops[0], copy_op)


def get_shape_for_copy_op(shape):
    # remove dimensions that are set to 1
    new_shape = []
    for dim in shape:
        if dim != 1:
            new_shape.append(dim)
    if not new_shape:
        new_shape = [1]

    rank = len(new_shape)
    if rank > 3:
        # Reshape so that batch becomes 1, by moving elements to H dimension
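        # Illustrative example (assuming Shape4D pads missing leading dimensions with 1):
        # shape [2, 3, 4, 5] -> rank 4 -> n = 2, h = 2 * 3 = 6 -> Shape4D [1, 6, 4, 5]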
        n = rank - 2
        h = 1
        for i in range(n):
            h *= shape[i]
        new_shape = Shape4D(new_shape[n:]).with_height(h)
    else:
        new_shape = Shape4D(new_shape)
    return new_shape


def fix_sg_input_output_tosa(op, arch, nng):

    if op.type == Op.Const and any(ofm_cons is None for ofm_cons in op.ofm.consumer_list):
        # Const operator with sg output, insert copy op before the ofm
        new_shape = get_shape_for_copy_op(op.ofm.shape.copy())
        insert_add_copy_for_const(op, new_shape)
    elif op.run_on_npu and op.type in (Op.Reshape, Op.Identity):
        # For the Reshape operators we want to remove, tensors are removed.
        # But in order to do this, they cannot be outputs of the sg;
        # this needs to be fixed prior to the removal.
        # The solution is to add a copy op, to maintain the original tensor.
        # This is also valid when the reshape ifm/ofm is produced respectively
        # consumed by the CPU

        # Check if operator ifm/ofm are sg ifm/ofm
        ifm_is_sg_ifm = op.ifm.ops[0].type in (
            Op.Placeholder,
            Op.SubgraphInput,
            Op.Const,
        )
        ifm_is_sg_ofm = any(ifm_cons is None for ifm_cons in op.ifm.consumer_list)
        ofm_is_sg_ofm = any(ofm_cons is None for ofm_cons in op.ofm.consumer_list)
        # Check if ifm/ofm is produced respectively consumed by the CPU
        ifm_is_cpu_produced = any(ifm_prod is not None and not ifm_prod.run_on_npu for ifm_prod in op.ifm.ops)
        ofm_is_cpu_consumed = any(ofm_cons is not None and not ofm_cons.run_on_npu for ofm_cons in op.ofm.consumer_list)

        if (ifm_is_sg_ofm or ifm_is_sg_ifm or ifm_is_cpu_produced) and (ofm_is_sg_ofm or ofm_is_cpu_consumed):
            # Both ifm and ofm need to persist, but only ifm needs a copy, in order to remove the Operator
            # Decide on ifm/ofm shapes for the copy op based on ifm
            new_shape = get_shape_for_copy_op(op.ifm.shape.copy())
            insert_add_copy_op_after_tens(op.ifm, new_shape)
    return op


def create_add_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """Creates an add op for the given concat op/input feature map"""
    ofm = concat_op.ofm
    ifm2 = create_const_tensor(name + "_zero_scalar", [1], ofm.dtype, [0], quantization=ofm.quantization)
    add_op = create_add_nop(name)

    add_op.inputs = [ifm, ifm2]
    add_op.outputs = [ofm]
    add_op.write_offset = write_offset
    add_op.write_shape = ifm_shape
    ofm.ops.append(add_op)
    DebugDatabase.add_optimised(concat_op, add_op)
    add_op.ifm_shapes.append(ifm_shape)
    add_op.ifm_shapes.append(Shape4D(ifm2.shape))
    add_op.ofm_shapes.append(concat_op.ofm_shapes[0])
    add_op.memory_function = Op.ConcatSliceWrite
    return add_op


# TODO Could be further optimized by checking the type of the consumer,
# rather than just mimicking the TFLite behaviour depending on type.
# TOSA bool_t not considered yet
def remove_splitsliceread(op, arch):

    if op.type == Op.SplitSliceRead:
        # Check if it is possible to put the SplitSliceRead on the tensor consumer, or if a copy op (add) needs to be inserted
        if (
            len(op.ofm.consumer_list) == 1
            and op.ofm.consumer_list[0] is not None
            and op.ofm.consumer_list[0].run_on_npu
            and op.ofm.consumer_list[0].type != Op.Reshape
            and op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape)
            and op.ofm.dtype in (DataType.uint8, DataType.int8, DataType.int16)
        ):
            # SplitSliceRead can be performed by tensor consumer
            cons_op = op.ofm.consumer_list[0]
            move_splitsliceread_to_consumer(op, cons_op)
        else:
            name = op.name + "_add"
            ofm = op.ofm
            ifm2 = create_const_tensor(
                name + "_zero_scalar",
                [1],
                ofm.dtype,
                [0],
                quantization=ofm.quantization,
            )
            add_op = create_add_nop(name)
            add_op.inputs = [op.ifm, ifm2]
            add_op.outputs = [ofm]
            op.ofm.ops.remove(op)
            op.ofm.ops.append(add_op)
            add_op.ifm_shapes.append(op.ifm_shapes[0])
            add_op.ifm_shapes.append(Shape4D(ifm2.shape))
            add_op.ofm_shapes.append(op.ofm_shapes[0])
            add_op.read_offsets[0] = op.read_offsets[0]
            add_op.read_shapes[0] = op.read_shapes[0]

            op.ifm.consumer_list.remove(op)
            DebugDatabase.add_optimised(op, add_op)


def rewrite_concat(op):
    if not op.run_on_npu or not op.type == Op.Concat:
        return

    offset = 0
    inputs = op.inputs
    axis_4D = op.attrs["axis4D"]
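    # Each input is written into the OFM by an add nop at an increasing offset along
    # axis_4D, e.g. two inputs of depth 8 and 16 concatenated along depth are written
    # at depth offsets 0 and 8 respectively (illustrative values).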

    for idx, inp in enumerate(inputs):
        write_offset = [0, 0, 0, 0]
        write_offset[axis_4D] = offset
        concat_end = offset + op.ifm_shapes[idx][axis_4D]
        create_add_for_concat(
            op,
            op.name + str(idx) + "_add",
            inp,
            op.ifm_shapes[idx],
            Shape4D.from_list(write_offset),
        )
        offset = concat_end
    assert op.ofm_shapes[0][axis_4D] == offset


def remove_memory_ops(op, arch):
    if op.run_on_npu and op.type in (Op.Reshape, Op.Identity):
        bypass_memory_only_ops(op, arch, None)


def rewrite_activation(op, arch, nng):
    if op.type not in (Op.ReluN, Op.Clamp):
        return op

    ifm = op.ifm
    zp = ifm.quantization.zero_point if ifm.quantization.zero_point else 0
    if op.ofm.quantization.zero_point is None:
        op.ofm.quantization.zero_point = zp

    if op.type == Op.Clamp:
        op.attrs["min"] = op.attrs["min_int"] - zp
        op.attrs["max"] = op.attrs["max_int"] - zp
    elif op.type == Op.ReluN:
        op.attrs["max"] = op.attrs["max_int"] - zp

    return op


def rewrite_rescale(op, arch, nng):
    if op.type == Op.Rescale:
        ifm = op.ifm
        ofm = op.ofm

        # some error checking
        assert len(ifm.ops) == 1
        prev_op = ifm.ops[0]

        # TODO currently not supported
        assert len(ifm.consumer_list) == 1

        input_zp = op.attrs["input_zp"]
        output_zp = op.attrs["output_zp"]
        multiplier = op.attrs["multiplier"]
        shift = op.attrs["shift"]
        scale32 = op.attrs["scale32"]
        double_round = op.attrs["double_round"]
        per_channel = op.attrs["per_channel"]

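        # Per the TOSA specification, RESCALE computes approximately
        #   ofm = ((ifm - input_zp) * multiplier) >> shift, then + output_zp,
        # with the rounding behaviour selected by double_round. Here the scaling is
        # captured as ExplicitScaling and fused into a neighbouring op where possible.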
        assert ifm.dtype in (DataType.uint8, DataType.int8, DataType.int32)
        assert ifm.dtype in (DataType.uint8, DataType.int8) or input_zp == 0
        assert ofm.dtype in (DataType.uint8, DataType.int8) or output_zp == 0
        assert (scale32 and ifm.dtype != DataType.int48) or (not scale32 and not double_round)

        # Check that input tensor has the same zp or no zp
        ifm_zp = ifm.quantization.zero_point
        if ifm_zp is not None and ifm_zp != input_zp:
            print("Error (fuse_rescale): zp of tensors producer/consumer differs unexpectedly")
            assert False
        ifm.quantization.zero_point = input_zp
        ofm.quantization.zero_point = output_zp

        assert per_channel is False, "per_channel rescale not supported"

        for s, m in zip(shift, multiplier):
            # TODO these are the TOSA limitations
            assert m >= 0
            assert 2 <= s <= 62
            # TODO these are the HW limitations
            assert 0 <= s < (1 << 6)
        explicit_scaling = ExplicitScaling(per_channel, shift, multiplier)

        if double_round and scale32:
            rounding_mode = RoundingMode.TFLite
        else:
            rounding_mode = RoundingMode.HalfUp

        if prev_op.type.is_depthwise_conv2d_op() or prev_op.type.is_conv2d_op() or prev_op.type == Op.FullyConnected:
            # Currently not supporting per_channel quantization
            if ifm.dtype == DataType.int32 and not per_channel:
                prev_op.explicit_scaling = explicit_scaling
                prev_op.rounding_mode = rounding_mode

                # Bypass op
                prev_op.set_output_tensor(ofm)
                DebugDatabase.add_optimised(op, prev_op)
                return op
            else:
                print(
                    "Warning, unsupported fusing of TOSA Rescale; previous operator is of type:",
                    prev_op.type,
                )
                assert False
        elif (
            (ifm.dtype == DataType.int8 and ofm.dtype == DataType.int8)
            or (ifm.dtype == DataType.uint8 and ofm.dtype == DataType.int8)
            or (ifm.dtype == DataType.int8 and ofm.dtype == DataType.uint8)
        ):
            # Create NOP performing the RESCALE
            avgpool_op = replace_rescale_with_avg_pool(op)
            avgpool_op.rounding_mode = rounding_mode

            if per_channel:
                # TODO
                avgpool_op.explicit_scaling = explicit_scaling
                print("Warning, unsupported TOSA Rescale")
                assert False
            else:
                avgpool_op.explicit_scaling = explicit_scaling
        elif prev_op.type == Op.Add:
            # Check that the operations before the Add which create the IFMs
            # are Op.Rescale that we can fuse into the add
            rescale_1 = prev_op.ifm.ops[0]
            rescale_2 = prev_op.ifm2.ops[0]

            if rescale_1.type == Op.Rescale and rescale_2.type == Op.Rescale:
                # We are assuming the quantization to be the same for the IFMs
                equal_attributes = ["multiplier", "shift", "double_round"]
                for a in equal_attributes:
                    assert op.attrs[a] == rescale_1.attrs[a] == rescale_2.attrs[a], (
                        f"Only handling equal {a} for all operands "
                        f"({op.attrs[a]}, {rescale_1.attrs[a]}, {rescale_2.attrs[a]}) "
                        "for all the rescale operations to be fused with Add!"
                    )

                assert rescale_1.attrs["input_zp"] == rescale_2.attrs["input_zp"], (
                    f"Only handling equal input_zp ({rescale_1.attrs['input_zp']}!={rescale_2.attrs['input_zp']}) "
                    "for the rescale operations to be fused with Add!"
                )
                for op in [rescale_1, rescale_2]:
                    assert op.attrs["output_zp"] == 0, ""
                    assert op.attrs["per_channel"] is False, "per channel quantization is not supported."

                # Create a new add op to set the rescaled ifms and ofm
                add_op = create_add_nop(prev_op.name + "_fused_rescales")
                add_op.type = Op.Add

                # set the IFMs and OFM for the cloned operation
                add_op.set_output_tensor(ofm)
                add_op.add_input_tensor(rescale_1.ifm)
                add_op.add_input_tensor(rescale_2.ifm)
                add_op.set_ifm_ofm_shapes()

                # Remove the consumption of the IFMs to the Add
                # since we are pruning them from the graph
                for i, c in enumerate(prev_op.ifm.consumers()):
                    if c == rescale_1:
                        prev_op.ifm.consumers().pop(i)
                for i, c in enumerate(prev_op.ifm2.consumers()):
                    if c == rescale_2:
                        prev_op.ifm2.consumers().pop(i)

                DebugDatabase.add_optimised(prev_op, op)
                DebugDatabase.add_optimised(prev_op, rescale_1)
                DebugDatabase.add_optimised(prev_op, rescale_2)
                op = add_op
            else:
                print("Warning, unsupported fusing of TOSA Rescale with Add.")
                assert False
        else:
            print(
                "Warning, unsupported fusing of TOSA Rescale; previous operator is of type:",
                prev_op.type,
            )
            assert False

    return op


def convert_pad_in_width(op):
    """
    Rewrites the PAD operator to an add that copies the IFM to the OFM
    plus up to 2 add operators that fill the OFM with zeros at the left/right borders.
    """
    assert op.type == Op.Pad
    assert op.ifm_shapes[0] is not None and op.ofm_shapes[0] is not None
    ifm = op.ifm
    ofm = op.ofm
    ifm_shape = op.ifm_shapes[0]
    ofm.ops = []
    ofm_shape = op.ofm_shapes[0]

    padding = op.inputs[1].values
    left, right = padding[-2]
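    # E.g. padding values [[0, 0], [0, 0], [1, 2], [0, 0]] give left = 1, right = 2
    # for the width dimension (illustrative values).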

    # Add op that copies IFM to the right place inside the OFM
    shp0 = Shape4D(0, 0, 0, 0)
    add_op = create_add_for_concat(op, op.name + "_main", ifm, ifm_shape, shp0.with_width(left))
    add_op.activation = op.activation

    quant = ofm.quantization
    pad_value = ifm.quantization.zero_point
    ifm.quantization.zero_point = 0
    if left > 0:
        shape = Shape4D(1, ifm_shape.height, left, ofm_shape.depth)
        zero_tens = create_const_tensor(
            op.name + "_left",
            shape.as_list(),
            ofm.dtype,
            shape.elements() * [pad_value],
            quantization=quant,
        )
        zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
        create_add_for_concat(op, op.name + "_left", zero_tens, shape, shp0)
    if right > 0:
        shape = Shape4D(1, ifm_shape.height, right, ofm_shape.depth)
        zero_tens = create_const_tensor(
            op.name + "_right",
            shape.as_list(),
            ofm.dtype,
            shape.elements() * [pad_value],
            quantization=quant,
        )
        zero_tens.equivalence_id = create_equivalence_id(tuple(zero_tens.values))
        create_add_for_concat(
            op,
            op.name + "_right",
            zero_tens,
            shape,
            shp0.with_width(ofm_shape.width - right),
        )

    op.type = Op.ConcatTFLite
    return add_op


def convert_table_to_lut(op, arch, nng):
    # Converts table op to a no-op + LUT
    if op.type is not Op.Table:
        return op

    table = op.inputs[1]
    op.inputs.remove(table)
    op.set_ifm_ofm_shapes()

    return convert_to_lut(op, table.values, "table")


def decompose_elem_tensors_hwc(op):
    """
    Decomposes an elementwise op if any of the ifm(s)/ofm is too large in any dimension to be handled by the NPU
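    Illustrative example (assuming clip yields the remaining extent capped at the limit):
    an ofm width of 80000 (> 65535) is split into two parts of width 65535 and 14465,
    each handled by its own part operation.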
    """
    max_t_size = 65535
    ofm_shape = op.write_shape if op.write_shape is not None else op.ofm_shapes[0]
    ifm_shape = op.read_shapes[0] if op.read_shapes[0] is not None else op.ifm_shapes[0]
    ifm2_shape = op.ifm_shapes[1] if op.ifm_shapes[1] else None
    ifm2_shape = op.read_shapes[1] if op.read_shapes[1] is not None else ifm2_shape
    limit_shape = Shape4D(1, max_t_size, max_t_size, max_t_size)

    if any(dim_size > max_t_size for dim_size in ofm_shape.as_list()):
        ofm_split = ofm_shape.floordiv_const(max_t_size).add(1, 1, 1, 1)

        for height in range(ofm_split.height):
            for width in range(ofm_split.width):
                for depth in range(ofm_split.depth):
                    ofm_offset = Shape4D(0, height * max_t_size, width * max_t_size, depth * max_t_size)
                    ofm_part_shape = ofm_shape.clip(ofm_offset, limit_shape)
                    ofm_cut = (ofm_offset, ofm_part_shape)

                    ifm_d = depth * max_t_size if ifm_shape.depth == ofm_shape.depth else 0
                    ifm_w = width * max_t_size if ifm_shape.width == ofm_shape.width else 0
                    ifm_h = height * max_t_size if ifm_shape.height == ofm_shape.height else 0
                    ifm_offset = Shape4D(0, ifm_h, ifm_w, ifm_d)
                    ifm_part_shape = ifm_shape.clip(ifm_offset, limit_shape)
                    ifm_cut = (ifm_offset, ifm_part_shape)

                    if ifm2_shape is not None:
                        ifm2_d = depth * max_t_size if ifm2_shape.depth == ofm_shape.depth else 0
                        ifm2_w = width * max_t_size if ifm2_shape.width == ofm_shape.width else 0
                        ifm2_h = height * max_t_size if ifm2_shape.height == ofm_shape.height else 0
                        ifm2_offset = Shape4D(0, ifm2_h, ifm2_w, ifm2_d)
                        ifm2_part_shape = ifm2_shape.clip(ifm2_offset, limit_shape)
                        ifm2_cut = (ifm2_offset, ifm2_part_shape)
                    else:
                        ifm2_cut = (None, None)

                    create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut)
        op.ofm.ops.remove(op)
        op.ifm.consumer_list.remove(op)
        if op.ifm2 is not None:
            op.ifm2.consumer_list.remove(op)
    return


def create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut):
    part_op = op.clone()
    ifm_read_offset = op.read_offsets[0] if op.read_offsets[0] is not None else Shape4D(0, 0, 0, 0)
    ofm_write_offset = op.write_offset if op.write_offset is not None else Shape4D(0, 0, 0, 0)
    ifm_offset, ifm_shape = ifm_cut
    ofm_offset, ofm_shape = ofm_cut

    part_op.read_offsets[0] = ifm_read_offset + ifm_offset
    part_op.read_shapes[0] = ifm_shape
    part_op.write_offset = ofm_write_offset + ofm_offset
    part_op.write_shape = ofm_shape
    part_op.ifm_shapes = op.ifm_shapes.copy()
    part_op.ofm_shapes = op.ofm_shapes.copy()
    part_op.ifm.consumer_list.append(part_op)
    op.ofm.ops.append(part_op)

    ifm2_offset, ifm2_shape = ifm2_cut
    if ifm2_offset:
        ifm2_read_offset = op.read_offsets[1] if op.read_offsets[1] is not None else Shape4D(0, 0, 0, 0)
        part_op.read_offsets[1] = ifm2_read_offset + ifm2_offset
        part_op.read_shapes[1] = ifm2_shape
        part_op.ifm2.consumer_list.append(part_op)

    return part_op


def get_nhwc_stride(shape):
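    # Returns NHWC element strides for the given Shape4D; e.g. (N, H, W, C) = (2, 3, 4, 5)
    # gives strides (60, 20, 5, 1).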
    stride_x = shape.depth
    stride_y = shape.width * stride_x
    stride_n = shape.height * stride_y
    return Shape4D(stride_n, stride_y, stride_x, 1)


def pad_to_rank(shape, rank):
    """
    Pads a shape to the given rank
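    e.g. pad_to_rank([3, 4], 4) -> [1, 1, 3, 4]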
    """
    while len(shape) < rank:
        shape = [1] + shape

    return shape


def get_elem_shapes_removed_singles(op):
    """
    Returns the shapes of ifm(s)/ofms after removing all the dimensions that are 1 for all ifm(s)/ofm
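    e.g. ofm [1, 10, 1, 8], ifm [1, 10, 1, 8], ifm2 [1, 1, 1, 8] -> ([10, 8], [10, 8], [1, 8])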
    """
    binary = op.ifm2 is not None
    ofm_shape = op.ofm_shapes[0].as_list() if len(op.ofm_shapes) > 0 else op.ofm.shape
    ifm_shape = op.ifm_shapes[0].as_list() if len(op.ifm_shapes) > 0 else op.ifm.shape
    if binary:
        ifm2_shape = op.ifm_shapes[1].as_list() if len(op.ofm_shapes) else op.ifm2.shape

    rank = max(len(ofm_shape), len(ifm_shape), len(ifm2_shape) if binary else 0)
    ofm_shape = pad_to_rank(ofm_shape, rank)
    ifm_shape = pad_to_rank(ifm_shape, rank)
    if binary:
        ifm2_shape = pad_to_rank(ifm2_shape, rank)

    new_ofm_shape = []
    new_ifm_shape = []
    new_ifm2_shape = []
    for idx in range(rank):
        if ofm_shape[idx] != 1:
            new_ofm_shape.append(ofm_shape[idx])
            new_ifm_shape.append(ifm_shape[idx])
            if binary:
                new_ifm2_shape.append(ifm2_shape[idx])

    if new_ofm_shape == []:
        new_ofm_shape = [1]
        new_ifm_shape = [1]
        new_ifm2_shape = [1] if binary else None

    return new_ofm_shape, new_ifm_shape, new_ifm2_shape


def decomp_dims_elementwise(op):
    """
    Decompose elementwise ops with Rank > 3 (H,W,C).
    If Rank > 3, all the dimensions above H are viewed as the N dimension,
    and the elementwise operation is decomposed into N (of the ofm) elementwise operations
    that read and write with offsets from/to the ifm(s)/ofm.
    Note: broadcast needs to be handled for binary elementwise ops, and TOSA allows for broadcast by both ifm and ifm2
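    Illustrative example: an ofm of shape (2, 3, 10, 10, 8) is viewed as N = 6 slices of
    shape (10, 10, 8), each handled by its own elementwise operation.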
    """

    ifm = op.ifm
    ifm2 = op.ifm2
    ofm = op.ofm
    binary = op.ifm2 is not None

    # Remove dimensions that are all 1
    new_ofm_shape, new_ifm_shape, new_ifm2_shape = get_elem_shapes_removed_singles(op)
    rank = len(new_ofm_shape)

    if rank > 3:
        n = rank - 3
        ofm_decomp_shape = Shape4D(new_ofm_shape[0:n])
        ofm_decomp_stride = get_nhwc_stride(ofm_decomp_shape)
        ofm_part_shape = Shape4D(new_ofm_shape[n:])
        op.ofm_shapes.append(Shape4D([ofm_decomp_shape.elements()] + new_ofm_shape[n:]))

        if binary:
            ifm_decomp_shape = Shape4D(new_ifm_shape[0:n])
            ifm2_decomp_shape = Shape4D(new_ifm2_shape[0:n])
            ifm_decomp_stride = get_nhwc_stride(ifm_decomp_shape)
            ifm2_decomp_stride = get_nhwc_stride(ifm2_decomp_shape)
            ifm_part_shape = Shape4D(new_ifm_shape[n:])
            ifm2_part_shape = Shape4D(new_ifm2_shape[n:])
            op.ifm_shapes.append(Shape4D([ifm_decomp_shape.elements()] + new_ifm_shape[n:]))
            op.ifm_shapes.append(Shape4D([ifm2_decomp_shape.elements()] + new_ifm2_shape[n:]))
        else:
            op.ifm_shapes.append(Shape4D([ofm_decomp_shape.elements()] + new_ofm_shape[n:]))

        op_list = []
        for height in range(ofm_decomp_shape.height):
            for width in range(ofm_decomp_shape.width):
                for depth in range(ofm_decomp_shape.depth):
                    ofm_offset = Shape4D(0, height, width, depth)
                    ofm_offset = Shape4D(ofm_offset.dot_prod(ofm_decomp_stride), 0, 0, 0)
                    ofm_cut = (ofm_offset, ofm_part_shape)

                    if binary:
                        ifm_d = depth if ifm_decomp_shape.depth == ofm_decomp_shape.depth else 0
                        ifm_w = width if ifm_decomp_shape.width == ofm_decomp_shape.width else 0
                        ifm_h = height if ifm_decomp_shape.height == ofm_decomp_shape.height else 0
                        ifm_offset = Shape4D(0, ifm_h, ifm_w, ifm_d)
                        ifm_offset = Shape4D(ifm_offset.dot_prod(ifm_decomp_stride), 0, 0, 0)
                        ifm_cut = (ifm_offset, ifm_part_shape)

                        ifm2_d = depth if ifm2_decomp_shape.depth == ofm_decomp_shape.depth else 0
                        ifm2_w = width if ifm2_decomp_shape.width == ofm_decomp_shape.width else 0
                        ifm2_h = height if ifm2_decomp_shape.height == ofm_decomp_shape.height else 0
                        ifm2_offset = Shape4D(0, ifm2_h, ifm2_w, ifm2_d)
                        ifm2_offset = Shape4D(ifm2_offset.dot_prod(ifm2_decomp_stride), 0, 0, 0)
                        ifm2_cut = (ifm2_offset, ifm2_part_shape)
                        op_list.append(create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut))
                    else:
                        op_list.append(create_elem_part_op(op, ofm_cut, None, ofm_cut))

        ofm.ops.remove(op)
        ifm.consumer_list.remove(op)
        if binary:
            ifm2.consumer_list.remove(op)

        return op_list
    else:
        op.ofm_shapes.append(Shape4D(new_ofm_shape))
        op.ifm_shapes.append(Shape4D(new_ifm_shape))
        op.ifm_shapes.append(Shape4D(new_ifm2_shape))

        return [op]


def decomp_elementwise(tens, arch, nng):
    """
    Decompose elementwise ops with Rank > 3 (H,W,C).
    Decompose tensors whose size exceeds the NPU max size
    """
    tens_ops = tens.ops.copy()
    for op in tens_ops:
        if op.type.is_elementwise_op():
            decomp_list = decomp_dims_elementwise(op)
            for part_op in decomp_list:
                decompose_elem_tensors_hwc(part_op)
    return tens


def reshape_concat_shape(shape, rank, axis):
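    # Collapses a rank > 3 shape to rank 3 around the given axis, e.g.
    # shape [2, 3, 4, 5] with rank 4 and axis 1 -> [2, 3, 20] (dims before the axis are
    # folded into H, dims after the axis into C).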
    new_h = 1
    for i in range(axis):
        new_h *= shape[i]
    new_c = 1
    for i in range(axis + 1, rank):
        new_c *= shape[i]
    if axis == (rank - 1):
        new_shape = [new_h, shape[axis], 1]
    else:
        new_shape = [new_h, shape[axis], new_c]
    return new_shape


def reshape_concat(op):
    """
    Reshapes concat ops with Rank > 3 (H,W,C).
    """
    ofm = op.ofm
    rank = len(ofm.shape)
    axis = op.attrs["axis"]
    if axis < 0:
        axis += rank

    if rank > 3:
        # Reshape so that the axis to be concatenated is the W dimension
        # Reshape inputs
        for inp in op.inputs:
            new_shape = reshape_concat_shape(inp.shape, rank, axis)
            op.ifm_shapes.append(Shape4D(new_shape))
        # Reshape output
        new_shape = reshape_concat_shape(ofm.shape, rank, axis)
        op.ofm_shapes.append(Shape4D(new_shape))
        op.attrs["axis4D"] = 2
    else:
        for inp in op.inputs:
            op.ifm_shapes.append(Shape4D(inp.shape))
        op.ofm_shapes.append(Shape4D(ofm.shape))
        op.attrs["axis4D"] = axis + (4 - rank)


def decomp_rewrite_concat(tens, arch, nng):
    """
    Decompose concat ops with Rank > 3 (H,W,C).
    Rewrite of concat to elementwise operations
    """
    if len(tens.ops) == 1 and tens.ops[0].type == Op.Concat:
        op = tens.ops[0]

        reshape_concat(op)
        rewrite_concat(op)

        op.ofm.ops.remove(op)
        for inp in op.inputs:
            inp.consumer_list.remove(op)

    return tens


def decomp_rewrite_pad(op, arch):
    """
    Decomposition of pad to elementwise operations.
    For each dimension that needs padding:
    - Create a new PAD operator for that dimension.
      The ifm/ofm are reshaped so that the dimension to be padded is the width dimension
      (rank for each is 3).
    - Rewrite the new PAD operator so there is:
      - 1 Add operator for copying the data
      - 1 Add operator for each left/right part to be padded
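    Illustrative example: padding [[0, 0], [1, 1], [0, 0], [2, 2]] on a rank 4 ifm
    results in one PAD (rewritten to adds) for dimension 1 and one for dimension 3.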
    """
    # TODO several things would be possible to optimize
    # For instance there are cases when it should be possible to pad 2
    # dimensions at the same time.
    if op.type == Op.Pad:
        ofm_elements = shape_num_elements(op.ofm.shape)
        padding = op.inputs[1].values

        rank = len(op.ifm.shape)
        next_ifm = op.ifm
        next_ifm_shape = next_ifm.shape.copy()

        first_pad_rewrite_op = None
        ifm_quant = op.ifm.quantization.clone()

        for dim in range(padding.shape[0]):
            # Check if padding is to be applied in this dimension
            dim_pad = padding[dim]
            if not (dim_pad == 0).all():
                # Reshape so that the dimension to be padded becomes the width dimension
                new_ifm_shape = reshape_concat_shape(next_ifm_shape, rank, dim)
                new_pad_input = np.zeros((4, 2), dtype=np.int32)
                new_pad_input[2] = dim_pad

                pad_op = create_pad_nop(f"{op.name}_dim_{dim}")
                pad_op.add_input_tensor(next_ifm)
                new_pad_tens = op.inputs[1].clone(f"_dim_{dim}")

                name = op.inputs[1].name + f"_dim_{dim}"
                new_pad_tens = create_const_tensor(name, list(new_pad_input.shape), DataType.int32, new_pad_input)
                pad_op.add_input_tensor(new_pad_tens)

                new_ofm_shape = new_ifm_shape.copy()
                new_ofm_shape[-2] = new_ofm_shape[-2] + dim_pad.sum()
                next_ifm_shape[dim] = next_ifm_shape[dim] + dim_pad.sum()

                if Shape4D(new_ofm_shape).elements() == ofm_elements:
                    # Last one, use op.ofm
                    ofm = op.ofm
                else:
                    # add a new ofm Tensor
                    ofm = Tensor(new_ofm_shape, op.ofm.dtype, f"{pad_op.name}_tens")
                    ofm.quantization = ifm_quant.clone()

                pad_op.set_output_tensor(ofm)
                pad_op.ifm_shapes.append(Shape4D(new_ifm_shape))
                pad_op.ofm_shapes.append(Shape4D(new_ofm_shape))
                DebugDatabase.add_optimised(op, pad_op)
                next_ifm = ofm

                # Rewrite the pad op
                converted_pad_op = convert_pad_in_width(pad_op)
                first_pad_rewrite_op = converted_pad_op
            else:
                # Change to Identity operation (will be removed)
                op.type = Op.Identity

        if first_pad_rewrite_op:
            assert op.ofm.shape == next_ifm_shape
            for inp in op.inputs:
                inp.consumer_list.remove(op)
            return first_pad_rewrite_op

    return op


def fixup_quantization(op, arch, nng):
    if op.ifm and op.ifm.quantization.zero_point is None:
        op.ifm.quantization.zero_point = 0
    if op.ifm2 and op.ifm2.quantization.zero_point is None:
        op.ifm2.quantization.zero_point = 0
    if not op.forced_output_quantization:
        if op.ofm and op.ofm.quantization and op.ofm.quantization.zero_point is None:
            op.ofm.quantization.zero_point = 0
    return op


def supported_operator_check(op, arch, nng):
    op.run_on_npu = arch.tosa_supported_operators.is_operator_supported(op)
    assert op.run_on_npu or op.type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
    return op


def tosa_optimise_graph(nng, arch):

    # TODO the supported operator checking needs to be split into semantic and HW checks
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng,
            sg,
            arch,
            [],
            [supported_operator_check],
            rewrite_unsupported=False,
        )

    # Decomposing and rewrite of concat
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng, sg, arch, [decomp_rewrite_concat], [], rewrite_unsupported=False
        )

    # Decomposing of pad
    for idx, sg in enumerate(nng.subgraphs):
        rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [decomp_rewrite_pad])
        sg.refresh_after_modification()

    # Handle sg input output
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng,
            sg,
            arch,
            [],
            [fix_sg_input_output_tosa],
            rewrite_unsupported=True,
        )

    # Removal of reshapes
    for sg in nng.subgraphs:
        rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_memory_ops])
        sg.refresh_after_modification()

    # Decomposing of elementwise
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng, sg, arch, [decomp_elementwise], [], rewrite_unsupported=False
        )

    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng,
            sg,
            arch,
            [],
            [set_ifm_ofm_op_shapes],
            rewrite_unsupported=False,
        )

    # Removal of Transpose
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng,
            sg,
            arch,
            [],
            [remove_const_transpose],
            rewrite_unsupported=False,
        )

    # TODO, when and where to best handle calc_scaling_avgpool
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng,
            sg,
            arch,
            [],
            [calc_scaling_avgpool],
            rewrite_unsupported=False,
        )

    # Rewrite Operators step
    op_rewrite_list = [
        set_tensor_equivalence,
        rewrite_rescale,
        convert_depthwise_to_conv,
        convert_table_to_lut,
    ]

    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng,
            sg,
            arch,
            [],
            op_rewrite_list,
            rewrite_unsupported=False,
        )

    # Post-processing step 1
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng,
            sg,
            arch,
            [],
            [rewrite_activation, add_padding_fields],
        )

    # Removal of Slice (SplitSliceRead) needs to be done after optimisation has been performed,
    # since ifm/ofm_shapes are of importance to this function
    for sg in nng.subgraphs:
        rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_splitsliceread])
        sg.refresh_after_modification()

    # Post-processing step 2
    for idx, sg in enumerate(nng.subgraphs):
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            nng,
            sg,
            arch,
            [],
            [fixup_quantization],
        )

    return nng