Blame - ethosu/vela/tosa_graph_optimiser.py - ml/ethos-u/ethos-u-vela

2021-06-28 07:41:58 +0200

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

16

# Description:

17

# Early optimisation of the TOSA based network graph, using the rewrite_graph module to do the traversal of the graph.

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

18

import numpy as np

19

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

20

from . import rewrite_graph

21

from .api import NpuRoundingMode

22

from .data_type import DataType

23

from .debug_database import DebugDatabase

Jonas Ohlsson

0957e3e

2021-09-01 15:57:21 +0200

[diff] [blame]

24

from .graph_optimiser_util import bypass_memory_only_ops

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

25

from .graph_optimiser_util import calc_explicit_padding

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

26

from .graph_optimiser_util import convert_depthwise_to_conv

Patrik Gustavsson

f436ada

2021-09-14 14:56:48 +0200

[diff] [blame]

27

from .graph_optimiser_util import convert_to_lut

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

28

from .graph_optimiser_util import move_splitsliceread_to_consumer

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

29

from .graph_optimiser_util import needed_total_padding

30

from .graph_optimiser_util import set_ifm_ofm_op_shapes

31

from .graph_optimiser_util import set_tensor_equivalence

32

from .operation import ExplicitScaling

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

33

from .operation import Op

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

34

from .operation_util import create_add_nop

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

35

from .operation_util import create_avgpool_nop

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

36

from .shape4d import Shape4D

37

from .tensor import create_const_tensor

Patrik Gustavsson

e2bfa7e

2021-09-08 15:04:11 +0200

[diff] [blame]

38

from .tensor import create_equivalence_id

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

39

40

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

41

def replace_rescale_with_avg_pool(rescale_op):
    """Turn a Rescale op into an AvgPool no-op, in place, and return it.

    The op object itself is reused: it receives a copy of the attrs of a freshly created avgpool nop and
    its type is switched to Op.AvgPool. A clone taken before the change is registered in the
    DebugDatabase as the op the result was optimised from.
    """
    assert rescale_op.type == Op.Rescale

    nop_template = create_avgpool_nop(rescale_op.name + "_avgpool")
    unmodified_rescale = rescale_op.clone()

    rescale_op.attrs = nop_template.attrs.copy()
    rescale_op.type = Op.AvgPool
    DebugDatabase.add_optimised(unmodified_rescale, rescale_op)

    return rescale_op

def calc_skirt(kernel, input_shape, explicit_padding):
    """Compute the padding and skirt for a kernel applied to input_shape.

    explicit_padding is unpacked as (top, left, bottom, right).
    Returns (padding, skirt), both 4-tuples ordered (top, left, bottom, right).
    """
    kernel_w, kernel_h = kernel.dilated_wh()
    stride_x, stride_y = kernel.stride
    in_height = int(input_shape.height)
    in_width = int(input_shape.width)

    total_pad_y = needed_total_padding(in_height, int(stride_y), int(kernel_h))
    total_pad_x = needed_total_padding(in_width, int(stride_x), int(kernel_w))

    top, left, bottom, right = explicit_padding
    pad_top, pad_bottom = calc_explicit_padding(in_height, int(stride_y), int(kernel_h), int(top), int(bottom))
    pad_left, pad_right = calc_explicit_padding(in_width, int(stride_x), int(kernel_w), int(left), int(right))

    # The bottom/right skirt is what remains of the total needed padding after the top/left part
    skirt = (pad_top, pad_left, total_pad_y - pad_top, total_pad_x - pad_left)
    padding = (pad_top, pad_left, pad_bottom, pad_right)
    return padding, skirt

67

68

69

def add_padding_fields(op, arch, nng):
    """Recompute "explicit_padding" and add "skirt" for NPU ops that carry explicit padding.

    Ops that do not run on the NPU, or that have no "explicit_padding" attribute, are returned untouched.
    arch and nng are not used.
    """
    if not op.run_on_npu:
        return op
    if "explicit_padding" not in op.attrs:
        return op

    input_shape = op.ifm_shapes[0]

    if op.type == Op.Conv2DBackpropInputSwitchedBias:
        # TODO not yet supported, but there will be need for separate handling
        assert False
    else:
        padding, skirt = calc_skirt(op.kernel, input_shape, op.attrs.get("explicit_padding"))

    op.attrs["explicit_padding"] = padding
    op.attrs["skirt"] = skirt
    return op

Patrik Gustavsson

2021-09-07 13:30:29 +0200

[diff] [blame]

86

# Counts leading zeroes for a (int32)
def count_leading_zeros(a):
    """Return the number of leading zero bits of a, viewed as a 32-bit word.

    Only the low 32 bits of a are considered, so 0 (or any value whose low 32 bits are all zero)
    gives 32 and any value with bit 31 set (including negative int32 values) gives 0.
    """
    # Masking first guarantees termination for inputs wider than 32 bits; a bit-by-bit scan over a
    # 32-bit mask would never find a set bit for e.g. 1 << 32.
    return 32 - (int(a) & 0xFFFFFFFF).bit_length()

def calc_scaling_avgpool(op, arch, nng):
    """Attach explicit scaling to an AvgPool op that divides by the number of kernel elements.

    With k = number of bits needed to represent (kernel elements - 1), the multiplier is
    (((1 << 30) + 1) << k) // kernel elements and the shift is 30 + k. The rounding mode is set to
    NATURAL. Ops of any other type are returned unchanged. arch and nng are not used.
    """
    if op.type != Op.AvgPool:
        return op

    top, left, _, _ = op.attrs["explicit_padding"]
    # TODO Only support for when global scaling can be used.
    # That is when there is no padding
    assert top == 0 and left == 0
    assert op.explicit_scaling is None

    kernel_elements = op.kernel.elements_wh()
    k = 32 - count_leading_zeros(kernel_elements - 1)
    numerator = np.int64(((1 << 30) + 1) << k)
    multiplier = [numerator // kernel_elements]
    shift = [30 + k]

    op.rounding_mode = NpuRoundingMode.NATURAL
    op.explicit_scaling = ExplicitScaling(False, shift, multiplier)
    return op

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

119

def remove_const_transpose(op, arch, nng):
    """Fold a Transpose of a constant into the constant tensor and bypass the Transpose op.

    Only a Transpose whose ifm has exactly one producer of type Op.Const can be removed; anything else
    prints a warning and asserts. Ops of other types are returned unchanged. arch and nng are not used.
    """
    if op.type != Op.Transpose:
        return op

    producers = op.ifm.ops
    if not (len(producers) == 1 and producers[0].type == Op.Const):
        print("Warning: Cannot remove Transpose, and handling of Transpose is not supported")
        assert False
        return op

    const_op = producers[0]
    # Transpose the Tensor and data and remove Transpose
    # TODO move to Tensor?
    perms = op.attrs["perms"]
    tens = op.ifm
    untransposed_shape = tens.shape.copy()

    tens.shape = [untransposed_shape[axis] for axis in perms]
    tens.bandwidth_shape = tens.shape
    tens.storage_shape = tens.shape

    if tens.values is not None:
        tens.values = tens.values.transpose(perms)

    op.ofm.values = tens.values
    # Bypass the Transpose op
    const_op.set_output_tensor(op.ofm)
    DebugDatabase.add_optimised(op, const_op)
    return op

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

151

# TODO can we change to add for both TFLite and TOSA?

152

def insert_add_copy_op_after_tens(tens):
    """Insert an add-with-zero op after tens and move the consumers of tens over to the copy.

    A clone of tens becomes the output of a new add nop (tens + constant zero scalar) that is marked to
    run on the NPU. Every non-None consumer that tens had before the insertion gets the clone as input
    in place of tens.
    """
    previous_consumers = tens.consumer_list.copy()
    copy_tens = tens.clone()

    name = tens.name + "_add"
    zero_scalar = create_const_tensor(
        name + "_zero_scalar",
        [1],
        copy_tens.dtype,
        [0],
        copy_tens.dtype.as_numpy_type(),
        quantization=copy_tens.quantization,
    )
    copy_op = create_add_nop(name)
    for add_input in (tens, zero_scalar):
        copy_op.add_input_tensor(add_input)
    copy_op.set_output_tensor(copy_tens)
    copy_op.set_ifm_ofm_shapes()
    copy_op.run_on_npu = True

    # Set copy_ifm consumers
    for consumer in previous_consumers:
        if consumer is None:
            continue
        for ifm_idx, cons_inp in enumerate(consumer.inputs):
            if cons_inp == tens:
                consumer.set_input_tensor(copy_tens, ifm_idx)

    DebugDatabase.add_optimised(tens.ops[0], copy_op)

180

181

182

def fix_sg_input_output_tosa(op, arch, nng):
    """Insert a copy op after a Reshape's ifm when both its ifm and ofm have to persist.

    Only Reshape ops that run on the NPU are considered; every other op is returned unchanged.
    arch and nng are not used.
    """
    if not op.run_on_npu or op.type != Op.Reshape:
        return op

    # For the Reshape operators we want to remove, tensors are removed.
    # But in order to do this, they cannot be outputs of the sg,
    # this needs to be fixed prior to the removal.
    # Solution is to add a copy op, to maintain the original tensor.
    # This is also valid when reshape ifm/ofm is produced respectively
    # consumed by CPU
    ifm = op.ifm
    ofm = op.ofm

    # Check if operator ifm/ofm are sg ifm/ofm
    ifm_is_sg_ifm = ifm.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
    ifm_is_sg_ofm = any(consumer is None for consumer in ifm.consumer_list)
    ofm_is_sg_ofm = any(consumer is None for consumer in ofm.consumer_list)
    # Check if ifm/ofm is produced respectively consumed by CPU
    ifm_is_cpu_produced = any(producer is not None and not producer.run_on_npu for producer in ifm.ops)
    ofm_is_cpu_consumed = any(
        consumer is not None and not consumer.run_on_npu for consumer in ofm.consumer_list
    )

    ifm_must_persist = ifm_is_sg_ofm or ifm_is_sg_ifm or ifm_is_cpu_produced
    ofm_must_persist = ofm_is_sg_ofm or ofm_is_cpu_consumed
    if ifm_must_persist and ofm_must_persist:
        # Both ifm and ofm need to persist, but only ifm need a copy, in order to remove the Reshape
        insert_add_copy_op_after_tens(ifm)

    return op

def create_add_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """Creates an add op (ifm + zero scalar) that writes ifm into the ofm of concat_op at write_offset.

    The new op is appended to the producers of the ofm, registered in the DebugDatabase as optimised
    from concat_op, tagged with the ConcatSliceWrite memory function and returned.
    """
    ofm = concat_op.ofm
    zero_scalar = create_const_tensor(
        name + "_zero_scalar", [1], ofm.dtype, [0], ofm.dtype.as_numpy_type(), quantization=ofm.quantization
    )
    add_op = create_add_nop(name)

    add_op.inputs = [ifm, zero_scalar]
    add_op.outputs = [ofm]
    add_op.write_offset = write_offset
    add_op.write_shape = ifm_shape
    ofm.ops.append(add_op)
    DebugDatabase.add_optimised(concat_op, add_op)

    for input_shape in (ifm_shape, Shape4D(zero_scalar.shape)):
        add_op.ifm_shapes.append(input_shape)
    add_op.ofm_shapes.append(concat_op.ofm_shapes[0])
    add_op.memory_function = Op.ConcatSliceWrite
    return add_op

# TODO Could be further optimized checking the type of the consumer,

230

# rather than just mimic the TFLite behaviour depending on type.

231

# TOSA bool_t not considered yet

232

def remove_splitsliceread(op, arch):
    """Remove a SplitSliceRead op by moving the read to its consumer or by replacing it with an add.

    The read is moved to the consumer when the ofm has exactly one consumer that runs on the NPU, is
    not a Reshape, the op's ofm shape equals the tensor shape and the dtype is uint8/int8/int16.
    Otherwise an add nop (ifm + zero scalar) taking over the read offset/shape replaces the op.
    arch is not used.
    """
    if op.type != Op.SplitSliceRead:
        return

    ofm = op.ofm
    consumers = ofm.consumer_list
    # Check if it is possible to put the SplitSliceRead on the tensor consumer, or if an add needs to be inserted
    movable = (
        len(consumers) == 1
        and consumers[0] is not None
        and consumers[0].run_on_npu
        and consumers[0].type != Op.Reshape
        and op.ofm_shapes[0] == Shape4D.from_list(ofm.shape)
        and ofm.dtype in (DataType.uint8, DataType.int8, DataType.int16)
    )
    if movable:
        # SplitSliceRead can be performed by tensor consumer
        move_splitsliceread_to_consumer(op, consumers[0])
        return

    name = op.name + "_add"
    zero_scalar = create_const_tensor(
        name + "_zero_scalar", [1], ofm.dtype, [0], ofm.dtype.as_numpy_type(), quantization=ofm.quantization
    )
    add_op = create_add_nop(name)
    add_op.inputs = [op.ifm, zero_scalar]
    add_op.outputs = [ofm]

    # The add takes over as producer of the ofm
    ofm.ops.remove(op)
    ofm.ops.append(add_op)

    add_op.ifm_shapes.append(op.ifm_shapes[0])
    add_op.ifm_shapes.append(Shape4D(zero_scalar.shape))
    add_op.ofm_shapes.append(op.ofm_shapes[0])
    add_op.read_offsets[0] = op.read_offsets[0]
    add_op.read_shapes[0] = op.read_shapes[0]

    op.ifm.consumer_list.remove(op)
    DebugDatabase.add_optimised(op, add_op)

266

267

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame^]

268

def rewrite_concat(op):
    """Rewrite an NPU Concat into one add op per input, each writing at its offset along "axis4D".

    Ops that do not run on the NPU or are not Op.Concat are ignored. Returns None.
    """
    if not op.run_on_npu or not op.type == Op.Concat:
        return

    axis_4D = op.attrs["axis4D"]
    offset = 0

    for idx, inp in enumerate(op.inputs):
        inp_shape = op.ifm_shapes[idx]
        write_offset = [0, 0, 0, 0]
        write_offset[axis_4D] = offset
        concat_end = offset + inp_shape[axis_4D]
        create_add_for_concat(op, op.name + str(idx) + "_add", inp, inp_shape, Shape4D.from_list(write_offset))
        offset = concat_end

    # The inputs must exactly fill the ofm along the concatenation axis
    assert op.ofm_shapes[0][axis_4D] == offset

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

283

284

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

285

def remove_reshapes(op, arch):
    """Bypass op via bypass_memory_only_ops when it is a Reshape that runs on the NPU. arch is not used."""
    if not op.run_on_npu:
        return
    if op.type == Op.Reshape:
        bypass_memory_only_ops(op)

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

288

289

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

290

def rewrite_activation(op, arch, nng):
    """Set the "min"/"max" attrs of Clamp/ReluN ops from their integer limits minus the ifm zero point.

    Clamp gets both "min" and "max", ReluN only "max". If the ofm has no zero point it inherits the one
    used for the ifm (0 when the ifm has none). Other op types are returned unchanged.
    arch and nng are not used.
    """
    if op.type not in (Op.ReluN, Op.Clamp):
        return op

    zp = op.ifm.quantization.zero_point or 0
    ofm_quant = op.ofm.quantization
    if ofm_quant.zero_point is None:
        ofm_quant.zero_point = zp

    attrs = op.attrs
    if op.type == Op.Clamp:
        attrs["min"] = attrs["min_int"] - zp
        attrs["max"] = attrs["max_int"] - zp
    elif op.type == Op.ReluN:
        attrs["max"] = attrs["max_int"] - zp

    return op

def rewrite_rescale(op, arch, nng):
    """Fuse a TOSA Rescale op into its producer, or turn it into an AvgPool no-op carrying the scaling.

    - When the producer of the ifm is a conv2d, depthwise conv2d or FullyConnected op, the ifm is int32
      and the rescale is per channel, the scaling and rounding mode are moved to the producer and the
      Rescale is bypassed.
    - Otherwise, for int8->int8, uint8->int8 and int8->uint8, the Rescale op itself is converted to an
      AvgPool no-op with explicit scaling (per-channel scaling is not supported here and asserts).
    - Every other combination prints a warning and asserts.

    Ops of other types are returned unchanged. arch and nng are not used.
    """
    if op.type == Op.Rescale:
        ifm = op.ifm
        ofm = op.ofm

        # some error checking
        assert len(ifm.ops) == 1
        prev_op = ifm.ops[0]

        # TODO currently not supported
        assert len(ifm.consumer_list) == 1

        input_zp = op.attrs["input_zp"]
        output_zp = op.attrs["output_zp"]
        multiplier = op.attrs["multiplier"]
        shift = op.attrs["shift"]
        scale32 = op.attrs["scale32"]
        double_round = op.attrs["double_round"]
        per_channel = op.attrs["per_channel"]

        # A non-zero zero point is only accepted for 8-bit tensors
        assert ifm.dtype in (DataType.uint8, DataType.int8, DataType.int32)
        assert ifm.dtype in (DataType.uint8, DataType.int8) or input_zp == 0
        assert ofm.dtype in (DataType.uint8, DataType.int8) or output_zp == 0
        assert (scale32 and ifm.dtype != DataType.int48) or (not scale32 and not double_round)

        # Check that input tensor has the same zp or no zp
        ifm_zp = ifm.quantization.zero_point
        if ifm_zp is not None and ifm_zp != input_zp:
            print("Error (fuse_rescale): zp of tensors producer/consumer differs unexpectedidly ")
            assert False
        # The zero points of the Rescale attributes are written onto the tensors
        ifm.quantization.zero_point = input_zp
        ofm.quantization.zero_point = output_zp
        for s, m in zip(shift, multiplier):
            # TODO these are the TOSA limitations
            assert m >= 0
            assert 2 <= s <= 62
            # TODO these are the HW limitations
            assert 0 <= s < (1 << 6)
        explicit_scaling = ExplicitScaling(per_channel, shift, multiplier)

        # Double rounding is only selected together with 32-bit scaling
        if double_round and scale32:
            rounding_mode = NpuRoundingMode.TFL
        else:
            rounding_mode = NpuRoundingMode.NATURAL

        if prev_op.type.is_depthwise_conv2d_op() or prev_op.type.is_conv2d_op() or prev_op.type == Op.FullyConnected:
            # One multiplier/shift pair is expected per bias value of the producer
            assert len(multiplier) == len(shift) == len(prev_op.bias.values)

            if ifm.dtype == DataType.int32 and per_channel:
                prev_op.explicit_scaling = explicit_scaling
                prev_op.rounding_mode = rounding_mode

                # Bypass op
                prev_op.set_output_tensor(ofm)
                DebugDatabase.add_optimised(op, prev_op)
                return op
            else:
                print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type)
                assert False
        # TODO which are the cases we need to and can do standalone Rescale?
        # TODO should we try to identify a conversion uint8<->int8 accomplished by 2 RESCALE ops?
        # origin might be TFLite op QUANTIZE, should we look to see if they can be translated to QUANTIZE?
        # limited to these at the moment:
        elif (
            (ifm.dtype == DataType.int8 and ofm.dtype == DataType.int8)
            or (ifm.dtype == DataType.uint8 and ofm.dtype == DataType.int8)
            or (ifm.dtype == DataType.int8 and ofm.dtype == DataType.uint8)
        ):
            # Create NOP performing the RESCALE
            avgpool_op = replace_rescale_with_avg_pool(op)
            avgpool_op.rounding_mode = rounding_mode

            if per_channel:
                # TODO
                avgpool_op.explicit_scaling = explicit_scaling
                print("Warning, unsupported TOSA Rescale")
                assert False
            else:
                avgpool_op.explicit_scaling = explicit_scaling
        else:
            # NOTE(review): this branch is reached for an unsupported ifm/ofm dtype combination of a
            # standalone Rescale; the message below still talks about fusing - confirm the wording.
            print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type)
            assert False
    return op

Patrik Gustavsson

2021-09-08 15:04:11 +0200

[diff] [blame]

393

# TODO modified copy of TFLite, solution for TOSA PAD will change so reuse has not been considered

394

def convert_pad(op, arch, nng):
    """
    Rewrites PAD operator to an add that copies the IFM to the OFM
    + up to 4 add operators that fill the borders of the OFM with the IFM zero point.

    The padding table is read from op.inputs[1].values; its last rows hold the (before, after) pairs
    of the innermost dimensions. Padding in depth, and in the first dimension for rank 4, is not
    supported and asserts. The op type is changed to Op.ConcatTFLite and the add op that copies the
    IFM is returned. Ops of other types are returned unchanged. arch and nng are not used.
    """

    if op.type != Op.Pad:
        return op

    # TODO assuming rank <= 4 and N = 1 for rank ==4
    # This is checked in tosa_supported_operators
    ifm = op.ifm
    assert ifm is not None
    ifm_shape = Shape4D(ifm.shape)
    ofm = op.ofm
    assert ofm is not None
    # The add ops created below become the only producers of the OFM
    ofm.ops = []
    ofm_shape = op.ofm_shapes[0]

    rank = len(ifm.shape)
    padding = op.inputs[1].values
    pad_depth = padding[-1]
    if not (pad_depth == 0).all():
        print("Warning: For PAD, padding in depth not supported yet")
        assert False

    top, bottom = 0, 0
    left, right = 0, 0
    if rank > 1:
        left, right = padding[-2][0], padding[-2][1]
    if rank > 2:
        top, bottom = padding[-3][0], padding[-3][1]
    if rank == 4 and not (padding[-4] == 0).all():
        print("Warning: For PAD, padding not supported in first dimension when rank == 4 yet")
        assert False

    # Add op that copies IFM to the right place inside the OFM
    shp0 = Shape4D(0, 0, 0, 0)
    shp_top = shp0.with_height(top)
    add_op = create_add_for_concat(op, op.name + "_main", ifm, ifm_shape, shp_top.with_width(left))
    add_op.activation = op.activation

    # NOTE(review): a zero point of None would end up as the fill value - confirm it is always set here
    pad_value = ifm.quantization.zero_point
    # Add operations that fill the borders of the OFM.
    # Top/bottom span the full OFM width, left/right only cover the rows of the IFM.
    if top > 0:
        shape = Shape4D(1, top, ofm_shape.width, ofm_shape.depth)
        _add_pad_border(op, "_top", shape, pad_value, shp0)
    if bottom > 0:
        shape = Shape4D(1, bottom, ofm_shape.width, ofm_shape.depth)
        _add_pad_border(op, "_bottom", shape, pad_value, shp0.with_height(ofm_shape.height - bottom))
    if left > 0:
        shape = Shape4D(1, ifm_shape.height, left, ofm_shape.depth)
        _add_pad_border(op, "_left", shape, pad_value, shp_top)
    if right > 0:
        shape = Shape4D(1, ifm_shape.height, right, ofm_shape.depth)
        _add_pad_border(op, "_right", shape, pad_value, shp_top.with_width(ofm_shape.width - right))

    op.type = Op.ConcatTFLite
    return add_op


def _add_pad_border(pad_op, suffix, shape, pad_value, write_offset):
    """Add an op that writes a constant block of pad_value with the given shape into the OFM of pad_op."""
    ofm = pad_op.ofm
    name = pad_op.name + suffix
    fill_tens = create_const_tensor(
        name,
        shape.as_list(),
        ofm.dtype,
        shape.elements() * [pad_value],
        # Use the numpy type matching the OFM dtype; a fixed uint8 cannot hold negative int8 zero
        # points or the values of wider data types.
        ofm.dtype.as_numpy_type(),
        quantization=ofm.quantization,  # TODO
    )
    # If top/bottom or left/right are equal, the const tensors can be allocated to the same address
    fill_tens.equivalence_id = create_equivalence_id(tuple(fill_tens.values))
    create_add_for_concat(pad_op, name, fill_tens, shape, write_offset)

Patrik Gustavsson

2021-09-14 14:56:48 +0200

[diff] [blame]

483

def convert_table_to_lut(op, arch, nng):
    """Converts table op to a no-op + LUT.

    The table tensor (second input) is removed from the op's inputs, the ifm/ofm shapes are set again
    and the result of convert_to_lut is returned. Ops of other types are returned unchanged.
    arch and nng are not used.
    """
    if op.type is Op.Table:
        table = op.inputs[1]
        op.inputs.remove(table)
        op.set_ifm_ofm_shapes()
        return convert_to_lut(op, table.values, "table")

    return op

493

494

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame^]

495

def decompose_elem_tensors_hwc(op):
    """
    Decomposes elementwise op if any of the ifm(s)/ofm are too large in any dimension to be handled by the NPU

    When a dimension of the ofm (write shape if set, otherwise the first ofm shape) exceeds 65535, the
    op is replaced by part ops created with create_elem_part_op, each covering at most 65535 elements
    per dimension, and the op is removed as producer/consumer of its tensors. Otherwise nothing happens.
    Returns None.
    """
    max_t_size = 65535
    ofm_shape = op.write_shape if op.write_shape is not None else op.ofm_shapes[0]
    ifm_shape = op.read_shapes[0] if op.read_shapes[0] is not None else op.ifm_shapes[0]

    # A unary op has no second input to cut, even if a second ifm shape entry happens to be present.
    ifm2_shape = None
    if op.ifm2 is not None:
        if len(op.ifm_shapes) > 1 and op.ifm_shapes[1]:
            ifm2_shape = op.ifm_shapes[1]
        if op.read_shapes[1] is not None:
            ifm2_shape = op.read_shapes[1]
    limit_shape = Shape4D(1, max_t_size, max_t_size, max_t_size)

    def input_cut(in_shape, height, width, depth):
        # An input is only stepped along the dimensions where its size equals that of the ofm; in all
        # other dimensions (presumably broadcast) every part reads from offset 0.
        offset = Shape4D(
            0,
            height * max_t_size if in_shape.height == ofm_shape.height else 0,
            width * max_t_size if in_shape.width == ofm_shape.width else 0,
            depth * max_t_size if in_shape.depth == ofm_shape.depth else 0,
        )
        return (offset, in_shape.clip(offset, limit_shape))

    if any(dim_size > max_t_size for dim_size in ofm_shape.as_list()):
        # NOTE(review): floordiv + 1 gives one more split than needed when a dimension is an exact
        # multiple of max_t_size; whether that yields an empty part depends on Shape4D.clip - confirm.
        ofm_split = ofm_shape.floordiv_const(max_t_size).add(1, 1, 1, 1)

        for height in range(ofm_split.height):
            for width in range(ofm_split.width):
                for depth in range(ofm_split.depth):
                    ofm_offset = Shape4D(0, height * max_t_size, width * max_t_size, depth * max_t_size)
                    ofm_part_shape = ofm_shape.clip(ofm_offset, limit_shape)
                    ofm_cut = (ofm_offset, ofm_part_shape)

                    ifm_cut = input_cut(ifm_shape, height, width, depth)
                    if ifm2_shape is not None:
                        ifm2_cut = input_cut(ifm2_shape, height, width, depth)
                    else:
                        ifm2_cut = (None, None)

                    create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut)

        # The part ops replace the original op in the graph
        op.ofm.ops.remove(op)
        op.ifm.consumer_list.remove(op)
        if op.ifm2 is not None:
            op.ifm2.consumer_list.remove(op)

def create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut):
    """Clone op into a part op that reads/writes only the given cuts, and hook it into the graph.

    Each cut is an (offset, shape) pair. The offsets are added to the read/write offsets already
    present on op (treated as zero when unset) and the shapes become the read/write shapes of the part.
    ifm2_cut may be None or (None, None) when there is no second input to cut.

    The part op is appended to the consumers of its ifm (and of its ifm2 when an ifm2 cut is given) and
    to the producers of op.ofm. The original op is left in place. Returns the part op.
    """
    part_op = op.clone()
    ifm_read_offset = op.read_offsets[0] if op.read_offsets[0] is not None else Shape4D(0, 0, 0, 0)
    ofm_write_offset = op.write_offset if op.write_offset is not None else Shape4D(0, 0, 0, 0)
    ifm_offset, ifm_shape = ifm_cut
    ofm_offset, ofm_shape = ofm_cut

    part_op.read_offsets[0] = ifm_read_offset + ifm_offset
    part_op.read_shapes[0] = ifm_shape
    part_op.write_offset = ofm_write_offset + ofm_offset
    part_op.write_shape = ofm_shape
    part_op.ifm_shapes = op.ifm_shapes.copy()
    part_op.ofm_shapes = op.ofm_shapes.copy()
    part_op.ifm.consumer_list.append(part_op)
    op.ofm.ops.append(part_op)

    # Unary callers may pass None instead of an (offset, shape) pair
    ifm2_offset, ifm2_shape = ifm2_cut if ifm2_cut is not None else (None, None)
    if ifm2_offset is not None:
        ifm2_read_offset = op.read_offsets[1] if op.read_offsets[1] is not None else Shape4D(0, 0, 0, 0)
        part_op.read_offsets[1] = ifm2_read_offset + ifm2_offset
        part_op.read_shapes[1] = ifm2_shape
        part_op.ifm2.consumer_list.append(part_op)

    return part_op

563

564

565

def get_nhwc_stride(shape):
    """Return, as a Shape4D, the element strides of shape when laid out densely with depth innermost."""
    width_stride = shape.depth
    height_stride = shape.width * width_stride
    batch_stride = shape.height * height_stride
    return Shape4D(batch_stride, height_stride, width_stride, 1)

570

571

Patrik Gustavsson

2021-09-21 14:18:44 +0200

[diff] [blame]

572

def get_elem_shapes_removed_singles(op):
    """
    Returns the shapes of ifm(s)/ofms after removing all the dimensions that are 1 in the ofm

    The shapes are taken from op.ofm_shapes / op.ifm_shapes when those are populated and from the
    tensors otherwise. Returns (new_ofm_shape, new_ifm_shape, new_ifm2_shape) as lists. When no
    dimension is left all shapes are [1]. For a unary op new_ifm2_shape is [] (or None in the [1] case).
    """
    binary = op.ifm2 is not None
    ofm_shape = op.ofm_shapes[0].as_list() if len(op.ofm_shapes) > 0 else op.ofm.shape
    ifm_shape = op.ifm_shapes[0].as_list() if len(op.ifm_shapes) > 0 else op.ifm.shape
    ifm2_shape = None
    if binary:
        # The second entry of ifm_shapes only exists once both input shapes have been set
        ifm2_shape = op.ifm_shapes[1].as_list() if len(op.ifm_shapes) > 1 else op.ifm2.shape

    kept_dims = [idx for idx, dim_size in enumerate(ofm_shape) if dim_size != 1]
    if not kept_dims:
        return [1], [1], [1] if binary else None

    new_ofm_shape = [ofm_shape[idx] for idx in kept_dims]
    new_ifm_shape = [ifm_shape[idx] for idx in kept_dims]
    new_ifm2_shape = [ifm2_shape[idx] for idx in kept_dims] if binary else []
    return new_ofm_shape, new_ifm_shape, new_ifm2_shape

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

599

600

Patrik Gustavsson

2021-09-21 14:18:44 +0200

[diff] [blame]

601

def decomp_dims_elementwise(op):
    """
    Decompose elementwise ops with Rank > 3 (H,W,D).
    If Rank > 3, all the dimensions above H are viewed as the N dimension.
    the elementwise operation will be decomposed to N (of ofm) elementwise operations.
    By reading and writing with offsets from/to the ifm(s)/ofm.
    Note: Broadcast need to be handled for binary elementwise ops, and TOSA allows for broadcast by both ifm and ifm2

    Returns the list of ops to process further: the part ops when the op was decomposed (the op itself
    is then removed as producer/consumer of its tensors), otherwise a list holding just the op.
    In both cases 4D shapes are appended to op.ifm_shapes / op.ofm_shapes.
    """

    ifm = op.ifm
    ifm2 = op.ifm2
    ofm = op.ofm
    binary = op.ifm2 is not None

    # Remove dimensions that are all 1
    new_ofm_shape, new_ifm_shape, new_ifm2_shape = get_elem_shapes_removed_singles(op)
    rank = len(new_ofm_shape)

    if rank <= 3:
        op.ofm_shapes.append(Shape4D(new_ofm_shape))
        op.ifm_shapes.append(Shape4D(new_ifm_shape))
        op.ifm_shapes.append(Shape4D(new_ifm2_shape))
        return [op]

    # The n outermost dimensions are iterated over, the 3 innermost form the shape of each part
    n = rank - 3
    ofm_decomp_shape = Shape4D(new_ofm_shape[0:n])
    ofm_decomp_stride = get_nhwc_stride(ofm_decomp_shape)
    ofm_part_shape = Shape4D(new_ofm_shape[n:])
    op.ofm_shapes.append(Shape4D([ofm_decomp_shape.elements()] + new_ofm_shape[n:]))

    if binary:
        ifm_decomp_shape = Shape4D(new_ifm_shape[0:n])
        ifm2_decomp_shape = Shape4D(new_ifm2_shape[0:n])
        ifm_decomp_stride = get_nhwc_stride(ifm_decomp_shape)
        ifm2_decomp_stride = get_nhwc_stride(ifm2_decomp_shape)
        ifm_part_shape = Shape4D(new_ifm_shape[n:])
        ifm2_part_shape = Shape4D(new_ifm2_shape[n:])
        op.ifm_shapes.append(Shape4D([ifm_decomp_shape.elements()] + new_ifm_shape[n:]))
        op.ifm_shapes.append(Shape4D([ifm2_decomp_shape.elements()] + new_ifm2_shape[n:]))
    else:
        op.ifm_shapes.append(Shape4D([ofm_decomp_shape.elements()] + new_ofm_shape[n:]))

    def batch_offset(decomp_shape, decomp_stride, height, width, depth):
        # A tensor is only stepped along the decomposed dimensions where its size equals that of the
        # ofm; the position is then flattened into an offset in the first (N) dimension.
        position = Shape4D(
            0,
            height if decomp_shape.height == ofm_decomp_shape.height else 0,
            width if decomp_shape.width == ofm_decomp_shape.width else 0,
            depth if decomp_shape.depth == ofm_decomp_shape.depth else 0,
        )
        return Shape4D(position.dot_prod(decomp_stride), 0, 0, 0)

    op_list = []
    for height in range(ofm_decomp_shape.height):
        for width in range(ofm_decomp_shape.width):
            for depth in range(ofm_decomp_shape.depth):
                ofm_offset = batch_offset(ofm_decomp_shape, ofm_decomp_stride, height, width, depth)
                ofm_cut = (ofm_offset, ofm_part_shape)

                if binary:
                    ifm_offset = batch_offset(ifm_decomp_shape, ifm_decomp_stride, height, width, depth)
                    ifm_cut = (ifm_offset, ifm_part_shape)
                    ifm2_offset = batch_offset(ifm2_decomp_shape, ifm2_decomp_stride, height, width, depth)
                    ifm2_cut = (ifm2_offset, ifm2_part_shape)
                    create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut)
                else:
                    # No second input: pass an empty (offset, shape) pair, which can be unpacked
                    create_elem_part_op(op, ofm_cut, (None, None), ofm_cut)

                # create_elem_part_op appends every new part op to the producers of the ofm, so the
                # newest producer is the part that was just created.
                op_list.append(ofm.ops[-1])

    # The part ops replace the original op in the graph
    ofm.ops.remove(op)
    ifm.consumer_list.remove(op)
    if binary:
        ifm2.consumer_list.remove(op)

    return op_list

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

674

675

676

def decomp_elementwise(tens, arch, nng):
    """
    Decompose the elementwise ops that produce the given tensor.

    Elementwise ops with Rank > 3 (H,W,C) are decomposed first, and each
    resulting part is then decomposed further if its tensors exceed the
    NPU max size.

    arch and nng are not used here; they are part of the signature expected
    by rewrite_graph.rewrite_graph_pre_order, which this function is passed to.

    Returns tens unchanged as an object.
    """
    # Walk a snapshot of the producer list so the loop is unaffected if the
    # decomposition helpers edit tens.ops while we iterate
    for producer in tens.ops.copy():
        if not producer.type.is_elementwise_op():
            continue
        for part in decomp_dims_elementwise(producer):
            decompose_elem_tensors_hwc(part)
    return tens

def reshape_concat_shape(shape, rank, axis):
    """
    Collapse a shape into three dimensions centred on the concat axis.

    The sizes of all dimensions in front of axis are multiplied into the
    first entry, the size of axis itself is kept as the middle entry, and the
    sizes of all dimensions behind axis (up to rank) are multiplied into the
    last entry.

    shape: indexable sequence of dimension sizes
    rank: number of dimensions of shape to take into account
    axis: index of the concat axis, negative values are not normalised here

    Returns the new shape as a list of three elements.
    """
    new_h = 1
    for i in range(axis):
        new_h *= shape[i]

    # When axis is the last dimension this loop is empty and new_c stays 1,
    # so no special case is needed for axis == rank - 1
    new_c = 1
    for i in range(axis + 1, rank):
        new_c *= shape[i]

    return [new_h, shape[axis], new_c]

def reshape_concat(op):
    """
    Fill in the 4D shapes and the 4D concat axis of a concat op.

    One entry per input is appended to op.ifm_shapes, one entry is appended
    to op.ofm_shapes and op.attrs["axis4D"] is set. Concat ops with
    Rank > 3 (H,W,C) are reshaped, lower ranks keep their shapes.
    """
    out_tens = op.ofm
    rank = len(out_tens.shape)
    concat_axis = op.attrs["axis"]
    if concat_axis < 0:
        # Negative axis counts from the last dimension
        concat_axis += rank

    if rank <= 3:
        # Shapes are used as they are, only the axis is shifted by the
        # number of dimensions missing up to 4
        for in_tens in op.inputs:
            op.ifm_shapes.append(Shape4D(in_tens.shape))
        op.ofm_shapes.append(Shape4D(out_tens.shape))
        op.attrs["axis4D"] = concat_axis + (4 - rank)
        return

    # Reshape so that the axis to be concatenated is the W dimension
    # Inputs first
    for in_tens in op.inputs:
        collapsed = reshape_concat_shape(in_tens.shape, rank, concat_axis)
        op.ifm_shapes.append(Shape4D(collapsed))
    # Then the output
    collapsed = reshape_concat_shape(out_tens.shape, rank, concat_axis)
    op.ofm_shapes.append(Shape4D(collapsed))
    op.attrs["axis4D"] = 2

729

730

731

def decomp_rewrite_concat(tens, arch, nng):
    """
    Decompose and rewrite the concat op producing the given tensor.

    Only acts when tens has exactly one producing op and that op is a Concat.
    Concat ops with Rank > 3 (H,W,C) are reshaped, the concat is rewritten
    through rewrite_concat, and the original concat op is then disconnected
    from its output and input tensors.

    arch and nng are not used here; they are part of the signature expected
    by rewrite_graph.rewrite_graph_pre_order, which this function is passed to.

    Returns tens.
    """
    single_concat = len(tens.ops) == 1 and tens.ops[0].type == Op.Concat
    if not single_concat:
        return tens

    concat_op = tens.ops[0]

    reshape_concat(concat_op)
    rewrite_concat(concat_op)

    # Unhook the original concat op from the graph
    concat_op.ofm.ops.remove(concat_op)
    for in_tens in concat_op.inputs:
        in_tens.consumer_list.remove(concat_op)

    return tens

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

749

def fixup_quantization(op, arch, nng):
    """
    Replace missing zero points on the tensors of an op with 0.

    The ifm and ifm2 are always handled. The ofm is only handled when
    op.forced_output_quantization is not set. A tensor that is missing, or
    that has no quantization at all, is left untouched.

    arch and nng are not used here; they are part of the signature expected
    by rewrite_graph.rewrite_graph_pre_order, which this function is passed to.

    Returns op.
    """
    tensors = [op.ifm, op.ifm2]
    if not op.forced_output_quantization:
        tensors.append(op.ofm)

    for tens in tensors:
        # Same guard for all tensors, a tensor without quantization has no
        # zero point to fix up
        if tens and tens.quantization and tens.quantization.zero_point is None:
            tens.quantization.zero_point = 0
    return op

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

760

def supported_operator_check(op, arch, nng):
    """
    Record on the op whether it passes the TOSA supported operator check.

    op.run_on_npu is set to the result of
    arch.tosa_supported_operators.is_operator_supported(op). An op that is
    rejected must be a Placeholder, SubgraphInput or Const, anything else
    trips the assert.

    nng is not used here; it is part of the signature expected by
    rewrite_graph.rewrite_graph_pre_order, which this function is passed to.

    Returns op.
    """
    op.run_on_npu = arch.tosa_supported_operators.is_operator_supported(op)
    # NOTE: as with any assert, this check is skipped when Python runs with -O
    assert op.run_on_npu or op.type in (
        Op.Placeholder,
        Op.SubgraphInput,
        Op.Const,
    ), f"is_operator_supported() rejected operator of type {op.type}"
    return op

def tosa_optimise_graph(nng, arch):

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

767

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame^]

768

# TODO the supported operator checking needs to be split into semantic and HW checks

769

for idx, sg in enumerate(nng.subgraphs):

770

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

771

nng, sg, arch, [], [supported_operator_check], rewrite_unsupported=False,

772

)

773

774

# Decomposing and rewrite of concat

775

for idx, sg in enumerate(nng.subgraphs):

776

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

777

nng, sg, arch, [decomp_rewrite_concat], [], rewrite_unsupported=False

778

)

779

780

# Decomposing of elementwise

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

781

for idx, sg in enumerate(nng.subgraphs):

782

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

783

nng, sg, arch, [decomp_elementwise], [], rewrite_unsupported=False

784

)

785

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

786

for idx, sg in enumerate(nng.subgraphs):

787

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame^]

788

nng, sg, arch, [], [set_ifm_ofm_op_shapes], rewrite_unsupported=False,

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

789

)

790

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

791

# Removal of Transpose

792

for idx, sg in enumerate(nng.subgraphs):

793

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

794

nng, sg, arch, [], [remove_const_transpose], rewrite_unsupported=False,

795

)

796

797

# Handle sg input output

798

for idx, sg in enumerate(nng.subgraphs):

799

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

800

nng, sg, arch, [], [fix_sg_input_output_tosa], rewrite_unsupported=False,

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

801

)

802

803

# Removal of reshapes

804

for sg in nng.subgraphs:

805

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_reshapes])

806

sg.refresh_after_modification()

807

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

808

# TODO, when and where to best handle calc_scaling_avgpool

809

for idx, sg in enumerate(nng.subgraphs):

810

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

811

nng, sg, arch, [], [calc_scaling_avgpool], rewrite_unsupported=False,

812

)

813

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

814

# Rewrite Operators step

Patrik Gustavsson

f436ada

2021-09-14 14:56:48 +0200

[diff] [blame]

815

op_rewrite_list = [set_tensor_equivalence, rewrite_rescale, convert_depthwise_to_conv, convert_table_to_lut]

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

816

817

for idx, sg in enumerate(nng.subgraphs):

818

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

819

nng, sg, arch, [], op_rewrite_list, rewrite_unsupported=False,

820

)

821

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

822

# Post-processing step 1

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

823

for idx, sg in enumerate(nng.subgraphs):

824

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Patrik Gustavsson

e2bfa7e

2021-09-08 15:04:11 +0200

[diff] [blame]

825

nng, sg, arch, [], [rewrite_activation, convert_pad, add_padding_fields],

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

826

)

827

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

828

# Removal of Slice, needs to be done after optimisation has been performed,

829

# since ifm/ofm_shapes are of importance to this function

830

for sg in nng.subgraphs:

831

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_splitsliceread])

832

sg.refresh_after_modification()

833

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

834

# Post-processing step 2

835

for idx, sg in enumerate(nng.subgraphs):

836

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(nng, sg, arch, [], [fixup_quantization],)

837

Patrik Gustavsson