Blame - ethosu/vela/tosa_graph_optimiser.py - ml/ethos-u/ethos-u-vela

2021-06-28 07:41:58 +0200

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

16

# Description:

17

# Early optimisation of the TOSA based network graph, using the rewrite_graph module to do the traversal of the graph.

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

18

import numpy as np

19

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

20

from . import rewrite_graph

21

from .api import NpuRoundingMode

22

from .data_type import DataType

23

from .debug_database import DebugDatabase

Jonas Ohlsson

0957e3e

2021-09-01 15:57:21 +0200

[diff] [blame]

24

from .graph_optimiser_util import bypass_memory_only_ops

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

25

from .graph_optimiser_util import calc_explicit_padding

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

26

from .graph_optimiser_util import convert_depthwise_to_conv

Patrik Gustavsson

f436ada

2021-09-14 14:56:48 +0200

[diff] [blame]

27

from .graph_optimiser_util import convert_to_lut

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

28

from .graph_optimiser_util import move_splitsliceread_to_consumer

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

29

from .graph_optimiser_util import needed_total_padding

30

from .graph_optimiser_util import set_ifm_ofm_op_shapes

31

from .graph_optimiser_util import set_tensor_equivalence

32

from .operation import ExplicitScaling

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

33

from .operation import Op

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

34

from .operation_util import create_add_nop

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

35

from .operation_util import create_avgpool_nop

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

36

from .operation_util import create_pad_nop

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

37

from .shape4d import Shape4D

38

from .tensor import create_const_tensor

Patrik Gustavsson

2021-09-08 15:04:11 +0200

[diff] [blame]

39

from .tensor import create_equivalence_id

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

40

from .tensor import shape_num_elements

41

from .tensor import Tensor

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

42

43

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

44

def replace_rescale_with_avg_pool(rescale_op):
    """
    Converts the given Rescale op, in place, to an AvgPool op.

    The attributes are copied from a newly created AvgPool no-op, and the
    conversion is registered in the DebugDatabase against a clone of the
    op taken before it was modified.
    Returns the modified op (the same object as rescale_op).
    """
    assert rescale_op.type == Op.Rescale

    # Only used as the source of the AvgPool attributes
    nop_template = create_avgpool_nop(rescale_op.name + "_avgpool")
    # Snapshot taken before the op is modified
    original = rescale_op.clone()

    rescale_op.attrs = nop_template.attrs.copy()
    rescale_op.type = Op.AvgPool
    DebugDatabase.add_optimised(original, rescale_op)

    return rescale_op

def calc_skirt(kernel, input_shape, explicit_padding):
    """
    Calculates the padding and the skirt for the given kernel and input shape.

    explicit_padding is unpacked as (top, left, bottom, right).
    Returns the tuple (padding, skirt), where
    padding is (top_pad, left_pad, bottom_pad, right_pad) as returned by calc_explicit_padding, and
    skirt is (top_pad, left_pad, ypad - top_pad, xpad - left_pad), with ypad/xpad being the
    result of needed_total_padding for the height/width.
    """
    dilated_w, dilated_h = kernel.dilated_wh()
    stride_x, stride_y = kernel.stride
    # (input size, stride, dilated kernel size) per direction
    vertical = (int(input_shape.height), int(stride_y), int(dilated_h))
    horizontal = (int(input_shape.width), int(stride_x), int(dilated_w))

    total_pad_y = needed_total_padding(*vertical)
    total_pad_x = needed_total_padding(*horizontal)

    top, left, bottom, right = explicit_padding
    top_pad, bottom_pad = calc_explicit_padding(*vertical, int(top), int(bottom))
    left_pad, right_pad = calc_explicit_padding(*horizontal, int(left), int(right))

    return (
        (top_pad, left_pad, bottom_pad, right_pad),
        (top_pad, left_pad, total_pad_y - top_pad, total_pad_x - left_pad),
    )

70

71

72

def add_padding_fields(op, arch, nng):
    """
    Sets the "explicit_padding" and "skirt" attributes of an op.

    Only ops that run on the NPU and have an "explicit_padding" attribute are affected.
    The attribute is replaced by the padding calculated by calc_skirt, and the skirt
    from the same calculation is stored in the "skirt" attribute.
    arch and nng are not used.
    Returns op.

    Raises AssertionError for Op.Conv2DBackpropInputSwitchedBias, which is not yet supported.
    """
    if not op.run_on_npu or "explicit_padding" not in op.attrs:
        return op

    input_shape = op.ifm_shapes[0]

    if op.type == Op.Conv2DBackpropInputSwitchedBias:
        # TODO not yet supported, but there will be need for separate handling
        # Raised explicitly rather than by "assert False": with asserts disabled (python -O)
        # the assert is removed and the code below would fail on the unassigned padding/skirt
        raise AssertionError("Conv2DBackpropInputSwitchedBias is not yet supported")

    padding, skirt = calc_skirt(op.kernel, input_shape, op.attrs["explicit_padding"])

    op.attrs["explicit_padding"] = padding
    op.attrs["skirt"] = skirt

    return op

Patrik Gustavsson

2021-09-07 13:30:29 +0200

[diff] [blame]

89

# Counts leading zeroes for a (int32)

90

def count_leading_zeros(a):
    """
    Returns the number of leading zero bits of a, viewed as a 32-bit word.

    Only the 32 least significant bits of a are considered, so negative values are
    handled as their 32-bit two's complement pattern (e.g. -1 gives 0).
    Returns 32 when those 32 bits are all zero. This includes non-zero values that
    only have bits set above bit 31, for which the previous bit-by-bit scan never
    terminated.
    """
    low_word = int(a) & 0xFFFFFFFF
    # bit_length() is the position of the highest set bit + 1, and 0 for the value 0
    return 32 - low_word.bit_length()

def calc_scaling_avgpool(op, arch, nng):
    """
    Sets explicit scaling and rounding mode for an AvgPool op.

    The multiplier is ((1 << 30) + 1) << k divided (floor) by the number of kernel elements,
    and the shift is 30 + k, where k = 32 - count_leading_zeros(kernel elements - 1).
    Ops of any other type are returned unchanged. arch and nng are not used.
    Returns op.
    """
    if op.type != Op.AvgPool:
        return op

    top, left, _, _ = op.attrs["explicit_padding"]
    # TODO Only support for when global scaling can be used.
    # That is when there is no padding
    assert top == 0 and left == 0
    assert op.explicit_scaling is None

    kernel_elements = op.kernel.elements_wh()
    k = 32 - count_leading_zeros(kernel_elements - 1)
    numerator = np.int64(((1 << 30) + 1) << k)

    # Single element lists, the scaling is not per channel
    multiplier = [numerator // kernel_elements]
    shift = [30 + k]

    op.rounding_mode = NpuRoundingMode.NATURAL
    op.explicit_scaling = ExplicitScaling(False, shift, multiplier)

    return op

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

122

def remove_const_transpose(op, arch, nng):
    """
    Removes a Transpose op that has a single Const op as producer of its ifm.

    The permutation given by the "perms" attribute is applied directly to the constant
    ifm tensor (its shapes and, if present, its values), and the Const op is set to
    produce the ofm of the Transpose, which bypasses the Transpose op.
    For any other Transpose a warning is printed and an assertion fails.
    Ops of any other type are returned unchanged. arch and nng are not used.
    Returns op.
    """
    if op.type != Op.Transpose:
        return op

    producers = op.ifm.ops
    if len(producers) == 1 and producers[0].type == Op.Const:
        const_op = producers[0]
        # Transpose the Tensor and data and remove Transpose
        # TODO move to Tensor?
        perms = op.attrs["perms"]
        old_shape = op.ifm.shape.copy()
        const_tens = op.ifm

        const_tens.shape = [old_shape[axis] for axis in perms]
        const_tens.bandwidth_shape = const_tens.shape
        const_tens.storage_shape = const_tens.shape

        if const_tens.values is not None:
            const_tens.values = const_tens.values.transpose(perms)

        op.ofm.values = const_tens.values
        # Bypass the Transpose op
        const_op.set_output_tensor(op.ofm)
        DebugDatabase.add_optimised(op, const_op)
    else:
        print("Warning: Cannot remove Transpose, and handling of Transpose is not supported")
        assert False

    return op

Patrik Gustavsson

2021-10-06 14:46:46 +0200

[diff] [blame]

154

def insert_add_copy_for_const(op, ifm_ofm_shape):
    """
    Inserts an add no-op between the given Const op and its ofm.

    The Const op is changed to produce a clone of its original ofm, and the inserted op
    takes that clone and a scalar zero constant as inputs and produces the original ofm.
    ifm_ofm_shape is used as shape for both the first ifm and the ofm of the inserted op.
    """
    assert op.type == Op.Const
    orig_ofm = op.ofm
    const_out = orig_ofm.clone()
    op.set_output_tensor(const_out)

    add_name = orig_ofm.name + "_add"
    zero_scalar = create_const_tensor(
        add_name + "_zero_scalar",
        [1],
        const_out.dtype,
        [0],
        const_out.dtype.as_numpy_type(),
        quantization=const_out.quantization,
    )
    add_op = create_add_nop(add_name)
    for inp in (const_out, zero_scalar):
        add_op.add_input_tensor(inp)
    add_op.set_output_tensor(orig_ofm)
    add_op.ifm_shapes.extend([ifm_ofm_shape, Shape4D(zero_scalar.shape)])
    add_op.ofm_shapes.append(ifm_ofm_shape)
    add_op.run_on_npu = True

    DebugDatabase.add_optimised(op, add_op)

179

180

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

181

# TODO can we change to add for both TFLite and TOSA?

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

182

def insert_add_copy_op_after_tens(tens, ifm_ofm_shape):
    """
    Inserts an add no-op after the given tensor.

    The inserted op takes tens and a scalar zero constant as inputs and produces a clone
    of tens. All ops that consumed tens before the insertion are changed to consume
    the clone instead.
    ifm_ofm_shape is used as shape for both the first ifm and the ofm of the inserted op.
    """
    # Snapshot of the consumers, taken before the inserted op becomes a consumer of tens
    prev_consumers = tens.consumer_list.copy()
    tens_copy = tens.clone()

    add_name = tens.name + "_add"
    zero_scalar = create_const_tensor(
        add_name + "_zero_scalar",
        [1],
        tens_copy.dtype,
        [0],
        tens_copy.dtype.as_numpy_type(),
        quantization=tens_copy.quantization,
    )
    add_op = create_add_nop(add_name)
    for inp in (tens, zero_scalar):
        add_op.add_input_tensor(inp)
    add_op.set_output_tensor(tens_copy)
    add_op.ifm_shapes.extend([ifm_ofm_shape, Shape4D(zero_scalar.shape)])
    add_op.ofm_shapes.append(ifm_ofm_shape)
    add_op.run_on_npu = True

    # Let the previous consumers read from the copy
    for consumer in prev_consumers:
        if consumer is None:
            continue
        for inp_idx, inp in enumerate(consumer.inputs):
            if inp == tens:
                consumer.set_input_tensor(tens_copy, inp_idx)

    DebugDatabase.add_optimised(tens.ops[0], add_op)

212

213

Patrik Gustavsson

2021-10-06 14:46:46 +0200

[diff] [blame]

214

def get_shape_for_copy_op(shape):
    """
    Returns a Shape4D, to be used by a copy op, for a tensor with the given shape.

    Dimensions that are 1 are removed first. If more than 3 dimensions remain, all
    but the last 2 are folded into the height, so that the batch becomes 1.
    If no dimension remains, the shape [1] is used.
    """
    # remove dimensions that are set to 1
    squeezed = [dim for dim in shape if dim != 1]
    if not squeezed:
        squeezed = [1]

    rank = len(squeezed)
    if rank <= 3:
        return Shape4D(squeezed)

    # Reshape so that batch becomes 1, by moving elements to H dimension
    n = rank - 2
    h = 1
    # The product must be taken over the squeezed dimensions; indexing the original
    # shape would pick up the removed 1-dimensions and lose elements
    for dim in squeezed[:n]:
        h *= dim
    return Shape4D(squeezed[n:]).with_height(h)

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

236

def fix_sg_input_output_tosa(op, arch, nng):
    """
    Inserts copy ops where needed, so that tensors that must persist are kept.

    Handles two cases:
    - A Const op whose ofm has None in its consumer list (sg output):
      a copy op is inserted before the ofm.
    - A Reshape/Identity op that runs on the NPU, where both the ifm and the ofm need
      to persist: a copy op is inserted after the ifm.
    arch and nng are not used.
    Returns op.
    """

    def has_none_consumer(tens):
        # None in the consumer list marks the tensor as output of the sg
        return any(cons is None for cons in tens.consumer_list)

    if op.type == Op.Const and has_none_consumer(op.ofm):
        # Const operator with sg output, insert copy op before the ofm
        insert_add_copy_for_const(op, get_shape_for_copy_op(op.ofm.shape.copy()))
    elif op.run_on_npu and op.type in (Op.Reshape, Op.Identity):
        # The tensors of the Reshape operators we want to remove are removed as well.
        # In order to do this, they cannot be outputs of the sg,
        # which needs to be fixed prior to the removal.
        # The solution is to add a copy op, to maintain the original tensor.
        # This is also valid when the reshape ifm/ofm is produced, respectively
        # consumed, by CPU

        # Check if operator ifm/ofm are sg ifm/ofm
        ifm_is_sg_ifm = op.ifm.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
        ifm_is_sg_ofm = has_none_consumer(op.ifm)
        ofm_is_sg_ofm = has_none_consumer(op.ofm)
        # Check if ifm/ofm is produced, respectively consumed, by CPU
        ifm_is_cpu_produced = any(prod is not None and not prod.run_on_npu for prod in op.ifm.ops)
        ofm_is_cpu_consumed = any(cons is not None and not cons.run_on_npu for cons in op.ofm.consumer_list)

        ifm_must_persist = ifm_is_sg_ofm or ifm_is_sg_ifm or ifm_is_cpu_produced
        ofm_must_persist = ofm_is_sg_ofm or ofm_is_cpu_consumed
        if ifm_must_persist and ofm_must_persist:
            # Both ifm and ofm need to persist, but only ifm need a copy, in order to remove the Operator
            # Decide on ifm/ofm shapes for the copy op based on ifm
            insert_add_copy_op_after_tens(op.ifm, get_shape_for_copy_op(op.ifm.shape.copy()))

    return op

def create_add_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """
    Creates an add op for the given concat op/input feature map.

    The add op takes ifm and a scalar zero constant as inputs, and is added as a producer
    of the ofm of concat_op, writing ifm_shape elements at write_offset.
    Returns the created add op.
    """
    concat_ofm = concat_op.ofm
    zero_scalar = create_const_tensor(
        name + "_zero_scalar",
        [1],
        concat_ofm.dtype,
        [0],
        concat_ofm.dtype.as_numpy_type(),
        quantization=concat_ofm.quantization,
    )
    new_op = create_add_nop(name)

    new_op.inputs = [ifm, zero_scalar]
    new_op.outputs = [concat_ofm]
    new_op.write_offset = write_offset
    new_op.write_shape = ifm_shape
    concat_ofm.ops.append(new_op)
    DebugDatabase.add_optimised(concat_op, new_op)
    new_op.ifm_shapes.extend([ifm_shape, Shape4D(zero_scalar.shape)])
    new_op.ofm_shapes.append(concat_op.ofm_shapes[0])
    new_op.memory_function = Op.ConcatSliceWrite

    return new_op

# TODO Could be further optimized checking the type of the consumer,

288

# rather than just mimic the TFLite behaviour depending on type.

289

# TOSA bool_t not considered yet

290

def remove_splitsliceread(op, arch):
    """
    Removes a SplitSliceRead op.

    The read is either moved to the single consumer of the ofm, or the op is replaced by
    an add no-op that performs the read with the read offset/shape of the removed op.
    Ops of any other type are left untouched. arch is not used.
    """
    if op.type != Op.SplitSliceRead:
        return

    consumers = op.ofm.consumer_list
    # Check if it is possible to put the SplitSliceRead on the tensor consumer, or if an add need to be inserted
    consumer_can_read = (
        len(consumers) == 1
        and consumers[0] is not None
        and consumers[0].run_on_npu
        and consumers[0].type != Op.Reshape
        and op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape)
        and op.ofm.dtype in (DataType.uint8, DataType.int8, DataType.int16)
    )
    if consumer_can_read:
        # SplitSliceRead can be performed by tensor consumer
        move_splitsliceread_to_consumer(op, consumers[0])
        return

    add_name = op.name + "_add"
    read_ofm = op.ofm
    zero_scalar = create_const_tensor(
        add_name + "_zero_scalar",
        [1],
        read_ofm.dtype,
        [0],
        read_ofm.dtype.as_numpy_type(),
        quantization=read_ofm.quantization,
    )
    add_op = create_add_nop(add_name)
    add_op.inputs = [op.ifm, zero_scalar]
    add_op.outputs = [read_ofm]
    # The add op takes over as producer of the ofm
    op.ofm.ops.remove(op)
    op.ofm.ops.append(add_op)
    add_op.ifm_shapes.extend([op.ifm_shapes[0], Shape4D(zero_scalar.shape)])
    add_op.ofm_shapes.append(op.ofm_shapes[0])
    add_op.read_offsets[0] = op.read_offsets[0]
    add_op.read_shapes[0] = op.read_shapes[0]

    op.ifm.consumer_list.remove(op)
    DebugDatabase.add_optimised(op, add_op)

324

325

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

326

def rewrite_concat(op):
    """
    Rewrites a Concat op, that runs on the NPU, to one add op per input.

    Each add op writes its input to the ofm of the Concat, at an offset along the axis
    given by the "axis4D" attribute. The offset of an input is the sum of the sizes,
    along that axis, of the inputs before it.
    Ops that do not run on the NPU, or are of any other type, are left untouched.
    """
    if not (op.run_on_npu and op.type == Op.Concat):
        return

    concat_axis = op.attrs["axis4D"]
    next_offset = 0

    for inp_idx, inp in enumerate(op.inputs):
        inp_shape = op.ifm_shapes[inp_idx]
        offset_list = [0, 0, 0, 0]
        offset_list[concat_axis] = next_offset
        end_offset = next_offset + inp_shape[concat_axis]
        create_add_for_concat(op, op.name + str(inp_idx) + "_add", inp, inp_shape, Shape4D.from_list(offset_list))
        next_offset = end_offset

    # The inputs must together cover the ofm along the concat axis
    assert op.ofm_shapes[0][concat_axis] == next_offset

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

341

342

Patrik Gustavsson

ef3ebdd

2021-10-01 11:10:25 +0200

[diff] [blame]

343

def remove_memory_ops(op, arch):
    """
    Bypasses Reshape and Identity ops that run on the NPU, using bypass_memory_only_ops.

    All other ops are left untouched. arch is not used.
    """
    if not op.run_on_npu:
        return
    if op.type in (Op.Reshape, Op.Identity):
        bypass_memory_only_ops(op)

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

346

347

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

348

def rewrite_activation(op, arch, nng):
    """
    Sets the "min"/"max" attributes of ReluN and Clamp ops.

    The attributes are calculated as "min_int"/"max_int" minus the zero point of the ifm,
    where a missing (or zero) zero point counts as 0. For ReluN only "max" is set.
    If the ofm has no zero point, it is set to the one used for the ifm.
    Ops of any other type are returned unchanged. arch and nng are not used.
    Returns op.
    """
    if op.type not in (Op.ReluN, Op.Clamp):
        return op

    zp = op.ifm.quantization.zero_point or 0
    ofm_quant = op.ofm.quantization
    if ofm_quant.zero_point is None:
        ofm_quant.zero_point = zp

    attrs = op.attrs
    if op.type == Op.Clamp:
        attrs["min"] = attrs["min_int"] - zp
        attrs["max"] = attrs["max_int"] - zp
    elif op.type == Op.ReluN:
        attrs["max"] = attrs["max_int"] - zp

    return op

def rewrite_rescale(op, arch, nng):
    """
    Rewrites a TOSA Rescale op.

    The Rescale is handled in one of the following ways:
    - Fused into the op producing its ifm, when that op is a (depthwise) conv2d or
      FullyConnected op, the ifm is int32 and the scaling is per channel.
      The producer gets the explicit scaling and rounding mode, and is set to produce
      the ofm of the Rescale.
    - Replaced by an AvgPool no-op with explicit scaling, for int8 -> int8, uint8 -> int8
      and int8 -> uint8, when the scaling is not per channel.
    For all other cases a warning is printed and an assertion fails.

    The zero points of the ifm and ofm are set from the "input_zp"/"output_zp" attributes.
    Ops of any other type are returned unchanged. arch and nng are not used.
    Returns op.
    """
    if op.type != Op.Rescale:
        return op

    ifm, ofm = op.ifm, op.ofm

    # some error checking
    assert len(ifm.ops) == 1
    prev_op = ifm.ops[0]

    # TODO currently not supported
    assert len(ifm.consumer_list) == 1

    attrs = op.attrs
    input_zp = attrs["input_zp"]
    output_zp = attrs["output_zp"]
    multiplier = attrs["multiplier"]
    shift = attrs["shift"]
    scale32 = attrs["scale32"]
    double_round = attrs["double_round"]
    per_channel = attrs["per_channel"]

    eight_bit_types = (DataType.uint8, DataType.int8)
    assert ifm.dtype in (DataType.uint8, DataType.int8, DataType.int32)
    assert ifm.dtype in eight_bit_types or input_zp == 0
    assert ofm.dtype in eight_bit_types or output_zp == 0
    assert (scale32 and ifm.dtype != DataType.int48) or (not scale32 and not double_round)

    # Check that input tensor has the same zp or no zp
    ifm_zp = ifm.quantization.zero_point
    if ifm_zp is not None and ifm_zp != input_zp:
        print("Error (fuse_rescale): zp of tensors producer/consumer differs unexpectedidly ")
        assert False
    ifm.quantization.zero_point = input_zp
    ofm.quantization.zero_point = output_zp

    for s, m in zip(shift, multiplier):
        # TODO these are the TOSA limitations
        assert m >= 0
        assert 2 <= s <= 62
        # TODO these are the HW limitations
        assert 0 <= s < (1 << 6)
    explicit_scaling = ExplicitScaling(per_channel, shift, multiplier)

    rounding_mode = NpuRoundingMode.TFL if double_round and scale32 else NpuRoundingMode.NATURAL

    producer_type = prev_op.type
    # TODO which are the cases we need to and can do standalone Rescale?
    # TODO should we try to identify a conversion uint8<->int8 accomplished by 2 RESCALE ops?
    # origin might be TFLite op QUANTIZE, should we look to see if they can be translated to QUANTIZE?
    # limited to these (ifm dtype, ofm dtype) combinations at the moment:
    standalone_dtypes = (
        (DataType.int8, DataType.int8),
        (DataType.uint8, DataType.int8),
        (DataType.int8, DataType.uint8),
    )

    if producer_type.is_depthwise_conv2d_op() or producer_type.is_conv2d_op() or producer_type == Op.FullyConnected:
        assert len(multiplier) == len(shift) == len(prev_op.bias.values)

        if ifm.dtype == DataType.int32 and per_channel:
            prev_op.explicit_scaling = explicit_scaling
            prev_op.rounding_mode = rounding_mode

            # Bypass op
            prev_op.set_output_tensor(ofm)
            DebugDatabase.add_optimised(op, prev_op)
            return op
        else:
            print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type)
            assert False
    elif (ifm.dtype, ofm.dtype) in standalone_dtypes:
        # Create NOP performing the RESCALE
        avgpool_op = replace_rescale_with_avg_pool(op)
        avgpool_op.rounding_mode = rounding_mode
        avgpool_op.explicit_scaling = explicit_scaling

        if per_channel:
            # TODO
            print("Warning, unsupported TOSA Rescale")
            assert False
    else:
        print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type)
        assert False

    return op

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

451

def convert_pad_in_width(op):
    """
    Rewrites PAD operator to an add that copies the IFM to the OFM
    + up to 2 add operators that fill the OFM with the pad value at the left and right borders.

    The left/right padding is read from the second last row of the values of the second input.
    The pad value is the zero point of the IFM, which is set to 0 afterwards.
    The type of op is changed to Op.ConcatTFLite.
    Returns the add op that copies the IFM.
    """
    assert op.type == Op.Pad
    assert op.ifm_shapes[0] is not None and op.ofm_shapes[0] is not None
    ifm, ofm = op.ifm, op.ofm
    ifm_shape = op.ifm_shapes[0]
    # The add ops created below are added as producers of the OFM
    ofm.ops = []
    ofm_shape = op.ofm_shapes[0]

    left, right = op.inputs[1].values[-2]

    # Add op that copies IFM to the right place inside the OFM
    origin = Shape4D(0, 0, 0, 0)
    main_add_op = create_add_for_concat(op, op.name + "_main", ifm, ifm_shape, origin.with_width(left))
    main_add_op.activation = op.activation

    quant = ofm.quantization
    pad_value = ifm.quantization.zero_point
    ifm.quantization.zero_point = 0

    def fill_border(suffix, border_width, write_offset):
        # Adds an op that writes a constant tensor, filled with the pad value, at write_offset
        border_name = op.name + suffix
        border_shape = Shape4D(1, ifm_shape.height, border_width, ofm_shape.depth)
        border_tens = create_const_tensor(
            border_name,
            border_shape.as_list(),
            ofm.dtype,
            border_shape.elements() * [pad_value],
            np.uint8,
            quantization=quant,
        )
        border_tens.equivalence_id = create_equivalence_id(tuple(border_tens.values))
        create_add_for_concat(op, border_name, border_tens, border_shape, write_offset)

    if left > 0:
        fill_border("_left", left, origin)
    if right > 0:
        fill_border("_right", right, origin.with_width(ofm_shape.width - right))

    op.type = Op.ConcatTFLite
    return main_add_op

Patrik Gustavsson

2021-09-14 14:56:48 +0200

[diff] [blame]

494

def convert_table_to_lut(op, arch, nng):
    """
    Converts a Table op to a no-op + LUT.

    The table, which is the second input, is removed from the inputs of the op, and
    its values are passed to convert_to_lut.
    Ops of any other type are returned unchanged. arch and nng are not used.
    Returns the result of convert_to_lut.
    """
    if op.type is not Op.Table:
        return op

    table_tens = op.inputs[1]
    op.inputs.remove(table_tens)
    # Set the shapes again, now that the table is no longer an input
    op.set_ifm_ofm_shapes()

    return convert_to_lut(op, table_tens.values, "table")

504

505

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

506

def decompose_elem_tensors_hwc(op):
    """
    Decomposes elementwise op if the ofm is too large in any dimension to be handled by the NPU

    When any dimension of the ofm is larger than 65535, the op is replaced by part ops
    (see create_elem_part_op), each handling at most 65535 elements in each of H, W and C.
    The write shape/read shapes of op are used, when set, instead of the ofm/ifm shapes.
    """
    max_t_size = 65535
    ofm_shape = op.write_shape if op.write_shape is not None else op.ofm_shapes[0]
    ifm_shape = op.read_shapes[0] if op.read_shapes[0] is not None else op.ifm_shapes[0]
    second_ifm_shape = op.ifm_shapes[1]
    ifm2_shape = second_ifm_shape if second_ifm_shape else None
    if op.read_shapes[1] is not None:
        ifm2_shape = op.read_shapes[1]
    limit_shape = Shape4D(1, max_t_size, max_t_size, max_t_size)

    if not any(dim_size > max_t_size for dim_size in ofm_shape.as_list()):
        return

    def input_cut(shape, height, width, depth):
        # A dimension that differs in size from the ofm is read from offset 0 by all parts
        # (presumably a broadcast dimension - TODO confirm)
        offset = Shape4D(
            0,
            height * max_t_size if shape.height == ofm_shape.height else 0,
            width * max_t_size if shape.width == ofm_shape.width else 0,
            depth * max_t_size if shape.depth == ofm_shape.depth else 0,
        )
        return (offset, shape.clip(offset, limit_shape))

    ofm_split = ofm_shape.floordiv_const(max_t_size).add(1, 1, 1, 1)

    for height in range(ofm_split.height):
        for width in range(ofm_split.width):
            for depth in range(ofm_split.depth):
                ofm_offset = Shape4D(0, height * max_t_size, width * max_t_size, depth * max_t_size)
                ofm_cut = (ofm_offset, ofm_shape.clip(ofm_offset, limit_shape))
                ifm_cut = input_cut(ifm_shape, height, width, depth)
                if ifm2_shape is not None:
                    ifm2_cut = input_cut(ifm2_shape, height, width, depth)
                else:
                    ifm2_cut = (None, None)

                create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut)

    # The part ops have replaced the original op
    op.ofm.ops.remove(op)
    op.ifm.consumer_list.remove(op)
    if op.ifm2 is not None:
        op.ifm2.consumer_list.remove(op)

def create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut):
    """
    Creates a clone of op that handles only a part of the ifm(s)/ofm.

    Each cut is a tuple (offset, shape). The offsets are added to the read/write offsets
    of op, where an offset that is not set counts as (0, 0, 0, 0), and the shapes are
    used as read/write shapes. The ifm2 cut is only applied if its offset is set.
    The clone is added as consumer of the ifm(s) and as producer of the ofm.
    Returns the created op.
    """

    def offset_or_origin(offset):
        return offset if offset is not None else Shape4D(0, 0, 0, 0)

    new_op = op.clone()
    base_read_offset = offset_or_origin(op.read_offsets[0])
    base_write_offset = offset_or_origin(op.write_offset)
    ifm_cut_offset, ifm_cut_shape = ifm_cut
    ofm_cut_offset, ofm_cut_shape = ofm_cut

    new_op.read_offsets[0] = base_read_offset + ifm_cut_offset
    new_op.read_shapes[0] = ifm_cut_shape
    new_op.write_offset = base_write_offset + ofm_cut_offset
    new_op.write_shape = ofm_cut_shape
    new_op.ifm_shapes = op.ifm_shapes.copy()
    new_op.ofm_shapes = op.ofm_shapes.copy()
    new_op.ifm.consumer_list.append(new_op)
    op.ofm.ops.append(new_op)

    ifm2_cut_offset, ifm2_cut_shape = ifm2_cut
    if ifm2_cut_offset:
        base_read_offset2 = offset_or_origin(op.read_offsets[1])
        new_op.read_offsets[1] = base_read_offset2 + ifm2_cut_offset
        new_op.read_shapes[1] = ifm2_cut_shape
        new_op.ifm2.consumer_list.append(new_op)

    return new_op

576

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

577

578

def get_nhwc_stride(shape):
    """
    Returns the strides, counted in elements, of the given shape as a Shape4D

    The stride of the last dimension is 1, and the stride of each other dimension is the
    product of the sizes of the dimensions after it (depth, width * depth, height * width * depth).
    """
    depth_stride = 1
    width_stride = shape.depth
    height_stride = shape.width * width_stride
    batch_stride = shape.height * height_stride
    return Shape4D(batch_stride, height_stride, width_stride, depth_stride)

583

584

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

585

def pad_to_rank(shape, rank):
    """
    Pads a shape to the given rank

    The shape is padded with leading 1s. A shape that already has the given rank, or a
    higher rank, is returned as is.
    """
    missing = rank - len(shape)
    if missing <= 0:
        return shape
    return [1] * missing + shape

Patrik Gustavsson

2021-09-21 14:18:44 +0200

[diff] [blame]

595

def get_elem_shapes_removed_singles(op):
    """
    Returns the shapes of ofm/ifm(s) after removing all the dimensions where the ofm is 1

    The shapes are taken from op.ofm_shapes/op.ifm_shapes when available, otherwise from
    the tensors, and are padded with leading 1s to the same rank before the removal.

    Returns the tuple (ofm shape, ifm shape, ifm2 shape), each as a list.
    If all dimensions of the ofm are 1, the shapes are [1].
    For an op without ifm2, the ifm2 shape is an empty list, or None if all dimensions
    of the ofm are 1.
    """
    binary = op.ifm2 is not None
    ofm_shape = op.ofm_shapes[0].as_list() if len(op.ofm_shapes) > 0 else op.ofm.shape
    ifm_shape = op.ifm_shapes[0].as_list() if len(op.ifm_shapes) > 0 else op.ifm.shape
    shapes = [ofm_shape, ifm_shape]
    if binary:
        # The ifm2 shape is the second entry of ifm_shapes, so that is the list that needs
        # to be checked (checking ofm_shapes could give an IndexError here)
        ifm2_shape = op.ifm_shapes[1].as_list() if len(op.ifm_shapes) > 1 else op.ifm2.shape
        shapes.append(ifm2_shape)

    rank = max(len(shape) for shape in shapes)
    shapes = [pad_to_rank(shape, rank) for shape in shapes]
    ofm_shape = shapes[0]

    # Only the ofm decides which dimensions are kept
    kept_dims = [idx for idx in range(rank) if ofm_shape[idx] != 1]
    if not kept_dims:
        return [1], [1], ([1] if binary else None)

    new_ofm_shape = [ofm_shape[idx] for idx in kept_dims]
    new_ifm_shape = [shapes[1][idx] for idx in kept_dims]
    new_ifm2_shape = [shapes[2][idx] for idx in kept_dims] if binary else []

    return new_ofm_shape, new_ifm_shape, new_ifm2_shape

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

627

628

Patrik Gustavsson

2021-09-21 14:18:44 +0200

[diff] [blame]

629

def decomp_dims_elementwise(op):
    """
    Decompose an elementwise op whose reduced rank is above 3 (H,W,D).

    Dimensions that are 1 are removed first (get_elem_shapes_removed_singles).
    If more than three dimensions remain, the leading ones are viewed as the N
    dimension and one part op is created per index of it, each reading and
    writing the ifm(s)/ofm at an offset.
    For binary ops the offsets of ifm and ifm2 are computed separately from the
    ofm offset, since TOSA allows broadcast by both ifm and ifm2.

    Returns the list of created part ops, or [op] when the reduced rank is <= 3.
    """
    ifm = op.ifm
    ifm2 = op.ifm2
    ofm = op.ofm
    binary = ifm2 is not None

    # Remove dimensions that are all 1
    red_ofm, red_ifm, red_ifm2 = get_elem_shapes_removed_singles(op)
    reduced_rank = len(red_ofm)

    if reduced_rank <= 3:
        # Nothing to decompose, only record the reduced shapes on the op
        op.ofm_shapes.append(Shape4D(red_ofm))
        op.ifm_shapes.append(Shape4D(red_ifm))
        # NOTE(review): this is appended for unary ops as well - confirm that is intended
        op.ifm_shapes.append(Shape4D(red_ifm2))
        return [op]

    # Number of leading dimensions that are folded into N
    outer = reduced_rank - 3
    ofm_outer = Shape4D(red_ofm[0:outer])
    ofm_outer_stride = get_nhwc_stride(ofm_outer)
    ofm_inner = Shape4D(red_ofm[outer:])
    op.ofm_shapes.append(Shape4D([ofm_outer.elements()] + red_ofm[outer:]))

    if binary:
        ifm_outer = Shape4D(red_ifm[0:outer])
        ifm2_outer = Shape4D(red_ifm2[0:outer])
        ifm_outer_stride = get_nhwc_stride(ifm_outer)
        ifm2_outer_stride = get_nhwc_stride(ifm2_outer)
        ifm_inner = Shape4D(red_ifm[outer:])
        ifm2_inner = Shape4D(red_ifm2[outer:])
        op.ifm_shapes.append(Shape4D([ifm_outer.elements()] + red_ifm[outer:]))
        op.ifm_shapes.append(Shape4D([ifm2_outer.elements()] + red_ifm2[outer:]))
    else:
        # A unary op reads its ifm with the same shape as the ofm
        op.ifm_shapes.append(Shape4D([ofm_outer.elements()] + red_ofm[outer:]))

    def input_offset(outer_shape, outer_stride, h, w, d):
        # Index 0 is used along every outer dimension whose extent differs from the ofm's
        d_idx = d if outer_shape.depth == ofm_outer.depth else 0
        w_idx = w if outer_shape.width == ofm_outer.width else 0
        h_idx = h if outer_shape.height == ofm_outer.height else 0
        position = Shape4D(0, h_idx, w_idx, d_idx)
        return Shape4D(position.dot_prod(outer_stride), 0, 0, 0)

    # Every index of the folded outer dimensions, in height/width/depth order
    positions = [
        (h, w, d)
        for h in range(ofm_outer.height)
        for w in range(ofm_outer.width)
        for d in range(ofm_outer.depth)
    ]

    part_ops = []
    for h, w, d in positions:
        ofm_position = Shape4D(0, h, w, d)
        ofm_cut = (Shape4D(ofm_position.dot_prod(ofm_outer_stride), 0, 0, 0), ofm_inner)

        if binary:
            ifm_cut = (input_offset(ifm_outer, ifm_outer_stride, h, w, d), ifm_inner)
            ifm2_cut = (input_offset(ifm2_outer, ifm2_outer_stride, h, w, d), ifm2_inner)
            part_ops.append(create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut))
        else:
            part_ops.append(create_elem_part_op(op, ofm_cut, None, ofm_cut))

    # The original op is replaced by its parts, so disconnect it from its tensors
    ofm.ops.remove(op)
    ifm.consumer_list.remove(op)
    if binary:
        ifm2.consumer_list.remove(op)

    return part_ops

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

704

705

706

def decomp_elementwise(tens, arch, nng):
    """
    Decompose the elementwise ops that produce tens.

    Each elementwise op is first split by decomp_dims_elementwise (Rank > 3 (H,W,C)),
    and every resulting op is then passed to decompose_elem_tensors_hwc
    (decomposition of tensors exceeding NPU max size).
    arch and nng are not used by this function.

    Returns tens.
    """
    # Iterate over a snapshot, decomp_dims_elementwise may remove ops from a tensor's op list
    producers = tens.ops.copy()
    for producer in producers:
        if not producer.type.is_elementwise_op():
            continue
        for part_op in decomp_dims_elementwise(producer):
            decompose_elem_tensors_hwc(part_op)

    return tens

def reshape_concat_shape(shape, rank, axis):
    """
    Collapse shape into three dimensions around axis.

    Returns [product of shape[0:axis], shape[axis], product of shape[axis + 1:rank]].
    An empty product is 1, so the last entry is 1 when axis is the last dimension.
    """

    def extent(first, last):
        # Product of the dimensions with index in [first, last)
        total = 1
        for idx in range(first, last):
            total *= shape[idx]
        return total

    return [extent(0, axis), shape[axis], extent(axis + 1, rank)]

def reshape_concat(op):
    """
    Set the ifm/ofm shapes and the 4D concat axis of a concat op.

    For Rank > 3 (H,W,C) the input and output shapes are collapsed with
    reshape_concat_shape so that the axis to be concatenated is the W dimension
    (axis4D = 2). Otherwise the shapes are used as they are and the axis is
    shifted by (4 - rank).
    The result is stored in op.ifm_shapes, op.ofm_shapes and op.attrs["axis4D"].
    """
    ofm = op.ofm
    rank = len(ofm.shape)
    axis = op.attrs["axis"]
    # A negative axis counts from the end
    if axis < 0:
        axis += rank

    if rank > 3:

        def to_shape4d(shape):
            # Reshape so that the axis to be concatenated is the W dimension
            return Shape4D(reshape_concat_shape(shape, rank, axis))

        axis_4d = 2
    else:
        to_shape4d = Shape4D
        axis_4d = axis + (4 - rank)

    # Inputs first, then the output
    for inp in op.inputs:
        op.ifm_shapes.append(to_shape4d(inp.shape))
    op.ofm_shapes.append(to_shape4d(ofm.shape))
    op.attrs["axis4D"] = axis_4d

759

760

761

def decomp_rewrite_concat(tens, arch, nng):
    """
    Decompose and rewrite the concat op producing tens, if there is one.

    Only acts when tens has exactly one producing op and that op is an Op.Concat:
    the op is reshaped (reshape_concat, for Rank > 3 (H,W,C)), rewritten
    (rewrite_concat) and then disconnected from its output and input tensors.
    arch and nng are not used by this function.

    Returns tens.
    """
    producers = tens.ops
    if len(producers) != 1:
        return tens

    op = producers[0]
    if op.type != Op.Concat:
        return tens

    reshape_concat(op)
    rewrite_concat(op)

    # The concat op itself is no longer needed, detach it from the graph
    op.ofm.ops.remove(op)
    for inp in op.inputs:
        inp.consumer_list.remove(op)

    return tens

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

779

def decomp_rewrite_pad(op, arch):
    """
    Decomposition of pad to elementwise operations.

    For each dimension that needs padding:
        - A new PAD operator is created for that dimension.
          Its ifm/ofm are reshaped so that the dimension to be padded is the
          width dimension (rank for each is 3)
        - The new PAD operator is rewritten (convert_pad_in_width) so there is:
            - 1 Add operator for copying the data
            - 1 Add operator for each left/right to be padded

    Ops that are not Op.Pad are returned unchanged. arch is not used by this function.

    Returns the op from the last convert_pad_in_width call when at least one
    dimension was padded, otherwise op (changed to Op.Identity when a dimension
    without padding was encountered).
    """
    # TODO several things would be possible to optimize
    # For instance there are cases when it should be possible to pad 2
    # dimensions at the same time.
    if op.type != Op.Pad:
        return op

    ofm_elements = shape_num_elements(op.ofm.shape)
    padding = op.inputs[1].values

    rank = len(op.ifm.shape)
    next_ifm = op.ifm
    next_ifm_shape = next_ifm.shape.copy()

    # Overwritten for every padded dimension, so it ends up holding the last rewritten op
    last_rewritten_op = None
    ifm_quant = op.ifm.quantization.clone()

    for dim in range(padding.shape[0]):
        # Check if padding is to be applied in this dimension
        dim_pad = padding[dim]
        if (dim_pad == 0).all():
            # Change to Identity operation (will be removed)
            op.type = Op.Identity
            continue

        # Total amount (left + right) added to this dimension
        pad_total = dim_pad.sum()

        # Reshape so that width dimension is to be padded
        new_ifm_shape = reshape_concat_shape(next_ifm_shape, rank, dim)
        new_pad_input = np.zeros((4, 2), dtype=np.int32)
        new_pad_input[2] = dim_pad

        pad_op = create_pad_nop(f"{op.name}_dim_{dim}")
        pad_op.add_input_tensor(next_ifm)

        # Padding tensor for the new op, only the width row is non-zero
        name = op.inputs[1].name + f"_dim_{dim}"
        new_pad_tens = create_const_tensor(
            name, list(new_pad_input.shape), DataType.int32, new_pad_input, np.int32
        )
        pad_op.add_input_tensor(new_pad_tens)

        new_ofm_shape = new_ifm_shape.copy()
        new_ofm_shape[-2] = new_ofm_shape[-2] + pad_total
        next_ifm_shape[dim] = next_ifm_shape[dim] + pad_total

        if Shape4D(new_ofm_shape).elements() == ofm_elements:
            # Last one, use op.ofm
            ofm = op.ofm
        else:
            # add a new ofm Tensor
            ofm = Tensor(new_ofm_shape, op.ofm.dtype, f"{pad_op.name}_tens")
            ofm.quantization = ifm_quant.clone()

        pad_op.set_output_tensor(ofm)
        pad_op.ifm_shapes.append(Shape4D(new_ifm_shape))
        pad_op.ofm_shapes.append(Shape4D(new_ofm_shape))
        DebugDatabase.add_optimised(op, pad_op)
        # The output of this pad is the input of the pad for the next dimension
        next_ifm = ofm

        # Rewrite the pad op
        last_rewritten_op = convert_pad_in_width(pad_op)

    if last_rewritten_op:
        assert op.ofm.shape == next_ifm_shape
        # The original pad op has been replaced, detach it from its inputs
        for inp in op.inputs:
            inp.consumer_list.remove(op)
        return last_rewritten_op

    return op

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

857

def fixup_quantization(op, arch, nng):
    """
    Set any unset (None) zero point of the op's ifm, ifm2 and ofm to 0.

    The ofm is left untouched when op.forced_output_quantization is set.
    Tensors that are missing, or that have no quantization, are skipped; this
    applies to the inputs as well as the output.
    arch and nng are not used by this function.

    Returns op, whose tensors are modified in place.
    """
    candidates = [op.ifm, op.ifm2]
    if not op.forced_output_quantization:
        candidates.append(op.ofm)

    for tens in candidates:
        if not tens:
            continue
        quant = tens.quantization
        # A tensor without quantization has no zero point to fix up
        if quant and quant.zero_point is None:
            quant.zero_point = 0

    return op

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

868

def supported_operator_check(op, arch, nng):
    """
    Record on op.run_on_npu whether the op is supported according to
    arch.tosa_supported_operators.

    An op that is not supported must be a Placeholder, SubgraphInput or Const,
    anything else trips the assert. nng is not used by this function.

    Returns op.
    """
    supported = arch.tosa_supported_operators.is_operator_supported(op)
    op.run_on_npu = supported
    assert supported or op.type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
    return op

def tosa_optimise_graph(nng, arch):

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

875

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

876

# TODO the supported operator checking need to be split in semantic and HW checks

877

for idx, sg in enumerate(nng.subgraphs):

878

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[supported_operator_check],

884

rewrite_unsupported=False,

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

885

)

886

887

# Decomposing and rewrite of concat

888

for idx, sg in enumerate(nng.subgraphs):

889

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

890

nng, sg, arch, [decomp_rewrite_concat], [], rewrite_unsupported=False

891

)

892

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

893

# Decomposing of pad

894

for idx, sg in enumerate(nng.subgraphs):

895

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [decomp_rewrite_pad])

896

sg.refresh_after_modification()

897

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

898

# Handle sg input output

899

for idx, sg in enumerate(nng.subgraphs):

900

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[fix_sg_input_output_tosa],

906

rewrite_unsupported=True,

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

907

)

908

909

# Removal of reshapes

910

for sg in nng.subgraphs:

Patrik Gustavsson

ef3ebdd

2021-10-01 11:10:25 +0200

[diff] [blame]

911

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_memory_ops])

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

912

sg.refresh_after_modification()

913

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

914

# Decomposing of elementwise

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

915

for idx, sg in enumerate(nng.subgraphs):

916

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

917

nng, sg, arch, [decomp_elementwise], [], rewrite_unsupported=False

918

)

919

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

920

for idx, sg in enumerate(nng.subgraphs):

921

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[set_ifm_ofm_op_shapes],

927

rewrite_unsupported=False,

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

928

)

929

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

930

# Removal of Transpose

931

for idx, sg in enumerate(nng.subgraphs):

932

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[remove_const_transpose],

938

rewrite_unsupported=False,

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

939

)

940

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

941

# TODO, when and where to best handle calc_scaling_avgpool

942

for idx, sg in enumerate(nng.subgraphs):

943

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[calc_scaling_avgpool],

949

rewrite_unsupported=False,

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

950

)

951

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

952

# Rewite Operators step

Patrik Gustavsson

f436ada

2021-09-14 14:56:48 +0200

[diff] [blame]

953

op_rewrite_list = [set_tensor_equivalence, rewrite_rescale, convert_depthwise_to_conv, convert_table_to_lut]

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

954

955

for idx, sg in enumerate(nng.subgraphs):

956

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

op_rewrite_list,

rewrite_unsupported=False,

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

963

)

964

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

965

# Post-processing step 1

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

966

for idx, sg in enumerate(nng.subgraphs):

967

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[rewrite_activation, add_padding_fields],

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

973

)

974

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

975

# Removal of Slice, need to be done after optimisation has been performed,

976

# since ifm/ofm_shapes are of importance to this function

977

for sg in nng.subgraphs:

978

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_splitsliceread])

979

sg.refresh_after_modification()

980

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

981

# Post-processing step 2

982

for idx, sg in enumerate(nng.subgraphs):

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

983

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

nng,

sg,

arch,

[],

[fixup_quantization],

989

)

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

990

Patrik Gustavsson