Blame - ethosu/vela/tosa_graph_optimiser.py - ml/ethos-u/ethos-u-vela

2021-06-28 07:41:58 +0200

[diff] [blame]

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Rickard Bolin

bc6ee58

2022-11-04 08:24:29 +0000

[diff] [blame^]

16

#

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

17

# Description:

18

# Early optimisation of the TOSA based network graph, using the rewrite_graph module to do the traversal of the graph.

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

19

import numpy as np

20

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

21

from . import rewrite_graph

22

from .api import NpuRoundingMode

23

from .data_type import DataType

24

from .debug_database import DebugDatabase

Jonas Ohlsson

0957e3e

2021-09-01 15:57:21 +0200

[diff] [blame]

25

from .graph_optimiser_util import bypass_memory_only_ops

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

26

from .graph_optimiser_util import calc_explicit_padding

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

27

from .graph_optimiser_util import convert_depthwise_to_conv

Patrik Gustavsson

f436ada

2021-09-14 14:56:48 +0200

[diff] [blame]

28

from .graph_optimiser_util import convert_to_lut

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

29

from .graph_optimiser_util import move_splitsliceread_to_consumer

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

30

from .graph_optimiser_util import needed_total_padding

31

from .graph_optimiser_util import set_ifm_ofm_op_shapes

32

from .graph_optimiser_util import set_tensor_equivalence

33

from .operation import ExplicitScaling

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

34

from .operation import Op

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

35

from .operation_util import create_add_nop

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

36

from .operation_util import create_avgpool_nop

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

37

from .operation_util import create_pad_nop

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

38

from .shape4d import Shape4D

39

from .tensor import create_const_tensor

Patrik Gustavsson

2021-09-08 15:04:11 +0200

[diff] [blame]

40

from .tensor import create_equivalence_id

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

41

from .tensor import shape_num_elements

42

from .tensor import Tensor

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

43

44

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

45

def replace_rescale_with_avg_pool(rescale_op):
    """
    Turns the given Rescale op, in place, into an AvgPool op.

    The attrs are copied from a freshly created avgpool nop, and the
    conversion is registered in the DebugDatabase using a clone of the
    original op as the source. Returns the (mutated) input op.
    """
    assert rescale_op.type == Op.Rescale

    # Template op, only used as the source of the AvgPool attributes
    nop_template = create_avgpool_nop(rescale_op.name + "_avgpool")
    # Snapshot of the op before it is modified, for the debug database
    original_op = rescale_op.clone()

    rescale_op.attrs = nop_template.attrs.copy()
    rescale_op.type = Op.AvgPool
    DebugDatabase.add_optimised(original_op, rescale_op)

    return rescale_op

def calc_skirt(kernel, input_shape, explicit_padding):
    """
    Calculates padding and skirt for a kernel applied to input_shape.

    explicit_padding is unpacked as (top, left, bottom, right).
    Returns (padding, skirt), where
      padding = (top, left, bottom, right) as returned by calc_explicit_padding
      skirt   = (top, left, total_y - top, total_x - left), with the totals
                given by needed_total_padding
    """
    kernel_w, kernel_h = kernel.dilated_wh()
    stride_x, stride_y = kernel.stride

    ifm_h, ifm_w = int(input_shape.height), int(input_shape.width)
    stride_x, stride_y = int(stride_x), int(stride_y)
    kernel_w, kernel_h = int(kernel_w), int(kernel_h)

    total_pad_y = needed_total_padding(ifm_h, stride_y, kernel_h)
    total_pad_x = needed_total_padding(ifm_w, stride_x, kernel_w)

    top, left, bottom, right = explicit_padding
    pad_top, pad_bottom = calc_explicit_padding(ifm_h, stride_y, kernel_h, int(top), int(bottom))
    pad_left, pad_right = calc_explicit_padding(ifm_w, stride_x, kernel_w, int(left), int(right))

    return (
        (pad_top, pad_left, pad_bottom, pad_right),
        (pad_top, pad_left, total_pad_y - pad_top, total_pad_x - pad_left),
    )

71

72

73

def add_padding_fields(op, arch, nng):
    """
    Updates the "explicit_padding" attribute and adds a "skirt" attribute.

    Only done for ops that run on the NPU and already carry an
    "explicit_padding" attribute. Always returns op.
    """
    if not op.run_on_npu or "explicit_padding" not in op.attrs:
        return op

    ifm_shape = op.ifm_shapes[0]

    if op.type == Op.Conv2DBackpropInputSwitchedBias:
        # TODO not yet supported, but there will be need for separate handling
        assert False
    else:
        new_padding, new_skirt = calc_skirt(op.kernel, ifm_shape, op.attrs.get("explicit_padding"))

    op.attrs["explicit_padding"] = new_padding
    op.attrs["skirt"] = new_skirt

    return op

Patrik Gustavsson

2021-09-07 13:30:29 +0200

[diff] [blame]

90

# Counts leading zeroes for a (int32)

91

def count_leading_zeros(a):
    """
    Returns the number of leading zero bits of a, viewed as a 32-bit value.

    Only the 32 least significant bits of a are considered, so negative
    values (bit 31 set in two's complement) give 0, and a value whose low
    32 bits are all zero gives 32.
    """
    # Restricting to 32 bits guarantees termination for any integer input;
    # a bit-by-bit scan would never finish for e.g. 1 << 32.
    low_bits = int(a) & 0xFFFFFFFF
    return 32 - low_bits.bit_length()

def calc_scaling_avgpool(op, arch, nng):
    """
    Sets explicit scaling and NATURAL rounding on AvgPool ops.

    The multiplier is ((1 << 30) + 1) << k divided by the number of kernel
    elements, with shift 30 + k, where k = 32 - clz(kernel elements - 1).
    Ops of any other type are returned untouched.
    """
    if op.type != Op.AvgPool:
        return op

    pad_top, pad_left, _, _ = op.attrs["explicit_padding"]
    # TODO Only support for when global scaling can be used.
    # That is when there is no padding
    assert pad_top == 0 and pad_left == 0
    assert op.explicit_scaling is None

    kernel_elems = op.kernel.elements_wh()
    k = 32 - count_leading_zeros(kernel_elems - 1)
    numerator = np.int64(((1 << 30) + 1) << k)
    multiplier = [numerator // kernel_elems]
    shift = [30 + k]

    op.rounding_mode = NpuRoundingMode.NATURAL
    op.explicit_scaling = ExplicitScaling(False, shift, multiplier)
    return op

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

123

def remove_const_transpose(op, arch, nng):
    """
    Removes a Transpose op whose input is produced by a single Const op.

    The input tensor's shape (and values, if present) are permuted according
    to the "perms" attribute, and the Const op is rewired to produce the
    Transpose's ofm. Any other Transpose prints a warning and asserts.
    Always returns op.
    """
    if op.type != Op.Transpose:
        return op

    const_producer = None
    if len(op.ifm.ops) == 1:
        candidate = op.ifm.ops[0]
        if candidate.type == Op.Const:
            const_producer = candidate

    if const_producer is not None:
        # Transpose the Tensor and data and remove Transpose
        # TODO move to Tensor?
        perms = op.attrs["perms"]
        old_shape = op.ifm.shape.copy()
        const_tens = op.ifm

        const_tens.shape = [old_shape[axis] for axis in perms]
        const_tens.bandwidth_shape = const_tens.shape
        const_tens.storage_shape = const_tens.shape

        if const_tens.values is not None:
            const_tens.values = const_tens.values.transpose(perms)

        op.ofm.values = const_tens.values
        # Bypass the Transpose op
        const_producer.set_output_tensor(op.ofm)
        DebugDatabase.add_optimised(op, const_producer)
    else:
        print("Warning: Cannot remove Transpose, and handling of Transpose is not supported")
        assert False

    return op

Patrik Gustavsson

2021-10-06 14:46:46 +0200

[diff] [blame]

155

def insert_add_copy_for_const(op, ifm_ofm_shape):
    """
    Inserts an add nop (x + 0) between a Const op and its original ofm.

    The Const op is rewired to produce a clone of its ofm, and the add op
    copies that clone into the original ofm. ifm_ofm_shape is used as both
    the ifm and the ofm shape of the add op.
    """
    assert op.type == Op.Const
    orig_ofm = op.ofm
    const_out = orig_ofm.clone()
    op.set_output_tensor(const_out)

    add_name = orig_ofm.name + "_add"
    zero_tens = create_const_tensor(
        add_name + "_zero_scalar",
        [1],
        const_out.dtype,
        [0],
        const_out.dtype.as_numpy_type(),
        quantization=const_out.quantization,
    )

    add_op = create_add_nop(add_name)
    add_op.add_input_tensor(const_out)
    add_op.add_input_tensor(zero_tens)
    add_op.set_output_tensor(orig_ofm)
    add_op.ifm_shapes.append(ifm_ofm_shape)
    add_op.ifm_shapes.append(Shape4D(zero_tens.shape))
    add_op.ofm_shapes.append(ifm_ofm_shape)
    add_op.run_on_npu = True

    DebugDatabase.add_optimised(op, add_op)

180

181

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

182

# TODO can we change to add for both TFLite and TOSA?

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

183

def insert_add_copy_op_after_tens(tens, ifm_ofm_shape):
    """
    Inserts an add nop (x + 0) that copies tens into a clone of itself.

    All consumers that tens had before the insertion are rewired to read the
    clone instead. ifm_ofm_shape is used as both the ifm and the ofm shape
    of the add op.
    """
    # Snapshot the consumers before the add op is connected to tens
    prev_consumers = tens.consumer_list.copy()
    cloned_tens = tens.clone()

    add_name = tens.name + "_add"
    zero_tens = create_const_tensor(
        add_name + "_zero_scalar",
        [1],
        cloned_tens.dtype,
        [0],
        cloned_tens.dtype.as_numpy_type(),
        quantization=cloned_tens.quantization,
    )

    add_op = create_add_nop(add_name)
    add_op.add_input_tensor(tens)
    add_op.add_input_tensor(zero_tens)
    add_op.set_output_tensor(cloned_tens)
    add_op.ifm_shapes.append(ifm_ofm_shape)
    add_op.ifm_shapes.append(Shape4D(zero_tens.shape))
    add_op.ofm_shapes.append(ifm_ofm_shape)
    add_op.run_on_npu = True

    # Redirect every input of the previous consumers that referred to tens
    for consumer in prev_consumers:
        if consumer is None:
            continue
        for input_idx, input_tens in enumerate(consumer.inputs):
            if input_tens == tens:
                consumer.set_input_tensor(cloned_tens, input_idx)

    DebugDatabase.add_optimised(tens.ops[0], add_op)

213

214

Patrik Gustavsson

2021-10-06 14:46:46 +0200

[diff] [blame]

215

def get_shape_for_copy_op(shape):
    """
    Returns a Shape4D to use for a copy op over a tensor of the given shape.

    Dimensions equal to 1 are dropped first (an all-ones or empty shape
    becomes [1]). If more than 3 dimensions remain, all but the last two are
    folded into the height, so that the batch becomes 1 and the number of
    elements is kept.
    """
    # remove dimensions that are set to 1
    squeezed = [dim for dim in shape if dim != 1]
    if not squeezed:
        squeezed = [1]

    rank = len(squeezed)
    if rank <= 3:
        return Shape4D(squeezed)

    # Reshape so that batch becomes 1, by moving elements to H dimension
    n = rank - 2
    height = 1
    # The product must be taken over the squeezed dimensions; indexing the
    # original shape here would pick up the removed size-1 dimensions and
    # yield a shape with too few elements.
    for dim in squeezed[:n]:
        height *= dim
    return Shape4D(squeezed[n:]).with_height(height)

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

237

def fix_sg_input_output_tosa(op, arch, nng):
    """
    Inserts copy ops so that tensors that must persist survive op removal.

    - Const op whose ofm has a None consumer (subgraph output): a copy op is
      inserted before the ofm.
    - Reshape/Identity running on the NPU: if both its ifm and ofm need to
      persist, a copy op is inserted after the ifm.
    Always returns op.
    """
    if op.type == Op.Const and any(cons is None for cons in op.ofm.consumer_list):
        # Const operator with sg output, insert copy op before the ofm
        copy_shape = get_shape_for_copy_op(op.ofm.shape.copy())
        insert_add_copy_for_const(op, copy_shape)
        return op

    if not (op.run_on_npu and op.type in (Op.Reshape, Op.Identity)):
        return op

    # For the Reshape operators we want to remove, tensors are removed.
    # But in order to do this, they cannot be outputs of the sg,
    # this needs to be fixed prior to the removal.
    # Solution is to add a copy op, to maintain the original tensor.
    # This is also valid when reshape ifm/ofm is produced respectively
    # consumed by CPU

    ifm_tens = op.ifm
    ofm_tens = op.ofm

    # Check if operator ifm/ofm are sg ifm/ofm
    ifm_is_sg_ifm = ifm_tens.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
    ifm_is_sg_ofm = any(cons is None for cons in ifm_tens.consumer_list)
    ofm_is_sg_ofm = any(cons is None for cons in ofm_tens.consumer_list)
    # Check if ifm/ofm is produced respectively consumed by CPU
    ifm_is_cpu_produced = any(prod is not None and not prod.run_on_npu for prod in ifm_tens.ops)
    ofm_is_cpu_consumed = any(cons is not None and not cons.run_on_npu for cons in ofm_tens.consumer_list)

    ifm_must_persist = ifm_is_sg_ofm or ifm_is_sg_ifm or ifm_is_cpu_produced
    ofm_must_persist = ofm_is_sg_ofm or ofm_is_cpu_consumed

    if ifm_must_persist and ofm_must_persist:
        # Both ifm and ofm need to persist, but only ifm need a copy, in order to remove the Operator
        # Decide on ifm/ofm shapes for the copy op based on ifm
        copy_shape = get_shape_for_copy_op(op.ifm.shape.copy())
        insert_add_copy_op_after_tens(op.ifm, copy_shape)

    return op

def create_add_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """
    Creates an add nop (ifm + 0) that writes ifm into the ofm of concat_op.

    The add op writes ifm_shape elements at write_offset in the ofm, is
    appended to the ofm's producers and marked with the ConcatSliceWrite
    memory function. Returns the created op.
    """
    concat_ofm = concat_op.ofm
    zero_tens = create_const_tensor(
        name + "_zero_scalar",
        [1],
        concat_ofm.dtype,
        [0],
        concat_ofm.dtype.as_numpy_type(),
        quantization=concat_ofm.quantization,
    )
    slice_write_op = create_add_nop(name)

    slice_write_op.inputs = [ifm, zero_tens]
    slice_write_op.outputs = [concat_ofm]
    slice_write_op.write_offset = write_offset
    slice_write_op.write_shape = ifm_shape
    concat_ofm.ops.append(slice_write_op)
    DebugDatabase.add_optimised(concat_op, slice_write_op)

    slice_write_op.ifm_shapes.append(ifm_shape)
    slice_write_op.ifm_shapes.append(Shape4D(zero_tens.shape))
    slice_write_op.ofm_shapes.append(concat_op.ofm_shapes[0])
    slice_write_op.memory_function = Op.ConcatSliceWrite
    return slice_write_op

# TODO Could be further optimized checking the type of the consumer,

289

# rather than just mimic the TFLite behaviour depending on type.

290

# TOSA bool_t not considered yet

291

def remove_splitsliceread(op, arch):
    """
    Removes a SplitSliceRead op.

    If the ofm has a single NPU consumer that is not a Reshape, the op's ofm
    shape equals the ofm tensor shape and the dtype is uint8/int8/int16, the
    read is moved to that consumer. Otherwise the op is replaced by an add
    nop that performs the sliced read.
    """
    if op.type != Op.SplitSliceRead:
        return

    consumers = op.ofm.consumer_list
    sole_consumer = consumers[0] if len(consumers) == 1 else None

    # Check if it is possible to put the SplitSliceRead on the tensor consumer, or if an add needs to be inserted
    consumer_can_read = (
        sole_consumer is not None
        and sole_consumer.run_on_npu
        and sole_consumer.type != Op.Reshape
        and op.ofm_shapes[0] == Shape4D.from_list(op.ofm.shape)
        and op.ofm.dtype in (DataType.uint8, DataType.int8, DataType.int16)
    )

    if consumer_can_read:
        # SplitSliceRead can be performed by tensor consumer
        move_splitsliceread_to_consumer(op, op.ofm.consumer_list[0])
        return

    add_name = op.name + "_add"
    slice_ofm = op.ofm
    zero_tens = create_const_tensor(
        add_name + "_zero_scalar",
        [1],
        slice_ofm.dtype,
        [0],
        slice_ofm.dtype.as_numpy_type(),
        quantization=slice_ofm.quantization,
    )
    read_op = create_add_nop(add_name)
    read_op.inputs = [op.ifm, zero_tens]
    read_op.outputs = [slice_ofm]

    # The add op takes over as producer of the ofm
    op.ofm.ops.remove(op)
    op.ofm.ops.append(read_op)

    read_op.ifm_shapes.append(op.ifm_shapes[0])
    read_op.ifm_shapes.append(Shape4D(zero_tens.shape))
    read_op.ofm_shapes.append(op.ofm_shapes[0])
    read_op.read_offsets[0] = op.read_offsets[0]
    read_op.read_shapes[0] = op.read_shapes[0]

    op.ifm.consumer_list.remove(op)
    DebugDatabase.add_optimised(op, read_op)

325

326

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

327

def rewrite_concat(op):
    """
    Rewrites an NPU Concat op into one add op per input.

    Each add op writes its input into the concat ofm at an offset along the
    "axis4D" attribute that is the sum of the preceding inputs' sizes along
    that axis. Ops that are not NPU Concat ops are left untouched.
    """
    if not (op.run_on_npu and op.type == Op.Concat):
        return

    concat_inputs = op.inputs
    axis = op.attrs["axis4D"]

    axis_pos = 0
    for input_idx, input_tens in enumerate(concat_inputs):
        input_shape = op.ifm_shapes[input_idx]

        offset_list = [0, 0, 0, 0]
        offset_list[axis] = axis_pos
        next_pos = axis_pos + input_shape[axis]

        create_add_for_concat(
            op, op.name + str(input_idx) + "_add", input_tens, input_shape, Shape4D.from_list(offset_list)
        )
        axis_pos = next_pos

    # The inputs must fill the ofm exactly along the concat axis
    assert op.ofm_shapes[0][axis] == axis_pos

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

342

343

Patrik Gustavsson

ef3ebdd

2021-10-01 11:10:25 +0200

[diff] [blame]

344

def remove_memory_ops(op, arch):
    """Bypasses Reshape and Identity ops that run on the NPU."""
    if not op.run_on_npu:
        return
    if op.type in (Op.Reshape, Op.Identity):
        bypass_memory_only_ops(op)

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

347

348

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

349

def rewrite_activation(op, arch, nng):
    """
    Sets the "min"/"max" attributes of Clamp and ReluN ops.

    The values are the "min_int"/"max_int" attributes with the ifm zero point
    subtracted (ReluN only gets "max"). If the ofm has no zero point it is
    set to the one used for the ifm. Always returns op.
    """
    if op.type not in (Op.ReluN, Op.Clamp):
        return op

    # A missing (or zero) ifm zero point is treated as 0
    zero_point = op.ifm.quantization.zero_point or 0

    ofm_quant = op.ofm.quantization
    if ofm_quant.zero_point is None:
        ofm_quant.zero_point = zero_point

    if op.type == Op.Clamp:
        op.attrs["min"] = op.attrs["min_int"] - zero_point
        op.attrs["max"] = op.attrs["max_int"] - zero_point
    elif op.type == Op.ReluN:
        op.attrs["max"] = op.attrs["max_int"] - zero_point

    return op

def rewrite_rescale(op, arch, nng):
    """
    Rewrites a TOSA Rescale op.

    - If the producer of the ifm is a (depthwise) convolution or fully
      connected op, the rescale is fused into it as explicit scaling (only
      int32 ifm with per-channel scaling is handled) and the Rescale is
      bypassed.
    - Otherwise, for int8->int8, uint8->int8 and int8->uint8, the Rescale is
      converted to an AvgPool nop carrying the explicit scaling (per-channel
      is not supported).
    Unsupported cases print a message and assert. Always returns op.
    """
    if op.type != Op.Rescale:
        return op

    ifm = op.ifm
    ofm = op.ofm

    # some error checking
    assert len(ifm.ops) == 1
    producer = ifm.ops[0]

    # TODO currently not supported
    assert len(ifm.consumer_list) == 1

    attrs = op.attrs
    input_zp = attrs["input_zp"]
    output_zp = attrs["output_zp"]
    multiplier = attrs["multiplier"]
    shift = attrs["shift"]
    scale32 = attrs["scale32"]
    double_round = attrs["double_round"]
    per_channel = attrs["per_channel"]

    assert ifm.dtype in (DataType.uint8, DataType.int8, DataType.int32)
    assert ifm.dtype in (DataType.uint8, DataType.int8) or input_zp == 0
    assert ofm.dtype in (DataType.uint8, DataType.int8) or output_zp == 0
    assert (scale32 and ifm.dtype != DataType.int48) or (not scale32 and not double_round)

    # Check that input tensor has the same zp or no zp
    current_ifm_zp = ifm.quantization.zero_point
    if current_ifm_zp is not None and current_ifm_zp != input_zp:
        print("Error (fuse_rescale): zp of tensors producer/consumer differs unexpectedidly ")
        assert False
    ifm.quantization.zero_point = input_zp
    ofm.quantization.zero_point = output_zp

    for shift_val, mult_val in zip(shift, multiplier):
        # TODO these are the TOSA limitations
        assert mult_val >= 0
        assert 2 <= shift_val <= 62
        # TODO these are the HW limitations
        assert 0 <= shift_val < (1 << 6)
    explicit_scaling = ExplicitScaling(per_channel, shift, multiplier)

    rounding_mode = NpuRoundingMode.TFL if double_round and scale32 else NpuRoundingMode.NATURAL

    if producer.type.is_depthwise_conv2d_op() or producer.type.is_conv2d_op() or producer.type == Op.FullyConnected:
        assert len(multiplier) == len(shift) == len(producer.bias.values)

        if ifm.dtype == DataType.int32 and per_channel:
            producer.explicit_scaling = explicit_scaling
            producer.rounding_mode = rounding_mode

            # Bypass op
            producer.set_output_tensor(ofm)
            DebugDatabase.add_optimised(op, producer)
            return op
        else:
            print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", producer.type)
            assert False
    # TODO which are the cases we need to and can do standalone Rescale?
    # TODO should we try to identify a conversion uint8<->int8 accomplished by 2 RESCALE ops?
    # origin might be TFLite op QUANTIZE, should we look to see if they can be translated to QUANTIZE?
    # limited to these at the moment:
    elif (ifm.dtype, ofm.dtype) in (
        (DataType.int8, DataType.int8),
        (DataType.uint8, DataType.int8),
        (DataType.int8, DataType.uint8),
    ):
        # Create NOP performing the RESCALE
        nop = replace_rescale_with_avg_pool(op)
        nop.rounding_mode = rounding_mode
        nop.explicit_scaling = explicit_scaling

        if per_channel:
            # TODO
            print("Warning, unsupported TOSA Rescale")
            assert False
    else:
        print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", producer.type)
        assert False

    return op

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

452

def convert_pad_in_width(op):
    """
    Rewrites a PAD operator, padding in width, to add operators.

    One add op copies the IFM to its place inside the OFM, and up to two more
    add ops (left and right border) fill the padded parts of the OFM with the
    pad value, which is taken from the IFM zero point. The IFM zero point is
    then set to 0 and the op type is changed to ConcatTFLite.
    Returns the add op that copies the IFM.
    """
    assert op.type == Op.Pad
    assert op.ifm_shapes[0] is not None and op.ofm_shapes[0] is not None
    ifm = op.ifm
    ofm = op.ofm
    ifm_shape = op.ifm_shapes[0]
    # The pad op is no longer a producer of the ofm; the add ops created below take over
    ofm.ops = []
    ofm_shape = op.ofm_shapes[0]

    pad_spec = op.inputs[1].values
    # Second to last entry of the padding tensor holds the (left, right) padding
    pad_left, pad_right = pad_spec[-2]

    # Add op that copies IFM to the right place inside the OFM
    origin = Shape4D(0, 0, 0, 0)
    main_op = create_add_for_concat(op, op.name + "_main", ifm, ifm_shape, origin.with_width(pad_left))
    main_op.activation = op.activation

    ofm_quant = ofm.quantization
    fill_value = ifm.quantization.zero_point
    ifm.quantization.zero_point = 0

    if pad_left > 0:
        border_shape = Shape4D(1, ifm_shape.height, pad_left, ofm_shape.depth)
        border_tens = create_const_tensor(
            op.name + "_left",
            border_shape.as_list(),
            ofm.dtype,
            border_shape.elements() * [fill_value],
            np.uint8,
            quantization=ofm_quant,
        )
        border_tens.equivalence_id = create_equivalence_id(tuple(border_tens.values))
        create_add_for_concat(op, op.name + "_left", border_tens, border_shape, origin)

    if pad_right > 0:
        border_shape = Shape4D(1, ifm_shape.height, pad_right, ofm_shape.depth)
        border_tens = create_const_tensor(
            op.name + "_right",
            border_shape.as_list(),
            ofm.dtype,
            border_shape.elements() * [fill_value],
            np.uint8,
            quantization=ofm_quant,
        )
        border_tens.equivalence_id = create_equivalence_id(tuple(border_tens.values))
        create_add_for_concat(
            op, op.name + "_right", border_tens, border_shape, origin.with_width(ofm_shape.width - pad_right)
        )

    op.type = Op.ConcatTFLite
    return main_op

Patrik Gustavsson

2021-09-14 14:56:48 +0200

[diff] [blame]

495

def convert_table_to_lut(op, arch, nng):
    """
    Converts a Table op to a no-op + LUT.

    The table tensor (second input) is removed from the op's inputs and its
    values are handed to convert_to_lut. Other ops are returned unchanged.
    """
    if op.type is Op.Table:
        table_tens = op.inputs[1]
        op.inputs.remove(table_tens)
        # Shapes are recalculated now that the table input is gone
        op.set_ifm_ofm_shapes()
        return convert_to_lut(op, table_tens.values, "table")

    return op

505

506

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

507

def decompose_elem_tensors_hwc(op):
    """
    Decomposes an elementwise op if the ofm is too large in any dimension.

    When any ofm dimension exceeds 65535, one part op is created per
    (height, width, depth) tile of at most 65535 elements per dimension, and
    the original op is disconnected from its ifm(s) and ofm. For an ifm
    dimension that differs from the ofm dimension, the read offset in that
    dimension stays 0.
    """
    max_t_size = 65535
    ofm_shape = op.ofm_shapes[0] if op.write_shape is None else op.write_shape
    ifm_shape = op.ifm_shapes[0] if op.read_shapes[0] is None else op.read_shapes[0]
    ifm2_shape = op.ifm_shapes[1] or None
    if op.read_shapes[1] is not None:
        ifm2_shape = op.read_shapes[1]
    limit_shape = Shape4D(1, max_t_size, max_t_size, max_t_size)

    if not any(dim_size > max_t_size for dim_size in ofm_shape.as_list()):
        return

    def input_cut(in_shape, h_idx, w_idx, d_idx):
        # Offset and clipped shape of an input for the given tile; a dimension
        # that does not match the ofm is always read from offset 0
        off_d = d_idx * max_t_size if in_shape.depth == ofm_shape.depth else 0
        off_w = w_idx * max_t_size if in_shape.width == ofm_shape.width else 0
        off_h = h_idx * max_t_size if in_shape.height == ofm_shape.height else 0
        in_offset = Shape4D(0, off_h, off_w, off_d)
        return (in_offset, in_shape.clip(in_offset, limit_shape))

    tiles = ofm_shape.floordiv_const(max_t_size).add(1, 1, 1, 1)

    for h_idx in range(tiles.height):
        for w_idx in range(tiles.width):
            for d_idx in range(tiles.depth):
                ofm_offset = Shape4D(0, h_idx * max_t_size, w_idx * max_t_size, d_idx * max_t_size)
                ofm_cut = (ofm_offset, ofm_shape.clip(ofm_offset, limit_shape))

                ifm_cut = input_cut(ifm_shape, h_idx, w_idx, d_idx)

                if ifm2_shape is None:
                    ifm2_cut = (None, None)
                else:
                    ifm2_cut = input_cut(ifm2_shape, h_idx, w_idx, d_idx)

                create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut)

    # The part ops replace the original op
    op.ofm.ops.remove(op)
    op.ifm.consumer_list.remove(op)
    if op.ifm2 is not None:
        op.ifm2.consumer_list.remove(op)

def create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut):
    """
    Creates a clone of op that handles one part of the ifm(s)/ofm.

    Each cut is an (offset, shape) pair. The offsets are added to the read
    and write offsets already set on op (treated as zero when None). The
    clone is registered as a consumer of its ifm(s) and as a producer of the
    ofm of op. Returns the clone.
    """
    zero_offset = Shape4D(0, 0, 0, 0)
    clone = op.clone()

    base_read_offset = zero_offset if op.read_offsets[0] is None else op.read_offsets[0]
    base_write_offset = zero_offset if op.write_offset is None else op.write_offset
    ifm_part_offset, ifm_part_shape = ifm_cut
    ofm_part_offset, ofm_part_shape = ofm_cut

    clone.read_offsets[0] = base_read_offset + ifm_part_offset
    clone.read_shapes[0] = ifm_part_shape
    clone.write_offset = base_write_offset + ofm_part_offset
    clone.write_shape = ofm_part_shape
    clone.ifm_shapes = op.ifm_shapes.copy()
    clone.ofm_shapes = op.ofm_shapes.copy()
    clone.ifm.consumer_list.append(clone)
    op.ofm.ops.append(clone)

    ifm2_part_offset, ifm2_part_shape = ifm2_cut
    if ifm2_part_offset:
        base_read_offset2 = zero_offset if op.read_offsets[1] is None else op.read_offsets[1]
        clone.read_offsets[1] = base_read_offset2 + ifm2_part_offset
        clone.read_shapes[1] = ifm2_part_shape
        clone.ifm2.consumer_list.append(clone)

    return clone

577

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

578

579

def get_nhwc_stride(shape):
    """
    Returns the element strides for the given shape as a Shape4D,
    ordered (batch stride, height stride, width stride, 1).
    """
    per_width_step = shape.depth
    per_height_step = shape.width * per_width_step
    per_batch_step = shape.height * per_height_step
    return Shape4D(per_batch_step, per_height_step, per_width_step, 1)

584

585

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

586

def pad_to_rank(shape, rank):
    """
    Pads a shape (list) with leading 1s up to the given rank.

    A shape that already has at least that rank is returned as is.
    """
    missing = rank - len(shape)
    if missing <= 0:
        return shape
    return [1] * missing + shape

Patrik Gustavsson

2021-09-21 14:18:44 +0200

[diff] [blame]

596

def get_elem_shapes_removed_singles(op):
    """
    Returns the shapes of ifm(s)/ofm after removing the dimensions where the ofm is 1.

    The shapes are taken from op.ofm_shapes/op.ifm_shapes when available,
    otherwise from the tensors, and are padded with leading 1s to a common
    rank before the dimensions are removed.

    Returns (new_ofm_shape, new_ifm_shape, new_ifm2_shape) as lists. If every
    ofm dimension is 1, each shape is [1]. For a unary op, new_ifm2_shape is
    an empty list, or None in the all-ones case.
    """
    binary = op.ifm2 is not None
    ofm_shape = op.ofm_shapes[0].as_list() if len(op.ofm_shapes) > 0 else op.ofm.shape
    ifm_shape = op.ifm_shapes[0].as_list() if len(op.ifm_shapes) > 0 else op.ifm.shape
    if binary:
        # Guard on the list that is actually indexed (ifm_shapes), so that a
        # missing second entry falls back to the tensor shape
        ifm2_shape = op.ifm_shapes[1].as_list() if len(op.ifm_shapes) > 1 else op.ifm2.shape

    rank = max(len(ofm_shape), len(ifm_shape), len(ifm2_shape) if binary else 0)
    ofm_shape = pad_to_rank(ofm_shape, rank)
    ifm_shape = pad_to_rank(ifm_shape, rank)
    if binary:
        ifm2_shape = pad_to_rank(ifm2_shape, rank)

    # Indices of the dimensions to keep, decided by the ofm only
    kept = [idx for idx in range(rank) if ofm_shape[idx] != 1]

    if not kept:
        return [1], [1], [1] if binary else None

    new_ofm_shape = [ofm_shape[idx] for idx in kept]
    new_ifm_shape = [ifm_shape[idx] for idx in kept]
    new_ifm2_shape = [ifm2_shape[idx] for idx in kept] if binary else []

    return new_ofm_shape, new_ifm_shape, new_ifm2_shape

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

628

629

Patrik Gustavsson

2021-09-21 14:18:44 +0200

[diff] [blame]

630

def decomp_dims_elementwise(op):
    """
    Decompose elementwise ops whose rank is still > 3 (H,W,D) once the all-1 dimensions are removed.

    The (rank - 3) leading dimensions are viewed as the N dimension. One partial elementwise op is created
    (via create_elem_part_op) for every height/width/depth position of the leading ofm dimensions, each
    reading from/writing to the ifm(s)/ofm with an offset.
    For binary ops, an input reads from index 0 along every leading dimension whose size differs from the
    ofm's size; this is how broadcast by either ifm or ifm2 is handled.

    When decomposed, op is removed from its ofm's ops and from its inputs' consumer lists and the list of
    partial ops is returned. Otherwise the 4D shapes are stored on op and [op] is returned.
    """
    ifm, ifm2, ofm = op.ifm, op.ifm2, op.ofm
    is_binary = ifm2 is not None

    # Shapes with the dimensions that are 1 everywhere removed
    squeezed_ofm, squeezed_ifm, squeezed_ifm2 = get_elem_shapes_removed_singles(op)

    if len(squeezed_ofm) <= 3:
        # Nothing to decompose, only record the 4D shapes
        op.ofm_shapes.append(Shape4D(squeezed_ofm))
        op.ifm_shapes.append(Shape4D(squeezed_ifm))
        op.ifm_shapes.append(Shape4D(squeezed_ifm2))
        return [op]

    lead = len(squeezed_ofm) - 3

    ofm_outer = Shape4D(squeezed_ofm[:lead])
    ofm_outer_stride = get_nhwc_stride(ofm_outer)
    ofm_inner = Shape4D(squeezed_ofm[lead:])
    op.ofm_shapes.append(Shape4D([ofm_outer.elements()] + squeezed_ofm[lead:]))

    if is_binary:
        ifm_outer = Shape4D(squeezed_ifm[:lead])
        ifm2_outer = Shape4D(squeezed_ifm2[:lead])
        ifm_outer_stride = get_nhwc_stride(ifm_outer)
        ifm2_outer_stride = get_nhwc_stride(ifm2_outer)
        ifm_inner = Shape4D(squeezed_ifm[lead:])
        ifm2_inner = Shape4D(squeezed_ifm2[lead:])
        op.ifm_shapes.append(Shape4D([ifm_outer.elements()] + squeezed_ifm[lead:]))
        op.ifm_shapes.append(Shape4D([ifm2_outer.elements()] + squeezed_ifm2[lead:]))
    else:
        # A unary op reads with the same shape as it writes
        op.ifm_shapes.append(Shape4D([ofm_outer.elements()] + squeezed_ofm[lead:]))

    def make_cut(outer, outer_stride, inner, h, w, d):
        # A leading dimension that does not match the ofm is read from index 0
        position = Shape4D(
            0,
            h if outer.height == ofm_outer.height else 0,
            w if outer.width == ofm_outer.width else 0,
            d if outer.depth == ofm_outer.depth else 0,
        )
        return (Shape4D(position.dot_prod(outer_stride), 0, 0, 0), inner)

    part_ops = []
    for h in range(ofm_outer.height):
        for w in range(ofm_outer.width):
            for d in range(ofm_outer.depth):
                ofm_cut = make_cut(ofm_outer, ofm_outer_stride, ofm_inner, h, w, d)
                if is_binary:
                    ifm_cut = make_cut(ifm_outer, ifm_outer_stride, ifm_inner, h, w, d)
                    ifm2_cut = make_cut(ifm2_outer, ifm2_outer_stride, ifm2_inner, h, w, d)
                    part_ops.append(create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut))
                else:
                    part_ops.append(create_elem_part_op(op, ofm_cut, None, ofm_cut))

    # The original op is replaced by its parts, so disconnect it from its tensors
    ofm.ops.remove(op)
    ifm.consumer_list.remove(op)
    if is_binary:
        ifm2.consumer_list.remove(op)

    return part_ops

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

705

706

707

def decomp_elementwise(tens, arch, nng):
    """
    Decompose the elementwise ops producing tens.

    Each elementwise producer is first decomposed by rank (decomp_dims_elementwise) and every resulting
    part is then passed to decompose_elem_tensors_hwc.
    Always returns tens.
    """
    # Iterate over a snapshot, the decomposition may modify tens.ops
    for producer in list(tens.ops):
        if not producer.type.is_elementwise_op():
            continue
        for piece in decomp_dims_elementwise(producer):
            decompose_elem_tensors_hwc(piece)
    return tens

def reshape_concat_shape(shape, rank, axis):
    """
    Fold shape into 3 dimensions around axis.

    Returns [product of the dimensions before axis, shape[axis], product of the dimensions after axis
    (up to rank)]. An empty product is 1, so the first/last entry is 1 when axis is the first/last dimension.

    shape: sequence of dimension sizes
    rank: number of dimensions of shape to consider
    axis: dimension to keep; a negative value counts from the end (axis + rank)

    Raises IndexError if a negative axis is still negative after adding rank.
    """
    if axis < 0:
        # Without this a negative axis would silently produce a shape with the wrong number of elements
        axis += rank
        if axis < 0:
            raise IndexError(f"axis {axis - rank} is out of range for rank {rank}")

    new_h = 1
    for i in range(axis):
        new_h *= shape[i]

    new_c = 1
    for i in range(axis + 1, rank):
        new_c *= shape[i]

    return [new_h, shape[axis], new_c]

def reshape_concat(op):
    """
    Set the 4D ifm/ofm shapes and the "axis4D" attribute of a concat op.

    With rank > 3 (H,W,C) every input and the output are reshaped (reshape_concat_shape) so that the axis
    to be concatenated becomes the W dimension. Otherwise the tensor shapes are used as they are and the
    axis is shifted by (4 - rank).
    A negative "axis" attribute is counted from the end.
    """
    ofm = op.ofm
    rank = len(ofm.shape)

    axis = op.attrs["axis"]
    if axis < 0:
        axis += rank

    if rank > 3:

        def as_4d(shape):
            return Shape4D(reshape_concat_shape(shape, rank, axis))

        # After the reshape the concatenation axis is always W
        axis_4d = 2
    else:
        as_4d = Shape4D
        axis_4d = axis + (4 - rank)

    op.ifm_shapes.extend(as_4d(inp.shape) for inp in op.inputs)
    op.ofm_shapes.append(as_4d(ofm.shape))
    op.attrs["axis4D"] = axis_4d

760

761

762

def decomp_rewrite_concat(tens, arch, nng):
    """
    Reshape and rewrite the concat op producing tens.

    Only acts when tens has exactly one producer and that producer is an Op.Concat: the op is reshaped
    (reshape_concat), rewritten (rewrite_concat) and then disconnected from its ofm and its inputs.
    Always returns tens.
    """
    producers = tens.ops
    if len(producers) == 1:
        candidate = producers[0]
        if candidate.type == Op.Concat:
            reshape_concat(candidate)
            rewrite_concat(candidate)

            # The concat op itself is no longer part of the graph
            candidate.ofm.ops.remove(candidate)
            for source in candidate.inputs:
                source.consumer_list.remove(candidate)

    return tens

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

780

def decomp_rewrite_pad(op, arch):
    """
    Decomposition of pad to elementwise operations:
    For each dimension that needs padding:
    -Create a new PAD operator for the dimension to be padded
        Ifm/ofm are reshaped so that the width dimension is the one to be padded
        (rank for each is 3)
    -Rewrite the new PAD operator (convert_pad_in_width) so there is:
        -1 Add operator for copying the data
        -1 Add operator for each left/right to be padded

    Every dimension that needs no padding marks op as Op.Identity.

    Returns the op given by convert_pad_in_width for the last padded dimension, after op has been removed
    from the consumer lists of its inputs. If op is not a Pad, or no dimension was padded, op is returned.
    """
    # TODO several things would be possible to optimize
    # For instance there are cases when it should be possible to pad 2
    # dimensions at the same time.
    if op.type == Op.Pad:
        ofm_elements = shape_num_elements(op.ofm.shape)
        padding = op.inputs[1].values

        rank = len(op.ifm.shape)
        next_ifm = op.ifm
        next_ifm_shape = next_ifm.shape.copy()

        # Overwritten for every padded dimension, so it ends up as the last rewritten op
        rewritten_op = None
        ifm_quant = op.ifm.quantization.clone()

        for dim in range(padding.shape[0]):
            # Check if padding is to be applied in this dimension
            dim_pad = padding[dim]
            if not (dim_pad == 0).all():
                # Reshape so that width dimension is to be padded
                new_ifm_shape = reshape_concat_shape(next_ifm_shape, rank, dim)
                new_pad_input = np.zeros((4, 2), dtype=np.int32)
                new_pad_input[2] = dim_pad

                pad_op = create_pad_nop(f"{op.name}_dim_{dim}")
                pad_op.add_input_tensor(next_ifm)

                # The padding tensor of the new op is a fresh constant that only pads the width
                name = op.inputs[1].name + f"_dim_{dim}"
                new_pad_tens = create_const_tensor(
                    name, list(new_pad_input.shape), DataType.int32, new_pad_input, np.int32
                )
                pad_op.add_input_tensor(new_pad_tens)

                new_ofm_shape = new_ifm_shape.copy()
                new_ofm_shape[-2] = new_ofm_shape[-2] + dim_pad.sum()
                next_ifm_shape[dim] = next_ifm_shape[dim] + dim_pad.sum()

                if Shape4D(new_ofm_shape).elements() == ofm_elements:
                    # Last one, use op.ofm
                    ofm = op.ofm
                else:
                    # add a new ofm Tensor
                    ofm = Tensor(new_ofm_shape, op.ofm.dtype, f"{pad_op.name}_tens")
                    ofm.quantization = ifm_quant.clone()

                pad_op.set_output_tensor(ofm)
                pad_op.ifm_shapes.append(Shape4D(new_ifm_shape))
                pad_op.ofm_shapes.append(Shape4D(new_ofm_shape))
                DebugDatabase.add_optimised(op, pad_op)
                # The next padded dimension continues from this output
                next_ifm = ofm

                # Rewrite the pad op
                rewritten_op = convert_pad_in_width(pad_op)
            else:
                # Change to Identity operation (will be removed)
                op.type = Op.Identity

        if rewritten_op:
            assert op.ofm.shape == next_ifm_shape
            for inp in op.inputs:
                inp.consumer_list.remove(op)
            return rewritten_op

    return op

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

858

def fixup_quantization(op, arch, nng):
    """
    Replace unset (None) zero points of the op's ifm, ifm2 and ofm quantization with 0.

    The ofm is left untouched when op.forced_output_quantization is set.
    Tensors that are missing, or that have no quantization, are skipped.
    Returns op.
    """
    tensors = [op.ifm, op.ifm2]
    if not op.forced_output_quantization:
        tensors.append(op.ofm)

    for tens in tensors:
        # Same guard for inputs and output, a tensor without quantization has nothing to fix
        if tens and tens.quantization is not None and tens.quantization.zero_point is None:
            tens.quantization.zero_point = 0

    return op

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

869

def supported_operator_check(op, arch, nng):
    """
    Set op.run_on_npu from the TOSA supported operator check of arch.

    Asserts that an op that is not supported is a Placeholder, SubgraphInput or Const.
    Returns op.
    """
    checker = arch.tosa_supported_operators
    op.run_on_npu = checker.is_operator_supported(op)
    if not op.run_on_npu:
        # Only these operator types are accepted without NPU support
        assert op.type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
    return op

def tosa_optimise_graph(nng, arch):

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

876

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

877

# TODO the supported operator checking need to be split in semantic and HW checks

878

for idx, sg in enumerate(nng.subgraphs):

879

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[supported_operator_check],

885

rewrite_unsupported=False,

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

886

)

887

888

# Decomposing and rewrite of concat

889

for idx, sg in enumerate(nng.subgraphs):

890

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

891

nng, sg, arch, [decomp_rewrite_concat], [], rewrite_unsupported=False

892

)

893

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

894

# Decomposing of pad

895

for idx, sg in enumerate(nng.subgraphs):

896

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [decomp_rewrite_pad])

897

sg.refresh_after_modification()

898

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

899

# Handle sg input output

900

for idx, sg in enumerate(nng.subgraphs):

901

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[fix_sg_input_output_tosa],

907

rewrite_unsupported=True,

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

908

)

909

910

# Removal of reshapes

911

for sg in nng.subgraphs:

Patrik Gustavsson

ef3ebdd

2021-10-01 11:10:25 +0200

[diff] [blame]

912

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_memory_ops])

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

913

sg.refresh_after_modification()

914

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

915

# Decomposing of elementwise

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

916

for idx, sg in enumerate(nng.subgraphs):

917

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

918

nng, sg, arch, [decomp_elementwise], [], rewrite_unsupported=False

919

)

920

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

921

for idx, sg in enumerate(nng.subgraphs):

922

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[set_ifm_ofm_op_shapes],

928

rewrite_unsupported=False,

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

929

)

930

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

931

# Removal of Transpose

932

for idx, sg in enumerate(nng.subgraphs):

933

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[remove_const_transpose],

939

rewrite_unsupported=False,

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

940

)

941

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

942

# TODO, when and where to best handle calc_scaling_avgpool

943

for idx, sg in enumerate(nng.subgraphs):

944

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[calc_scaling_avgpool],

950

rewrite_unsupported=False,

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

951

)

952

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

953

# Rewrite Operators step

Patrik Gustavsson

f436ada

2021-09-14 14:56:48 +0200

[diff] [blame]

954

op_rewrite_list = [set_tensor_equivalence, rewrite_rescale, convert_depthwise_to_conv, convert_table_to_lut]

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

955

956

for idx, sg in enumerate(nng.subgraphs):

957

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

op_rewrite_list,

rewrite_unsupported=False,

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

964

)

965

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

966

# Post-processing step 1

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

967

for idx, sg in enumerate(nng.subgraphs):

968

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[rewrite_activation, add_padding_fields],

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

974

)

975

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

976

# Removal of Slice, need to be done after optimisation has been performed,

977

# since ifm/ofm_shapes are of importance to this function

978

for sg in nng.subgraphs:

979

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_splitsliceread])

980

sg.refresh_after_modification()

981

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

982

# Post-processing step 2

983

for idx, sg in enumerate(nng.subgraphs):

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

984

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

nng,

sg,

arch,

[],

[fixup_quantization],

990

)

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

991

Patrik Gustavsson