Blame - ethosu/vela/tosa_graph_optimiser.py - ml/ethos-u/ethos-u-vela

2023-01-13 17:57:25 +0000

[diff] [blame^]

1

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Rickard Bolin

bc6ee58

2022-11-04 08:24:29 +0000

[diff] [blame]

16

#

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

17

# Description:

18

# Early optimisation of the TOSA based network graph, using the rewrite_graph module to do the traversal of the graph.

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

19

import numpy as np

20

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

21

from . import rewrite_graph

22

from .api import NpuRoundingMode

23

from .data_type import DataType

24

from .debug_database import DebugDatabase

Jonas Ohlsson

0957e3e

2021-09-01 15:57:21 +0200

[diff] [blame]

25

from .graph_optimiser_util import bypass_memory_only_ops

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

26

from .graph_optimiser_util import calc_explicit_padding

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

27

from .graph_optimiser_util import convert_depthwise_to_conv

Patrik Gustavsson

f436ada

2021-09-14 14:56:48 +0200

[diff] [blame]

28

from .graph_optimiser_util import convert_to_lut

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

29

from .graph_optimiser_util import move_splitsliceread_to_consumer

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

30

from .graph_optimiser_util import needed_total_padding

31

from .graph_optimiser_util import set_ifm_ofm_op_shapes

32

from .graph_optimiser_util import set_tensor_equivalence

33

from .operation import ExplicitScaling

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

34

from .operation import Op

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

35

from .operation_util import create_add_nop

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

36

from .operation_util import create_avgpool_nop

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

37

from .operation_util import create_pad_nop

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

38

from .shape4d import Shape4D

39

from .tensor import create_const_tensor

Patrik Gustavsson

2021-09-08 15:04:11 +0200

[diff] [blame]

40

from .tensor import create_equivalence_id

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

41

from .tensor import shape_num_elements

42

from .tensor import Tensor

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

43

44

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

45

def replace_rescale_with_avg_pool(rescale_op):
    """
    Turns the given Rescale op into an AvgPool op, in place, and returns it.

    The attributes are copied from a template op created by create_avgpool_nop. A clone of
    the op taken before the change is registered as the original in the DebugDatabase.
    """
    assert rescale_op.type == Op.Rescale

    nop_template = create_avgpool_nop(rescale_op.name + "_avgpool")
    # Snapshot taken before the op is modified
    original = rescale_op.clone()

    rescale_op.attrs = nop_template.attrs.copy()
    rescale_op.type = Op.AvgPool
    DebugDatabase.add_optimised(original, rescale_op)

    return rescale_op

def calc_skirt(kernel, input_shape, explicit_padding):
    """
    Calculates the padding and skirt for a kernel applied to input_shape.

    explicit_padding is unpacked as (top, left, bottom, right).
    Returns (padding, skirt) where padding is (top, left, bottom, right) as returned by
    calc_explicit_padding, and skirt is (top, left, ypad - top, xpad - left) with ypad/xpad
    being the totals returned by needed_total_padding.
    """
    kernel_w, kernel_h = (int(size) for size in kernel.dilated_wh())
    stride_x, stride_y = (int(step) for step in kernel.stride)
    in_height = int(input_shape.height)
    in_width = int(input_shape.width)

    total_y = needed_total_padding(in_height, stride_y, kernel_h)
    total_x = needed_total_padding(in_width, stride_x, kernel_w)

    top, left, bottom, right = explicit_padding
    pad_top, pad_bottom = calc_explicit_padding(in_height, stride_y, kernel_h, int(top), int(bottom))
    pad_left, pad_right = calc_explicit_padding(in_width, stride_x, kernel_w, int(left), int(right))

    return (
        (pad_top, pad_left, pad_bottom, pad_right),
        (pad_top, pad_left, total_y - pad_top, total_x - pad_left),
    )

71

72

73

def add_padding_fields(op, arch, nng):
    """
    Updates the "explicit_padding" attribute and adds the "skirt" attribute of op.

    Only done for ops that run on the NPU and have an "explicit_padding" attribute.
    The values come from calc_skirt using the op kernel and its first ifm shape.
    Returns op.
    """
    if not (op.run_on_npu and "explicit_padding" in op.attrs):
        return op

    first_ifm_shape = op.ifm_shapes[0]

    if op.type == Op.Conv2DBackpropInputSwitchedBias:
        # TODO not yet supported, but there will be need for separate handling
        assert False
    else:
        padding, skirt = calc_skirt(op.kernel, first_ifm_shape, op.attrs["explicit_padding"])

    op.attrs["explicit_padding"] = padding
    op.attrs["skirt"] = skirt
    return op

Patrik Gustavsson

2021-09-07 13:30:29 +0200

[diff] [blame]

90

# Counts leading zeroes for a (int32)

91

def count_leading_zeros(a):
    """
    Counts the leading zero bits of a, viewed as a 32-bit value.

    Only the low 32 bits of a are considered, so negative values are seen in their two's
    complement form. Returns 32 when those bits are all zero.
    """
    # Masking first makes values with no bit set in the low 32 bits (e.g. 1 << 32) return 32;
    # scanning with a shifting mask would never terminate for such values.
    return 32 - (int(a) & 0xFFFFFFFF).bit_length()

def calc_scaling_avgpool(op, arch, nng):
    """
    Sets explicit scaling and NATURAL rounding on AvgPool ops; other ops are left untouched.

    With kernel_wh = op.kernel.elements_wh() and k = 32 - count_leading_zeros(kernel_wh - 1),
    the single multiplier is (((1 << 30) + 1) << k) // kernel_wh and the single shift is 30 + k.
    Returns op.
    """
    if op.type != Op.AvgPool:
        return op

    pad_top, pad_left, _, _ = op.attrs["explicit_padding"]
    # TODO Only support for when global scaling can be used.
    # That is when there is no padding
    assert pad_top == 0 and pad_left == 0
    assert op.explicit_scaling is None

    kernel_elems = op.kernel.elements_wh()
    k = 32 - count_leading_zeros(kernel_elems - 1)
    numerator = np.int64(((1 << 30) + 1) << k)
    multiplier = [numerator // kernel_elems]
    shift = [30 + k]

    op.rounding_mode = NpuRoundingMode.NATURAL
    op.explicit_scaling = ExplicitScaling(False, shift, multiplier)
    return op

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

123

def remove_const_transpose(op, arch, nng):
    """
    Removes a Transpose op whose ifm has a single producer of type Const.

    The ifm tensor shape (and its values, if any) are permuted according to the "perms"
    attribute, and the Const op is then set to produce the Transpose ofm directly.
    Any other Transpose prints a warning and asserts. Returns op.
    """
    if op.type != Op.Transpose:
        return op

    producers = op.ifm.ops
    if len(producers) == 1 and producers[0].type == Op.Const:
        const_op = producers[0]
        # Transpose the Tensor and data and remove Transpose
        # TODO move to Tensor?
        perm = op.attrs["perms"]
        old_shape = op.ifm.shape.copy()
        const_tens = op.ifm

        const_tens.shape = [old_shape[axis] for axis in perm]
        const_tens.bandwidth_shape = const_tens.shape
        const_tens.storage_shape = const_tens.shape

        if tens_has_values(const_tens):
            const_tens.values = const_tens.values.transpose(perm)

        op.ofm.values = const_tens.values
        # Bypass the Transpose op
        const_op.set_output_tensor(op.ofm)
        DebugDatabase.add_optimised(op, const_op)
    else:
        print("Warning: Cannot remove Transpose, and handling of Transpose is not supported")
        assert False

    return op


def tens_has_values(tens):
    """Returns True if the tensor carries constant values"""
    return tens.values is not None

Patrik Gustavsson

2021-10-06 14:46:46 +0200

[diff] [blame]

155

def insert_add_copy_for_const(op, ifm_ofm_shape):
    """
    Inserts an add op, adding a zero scalar, between a Const op and its original ofm.

    The Const op is changed to produce a clone of its ofm, and the add op takes the clone
    as input and writes to the original ofm. ifm_ofm_shape is used as both the first ifm
    shape and the ofm shape of the add op.
    """
    assert op.type == Op.Const
    orig_ofm = op.ofm
    const_out = orig_ofm.clone()
    op.set_output_tensor(const_out)

    add_name = orig_ofm.name + "_add"
    zero_scalar = create_const_tensor(
        add_name + "_zero_scalar", [1], const_out.dtype, [0], quantization=const_out.quantization
    )

    add_op = create_add_nop(add_name)
    for input_tens in (const_out, zero_scalar):
        add_op.add_input_tensor(input_tens)
    add_op.set_output_tensor(orig_ofm)
    add_op.ifm_shapes.append(ifm_ofm_shape)
    add_op.ifm_shapes.append(Shape4D(zero_scalar.shape))
    add_op.ofm_shapes.append(ifm_ofm_shape)
    add_op.run_on_npu = True

    DebugDatabase.add_optimised(op, add_op)

179

180

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

181

# TODO can we change to add for both TFLite and TOSA?

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

182

def insert_add_copy_op_after_tens(tens, ifm_ofm_shape):
    """
    Inserts an add op, adding a zero scalar, after tens.

    The add op reads tens and writes to a clone of it. All consumers that tens had before the
    insertion are redirected to read the clone instead. ifm_ofm_shape is used as both the
    first ifm shape and the ofm shape of the add op.
    """
    # Take the consumer list before the add op is connected to tens
    consumers_before = tens.consumer_list.copy()
    tens_copy = tens.clone()

    add_name = tens.name + "_add"
    zero_scalar = create_const_tensor(
        add_name + "_zero_scalar", [1], tens_copy.dtype, [0], quantization=tens_copy.quantization
    )

    add_op = create_add_nop(add_name)
    add_op.add_input_tensor(tens)
    add_op.add_input_tensor(zero_scalar)
    add_op.set_output_tensor(tens_copy)
    add_op.ifm_shapes.append(ifm_ofm_shape)
    add_op.ifm_shapes.append(Shape4D(zero_scalar.shape))
    add_op.ofm_shapes.append(ifm_ofm_shape)
    add_op.run_on_npu = True

    # Let the previous consumers read from the copy
    for consumer in consumers_before:
        if consumer is None:
            continue
        for input_idx, input_tens in enumerate(consumer.inputs):
            if input_tens == tens:
                consumer.set_input_tensor(tens_copy, input_idx)

    DebugDatabase.add_optimised(tens.ops[0], add_op)

211

212

Patrik Gustavsson

2021-10-06 14:46:46 +0200

[diff] [blame]

213

def get_shape_for_copy_op(shape):
    """
    Returns a Shape4D for a copy op, built from shape with all dimensions equal to 1 removed.

    If no dimension remains, [1] is used. If more than three dimensions remain, all but the
    last two are multiplied together and used as the height, so that the total number of
    elements is kept.
    """
    # remove dimensions that are set to 1
    squeezed = [dim for dim in shape if dim != 1] or [1]

    rank = len(squeezed)
    if rank <= 3:
        return Shape4D(squeezed)

    # Reshape so that batch becomes 1, by moving elements to H dimension
    n = rank - 2
    height = 1
    # Multiply the squeezed dimensions; indexing the unsqueezed shape here would pick up
    # removed 1-dims and give a too small height
    for dim in squeezed[:n]:
        height *= dim
    return Shape4D(squeezed[n:]).with_height(height)

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

235

def fix_sg_input_output_tosa(op, arch, nng):
    """
    Inserts copy (add) ops where tensors need to persist at the subgraph boundary.

    - Const op whose ofm has a None consumer (sg output): a copy op is inserted before the ofm.
    - NPU Reshape/Identity op: these ops are to be removed together with a tensor, which is not
      possible if both ifm and ofm need to persist. The ifm persists if it is a sg input or
      output or is produced by the CPU, the ofm if it is a sg output or consumed by the CPU.
      In that case a copy op is inserted after the ifm.
    Returns op.
    """

    def has_none_consumer(tens):
        return any(cons is None for cons in tens.consumer_list)

    if op.type == Op.Const and has_none_consumer(op.ofm):
        # Const operator with sg output, insert copy op before the ofm
        copy_shape = get_shape_for_copy_op(op.ofm.shape.copy())
        insert_add_copy_for_const(op, copy_shape)
        return op

    if not (op.run_on_npu and op.type in (Op.Reshape, Op.Identity)):
        return op

    ifm = op.ifm
    ofm = op.ofm

    # Is the ifm/ofm also ifm/ofm of the subgraph?
    ifm_is_sg_ifm = ifm.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
    ifm_is_sg_ofm = has_none_consumer(ifm)
    ofm_is_sg_ofm = has_none_consumer(ofm)
    # Is the ifm produced, or the ofm consumed, by the CPU?
    ifm_is_cpu_produced = any(prod is not None and not prod.run_on_npu for prod in ifm.ops)
    ofm_is_cpu_consumed = any(cons is not None and not cons.run_on_npu for cons in ofm.consumer_list)

    ifm_must_persist = ifm_is_sg_ofm or ifm_is_sg_ifm or ifm_is_cpu_produced
    ofm_must_persist = ofm_is_sg_ofm or ofm_is_cpu_consumed
    if ifm_must_persist and ofm_must_persist:
        # Both ifm and ofm need to persist, but only ifm need a copy, in order to remove the Operator
        # The shapes of the copy op are decided from the ifm
        copy_shape = get_shape_for_copy_op(ifm.shape.copy())
        insert_add_copy_op_after_tens(ifm, copy_shape)

    return op

def create_add_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """
    Creates an add op that writes ifm (plus a zero scalar) into the ofm of concat_op.

    The add op gets write_offset as write offset and ifm_shape as write shape, is appended to
    the producers of the ofm and is marked with memory function ConcatSliceWrite.
    Returns the created add op.
    """
    target = concat_op.ofm
    zero_scalar = create_const_tensor(name + "_zero_scalar", [1], target.dtype, [0], quantization=target.quantization)
    new_add = create_add_nop(name)

    new_add.inputs = [ifm, zero_scalar]
    new_add.outputs = [target]
    new_add.write_offset = write_offset
    new_add.write_shape = ifm_shape
    target.ops.append(new_add)
    DebugDatabase.add_optimised(concat_op, new_add)

    new_add.ifm_shapes.append(ifm_shape)
    new_add.ifm_shapes.append(Shape4D(zero_scalar.shape))
    new_add.ofm_shapes.append(concat_op.ofm_shapes[0])
    new_add.memory_function = Op.ConcatSliceWrite

    return new_add

# TODO Could be further optimized checking the type of the consumer,

285

# rather than just mimic the TFLite behaviour depending on type.

286

# TOSA bool_t not considered yet

287

def remove_splitsliceread(op, arch):
    """
    Removes a SplitSliceRead op, either by moving the read to the consumer or by an add op.

    The read is moved to the consumer when the ofm has exactly one consumer, that consumer
    runs on the NPU and is not a Reshape, the op ofm shape equals the ofm tensor shape and
    the ofm dtype is uint8, int8 or int16. Otherwise the op is replaced by an add op (adding
    a zero scalar) that takes over the read offset and read shape.
    """
    if op.type != Op.SplitSliceRead:
        return

    ofm = op.ofm
    consumers = ofm.consumer_list
    sole_consumer = consumers[0] if len(consumers) == 1 else None

    # Check if it is possible to put the SplitSliceRead on the tensor consumer
    can_move_to_consumer = (
        sole_consumer is not None
        and sole_consumer.run_on_npu
        and sole_consumer.type != Op.Reshape
        and op.ofm_shapes[0] == Shape4D.from_list(ofm.shape)
        and ofm.dtype in (DataType.uint8, DataType.int8, DataType.int16)
    )
    if can_move_to_consumer:
        # SplitSliceRead can be performed by tensor consumer
        move_splitsliceread_to_consumer(op, sole_consumer)
        return

    add_name = op.name + "_add"
    zero_scalar = create_const_tensor(add_name + "_zero_scalar", [1], ofm.dtype, [0], quantization=ofm.quantization)
    add_op = create_add_nop(add_name)
    add_op.inputs = [op.ifm, zero_scalar]
    add_op.outputs = [ofm]

    # The add op replaces the SplitSliceRead as producer of the ofm
    ofm.ops.remove(op)
    ofm.ops.append(add_op)
    add_op.ifm_shapes.append(op.ifm_shapes[0])
    add_op.ifm_shapes.append(Shape4D(zero_scalar.shape))
    add_op.ofm_shapes.append(op.ofm_shapes[0])
    add_op.read_offsets[0] = op.read_offsets[0]
    add_op.read_shapes[0] = op.read_shapes[0]

    op.ifm.consumer_list.remove(op)
    DebugDatabase.add_optimised(op, add_op)

319

320

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

321

def rewrite_concat(op):
    """
    Creates one add op per input of an NPU Concat op, each writing its input into the ofm.

    The write offset of each add op is the accumulated size, along the "axis4D" attribute,
    of the inputs before it. Ops that are not Concat or do not run on the NPU are ignored.
    """
    if not (op.run_on_npu and op.type == Op.Concat):
        return

    axis = op.attrs["axis4D"]
    next_offset = 0

    for idx, inp in enumerate(op.inputs):
        inp_shape = op.ifm_shapes[idx]
        offset_list = [0] * 4
        offset_list[axis] = next_offset
        end_of_input = next_offset + inp_shape[axis]
        create_add_for_concat(op, op.name + str(idx) + "_add", inp, inp_shape, Shape4D.from_list(offset_list))
        next_offset = end_of_input

    # The inputs must together fill the ofm along the concat axis
    assert op.ofm_shapes[0][axis] == next_offset

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

336

337

Patrik Gustavsson

ef3ebdd

2021-10-01 11:10:25 +0200

[diff] [blame]

338

def remove_memory_ops(op, arch):
    """
    Calls bypass_memory_only_ops for Reshape and Identity ops that run on the NPU
    """
    if not op.run_on_npu:
        return
    if op.type in (Op.Reshape, Op.Identity):
        bypass_memory_only_ops(op)

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

341

342

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

343

def rewrite_activation(op, arch, nng):
    """
    Sets the "max" (and for Clamp also "min") attribute of ReluN/Clamp ops.

    The values are the "max_int"/"min_int" attributes minus the ifm zero point (0 if the ifm
    has none). An ofm without zero point gets the same zero point. Returns op.
    """
    if op.type not in (Op.ReluN, Op.Clamp):
        return op

    zp = op.ifm.quantization.zero_point or 0
    ofm_quant = op.ofm.quantization
    if ofm_quant.zero_point is None:
        ofm_quant.zero_point = zp

    attrs = op.attrs
    if op.type == Op.Clamp:
        attrs["min"] = attrs["min_int"] - zp
    # Only ReluN and Clamp get here, and both have an upper limit
    attrs["max"] = attrs["max_int"] - zp

    return op

def rewrite_rescale(op, arch, nng):
    """
    Rewrites a TOSA Rescale op, either by fusing it into its producer or as an AvgPool op.

    - Producer is a (depthwise) conv2d or FullyConnected op: for an int32 ifm with per channel
      scaling, the explicit scaling and rounding mode are set on the producer, which then
      produces the Rescale ofm directly. Other combinations assert.
    - Otherwise, for int8->int8, uint8->int8 and int8->uint8: the op is replaced by an AvgPool
      op carrying the explicit scaling and rounding mode. Per channel scaling asserts.
    - Anything else asserts.

    The zero points of ifm and ofm are set from the "input_zp"/"output_zp" attributes.
    Ops of other types are returned untouched. Returns op.
    """
    if op.type == Op.Rescale:
        ifm = op.ifm
        ofm = op.ofm

        # some error checking
        assert len(ifm.ops) == 1
        prev_op = ifm.ops[0]

        # TODO currently not supported
        assert len(ifm.consumer_list) == 1

        input_zp = op.attrs["input_zp"]
        output_zp = op.attrs["output_zp"]
        multiplier = op.attrs["multiplier"]
        shift = op.attrs["shift"]
        scale32 = op.attrs["scale32"]
        double_round = op.attrs["double_round"]
        per_channel = op.attrs["per_channel"]

        # Zero points other than 0 are only accepted for uint8/int8 tensors
        assert ifm.dtype in (DataType.uint8, DataType.int8, DataType.int32)
        assert ifm.dtype in (DataType.uint8, DataType.int8) or input_zp == 0
        assert ofm.dtype in (DataType.uint8, DataType.int8) or output_zp == 0
        assert (scale32 and ifm.dtype != DataType.int48) or (not scale32 and not double_round)

        # Check that input tensor has the same zp or no zp
        ifm_zp = ifm.quantization.zero_point
        if ifm_zp is not None and ifm_zp != input_zp:
            print("Error (fuse_rescale): zp of tensors producer/consumer differs unexpectedidly ")
            assert False
        ifm.quantization.zero_point = input_zp
        ofm.quantization.zero_point = output_zp
        for s, m in zip(shift, multiplier):
            # TODO these are the TOSA limitations
            assert m >= 0
            assert 2 <= s <= 62
            # TODO these are the HW limitations
            assert 0 <= s < (1 << 6)
        explicit_scaling = ExplicitScaling(per_channel, shift, multiplier)

        # TFL rounding is only selected for double rounding with 32-bit scale
        if double_round and scale32:
            rounding_mode = NpuRoundingMode.TFL
        else:
            rounding_mode = NpuRoundingMode.NATURAL

        if prev_op.type.is_depthwise_conv2d_op() or prev_op.type.is_conv2d_op() or prev_op.type == Op.FullyConnected:
            # One multiplier/shift pair is expected per bias value of the producer
            assert len(multiplier) == len(shift) == len(prev_op.bias.values)

            if ifm.dtype == DataType.int32 and per_channel:
                prev_op.explicit_scaling = explicit_scaling
                prev_op.rounding_mode = rounding_mode

                # Bypass op
                prev_op.set_output_tensor(ofm)
                DebugDatabase.add_optimised(op, prev_op)
                return op
            else:
                print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type)
                assert False
        # TODO which are the cases we need to and can do standalone Rescale?
        # TODO should we try to identify a conversion uint8<->int8 accomplished by 2 RESCALE ops?
        # origin might be TFLite op QUANTIZE, should we look to see if they can be translated to QUANTIZE?
        # limited to these at the moment:
        elif (
            (ifm.dtype == DataType.int8 and ofm.dtype == DataType.int8)
            or (ifm.dtype == DataType.uint8 and ofm.dtype == DataType.int8)
            or (ifm.dtype == DataType.int8 and ofm.dtype == DataType.uint8)
        ):
            # Create NOP performing the RESCALE
            avgpool_op = replace_rescale_with_avg_pool(op)
            avgpool_op.rounding_mode = rounding_mode

            if per_channel:
                # TODO
                avgpool_op.explicit_scaling = explicit_scaling
                print("Warning, unsupported TOSA Rescale")
                assert False
            else:
                avgpool_op.explicit_scaling = explicit_scaling
        else:
            print("Warning, unsupported fusing of TOSA Rescale previous operator is of type:", prev_op.type)
            assert False

    return op

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

446

def convert_pad_in_width(op):
    """
    Rewrites PAD operator to an add that copies the IFM to the OFM
    + up to 2 add operators (left and right) that fill the OFM borders with the pad value.

    The pad value is the IFM zero point, read before it is reset to 0. The op itself is
    retyped to ConcatTFLite, and the add op doing the IFM copy is returned.
    """
    assert op.type == Op.Pad
    assert op.ifm_shapes[0] is not None and op.ofm_shapes[0] is not None
    ifm, ofm = op.ifm, op.ofm
    ifm_shape = op.ifm_shapes[0]
    # The add ops created below become the producers of the OFM
    ofm.ops = []
    ofm_shape = op.ofm_shapes[0]

    # Second last entry of the padding values is used as the (left, right) padding
    left, right = op.inputs[1].values[-2]

    # Add op that copies IFM to the right place inside the OFM
    origin = Shape4D(0, 0, 0, 0)
    main_add = create_add_for_concat(op, op.name + "_main", ifm, ifm_shape, origin.with_width(left))
    main_add.activation = op.activation

    ofm_quant = ofm.quantization
    pad_value = ifm.quantization.zero_point
    ifm.quantization.zero_point = 0

    def add_border(suffix, border_width, write_offset):
        # Constant tensor filled with the pad value, written to the OFM at write_offset
        border_shape = Shape4D(1, ifm_shape.height, border_width, ofm_shape.depth)
        border_tens = create_const_tensor(
            op.name + suffix,
            border_shape.as_list(),
            ofm.dtype,
            border_shape.elements() * [pad_value],
            quantization=ofm_quant,
        )
        border_tens.equivalence_id = create_equivalence_id(tuple(border_tens.values))
        create_add_for_concat(op, op.name + suffix, border_tens, border_shape, write_offset)

    if left > 0:
        add_border("_left", left, origin)
    if right > 0:
        add_border("_right", right, origin.with_width(ofm_shape.width - right))

    op.type = Op.ConcatTFLite
    return main_add

Patrik Gustavsson

2021-09-14 14:56:48 +0200

[diff] [blame]

489

def convert_table_to_lut(op, arch, nng):
    """
    Converts a Table op to a no-op + LUT, other ops are returned as they are.

    The table is the second input of the op; it is removed from the inputs and its values
    are handed to convert_to_lut, whose result is returned.
    """
    if op.type is Op.Table:
        lut_tens = op.inputs[1]
        op.inputs.remove(lut_tens)
        op.set_ifm_ofm_shapes()
        return convert_to_lut(op, lut_tens.values, "table")

    return op

499

500

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

501

def decompose_elem_tensors_hwc(op):
    """
    Decomposes an elementwise op into part ops if the ofm is larger than 65535 in any dimension.

    The ofm is cut into parts of at most 65535 in height, width and depth, and one part op is
    created per cut by create_elem_part_op. The original op is then disconnected from its
    ofm and ifm(s). Nothing is done if the ofm fits.
    """
    max_t_size = 65535
    limit_shape = Shape4D(1, max_t_size, max_t_size, max_t_size)

    # Read/write shapes take precedence over the op shapes
    ofm_shape = op.ofm_shapes[0] if op.write_shape is None else op.write_shape
    ifm_shape = op.ifm_shapes[0] if op.read_shapes[0] is None else op.read_shapes[0]
    ifm2_shape = op.ifm_shapes[1] or None
    if op.read_shapes[1] is not None:
        ifm2_shape = op.read_shapes[1]

    if not any(dim > max_t_size for dim in ofm_shape.as_list()):
        return

    def cut_for(shape, h_idx, w_idx, d_idx):
        # The offset only advances in dimensions where shape has the same size as the ofm,
        # otherwise it stays at 0 (presumably a broadcast dimension)
        offset = Shape4D(
            0,
            h_idx * max_t_size if shape.height == ofm_shape.height else 0,
            w_idx * max_t_size if shape.width == ofm_shape.width else 0,
            d_idx * max_t_size if shape.depth == ofm_shape.depth else 0,
        )
        return offset, shape.clip(offset, limit_shape)

    parts = ofm_shape.floordiv_const(max_t_size).add(1, 1, 1, 1)

    for h_idx in range(parts.height):
        for w_idx in range(parts.width):
            for d_idx in range(parts.depth):
                ofm_cut = cut_for(ofm_shape, h_idx, w_idx, d_idx)
                ifm_cut = cut_for(ifm_shape, h_idx, w_idx, d_idx)
                if ifm2_shape is None:
                    ifm2_cut = (None, None)
                else:
                    ifm2_cut = cut_for(ifm2_shape, h_idx, w_idx, d_idx)

                create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut)

    # The part ops have replaced the original op
    op.ofm.ops.remove(op)
    op.ifm.consumer_list.remove(op)
    if op.ifm2 is not None:
        op.ifm2.consumer_list.remove(op)
    return

def create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut):
    """
    Creates a clone of op that handles one part of the ifm(s)/ofm.

    Each cut is an (offset, shape) pair. The offsets are added to the read/write offsets op
    already has (taken as 0 if not set) and the shapes are set as read/write shapes of the
    clone. The clone is added as consumer of the ifm(s) and as producer of the ofm.
    ifm2 is only handled if the ifm2 offset is set. Returns the clone.
    """

    def offset_or_origin(offset):
        return offset if offset is not None else Shape4D(0, 0, 0, 0)

    part_op = op.clone()
    base_read_offset = offset_or_origin(op.read_offsets[0])
    base_write_offset = offset_or_origin(op.write_offset)
    ifm_part_offset, ifm_part_shape = ifm_cut
    ofm_part_offset, ofm_part_shape = ofm_cut

    part_op.read_offsets[0] = base_read_offset + ifm_part_offset
    part_op.read_shapes[0] = ifm_part_shape
    part_op.write_offset = base_write_offset + ofm_part_offset
    part_op.write_shape = ofm_part_shape
    part_op.ifm_shapes = op.ifm_shapes.copy()
    part_op.ofm_shapes = op.ofm_shapes.copy()
    part_op.ifm.consumer_list.append(part_op)
    op.ofm.ops.append(part_op)

    ifm2_part_offset, ifm2_part_shape = ifm2_cut
    if ifm2_part_offset:
        part_op.read_offsets[1] = offset_or_origin(op.read_offsets[1]) + ifm2_part_offset
        part_op.read_shapes[1] = ifm2_part_shape
        part_op.ifm2.consumer_list.append(part_op)

    return part_op

571

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

572

573

def get_nhwc_stride(shape):
    """
    Returns Shape4D(stride_n, stride_y, stride_x, 1) for shape, where stride_x is the depth,
    stride_y is width * stride_x and stride_n is height * stride_y
    """
    per_column = shape.depth
    per_row = shape.width * per_column
    per_batch = shape.height * per_row
    return Shape4D(per_batch, per_row, per_column, 1)

578

579

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

580

def pad_to_rank(shape, rank):
    """
    Pads a shape (list) to the given rank by prepending 1s.
    A shape that already has the given rank, or more, is returned as it is.
    """
    missing = rank - len(shape)
    if missing <= 0:
        return shape
    return [1] * missing + shape

Patrik Gustavsson

2021-09-21 14:18:44 +0200

[diff] [blame]

590

def get_elem_shapes_removed_singles(op):
    """
    Returns the shapes of ofm/ifm(s) after removing the dimensions where the ofm is 1.

    The shapes are taken from op.ofm_shapes/op.ifm_shapes when available, otherwise from the
    tensors, and are padded to a common rank before the dimensions are removed. The same
    dimensions are removed from the ifm(s) as from the ofm.

    Returns (new_ofm_shape, new_ifm_shape, new_ifm2_shape) as lists. If all ofm dimensions
    are 1, the shapes are [1]. For an op without ifm2, new_ifm2_shape is an empty list, or
    None in the case where all ofm dimensions are 1.
    """
    binary = op.ifm2 is not None
    ofm_shape = op.ofm_shapes[0].as_list() if len(op.ofm_shapes) > 0 else op.ofm.shape
    ifm_shape = op.ifm_shapes[0].as_list() if len(op.ifm_shapes) > 0 else op.ifm.shape
    ifm2_shape = None
    if binary:
        # Guard on the list that is indexed; checking op.ofm_shapes here could index past
        # the end of op.ifm_shapes
        ifm2_shape = op.ifm_shapes[1].as_list() if len(op.ifm_shapes) > 1 else op.ifm2.shape

    rank = max(len(ofm_shape), len(ifm_shape), len(ifm2_shape) if binary else 0)
    ofm_shape = pad_to_rank(ofm_shape, rank)
    ifm_shape = pad_to_rank(ifm_shape, rank)
    if binary:
        ifm2_shape = pad_to_rank(ifm2_shape, rank)

    # Indices of the dimensions to keep, decided by the ofm only
    kept = [idx for idx in range(rank) if ofm_shape[idx] != 1]
    new_ofm_shape = [ofm_shape[idx] for idx in kept]
    new_ifm_shape = [ifm_shape[idx] for idx in kept]
    new_ifm2_shape = [ifm2_shape[idx] for idx in kept] if binary else []

    if not new_ofm_shape:
        new_ofm_shape = [1]
        new_ifm_shape = [1]
        new_ifm2_shape = [1] if binary else None

    return new_ofm_shape, new_ifm_shape, new_ifm2_shape

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

622

623

Patrik Gustavsson

2021-09-21 14:18:44 +0200

[diff] [blame]

624

def decomp_dims_elementwise(op):
    """
    Decompose elementwise ops with Rank > 3 (H,W,D).

    The shapes are first reduced with get_elem_shapes_removed_singles. If the
    reduced ofm rank is still > 3, the leading (rank - 3) dimensions are viewed
    as the N dimension and the op is decomposed into one part op per index of
    those leading dimensions, reading and writing with offsets from/to the
    ifm(s)/ofm.
    Note: Broadcast needs to be handled for binary elementwise ops, and TOSA
    allows for broadcast by both ifm and ifm2.

    Returns the list of part ops created by create_elem_part_op (op is then
    removed from its ofm's ops and its ifm(s)' consumer lists), or [op] when
    the reduced rank is <= 3.
    """
    ifm_tens = op.ifm
    ifm2_tens = op.ifm2
    ofm_tens = op.ofm
    is_binary = op.ifm2 is not None

    # Remove dimensions that are all 1
    ofm_dims, ifm_dims, ifm2_dims = get_elem_shapes_removed_singles(op)
    reduced_rank = len(ofm_dims)

    if reduced_rank <= 3:
        # Nothing to decompose, only record the reduced shapes on the op
        op.ofm_shapes.append(Shape4D(ofm_dims))
        op.ifm_shapes.append(Shape4D(ifm_dims))
        # NOTE(review): appended for unary ops as well, where ifm2_dims is [] or None
        op.ifm_shapes.append(Shape4D(ifm2_dims))
        return [op]

    # Number of leading dimensions that are folded into N
    lead = reduced_rank - 3

    ofm_outer = Shape4D(ofm_dims[0:lead])
    ofm_outer_stride = get_nhwc_stride(ofm_outer)
    ofm_inner = Shape4D(ofm_dims[lead:])
    op.ofm_shapes.append(Shape4D([ofm_outer.elements()] + ofm_dims[lead:]))

    if is_binary:
        ifm_outer = Shape4D(ifm_dims[0:lead])
        ifm2_outer = Shape4D(ifm2_dims[0:lead])
        ifm_outer_stride = get_nhwc_stride(ifm_outer)
        ifm2_outer_stride = get_nhwc_stride(ifm2_outer)
        ifm_inner = Shape4D(ifm_dims[lead:])
        ifm2_inner = Shape4D(ifm2_dims[lead:])

        op.ifm_shapes.append(Shape4D([ifm_outer.elements()] + ifm_dims[lead:]))
        op.ifm_shapes.append(Shape4D([ifm2_outer.elements()] + ifm2_dims[lead:]))
    else:
        # Unary op: the ifm gets the same shape as the ofm
        op.ifm_shapes.append(Shape4D([ofm_outer.elements()] + ofm_dims[lead:]))

    def flat_offset(hwd, stride):
        # Turn an (h, w, d) index of the leading dims into an offset in the N position
        return Shape4D(Shape4D(0, *hwd).dot_prod(stride), 0, 0, 0)

    def broadcast_index(outer, hwd):
        # Use index 0 in every dimension where the input's size differs from the ofm's
        h, w, d = hwd
        return (
            h if outer.height == ofm_outer.height else 0,
            w if outer.width == ofm_outer.width else 0,
            d if outer.depth == ofm_outer.depth else 0,
        )

    part_ops = []
    for h in range(ofm_outer.height):
        for w in range(ofm_outer.width):
            for d in range(ofm_outer.depth):
                hwd = (h, w, d)
                ofm_cut = (flat_offset(hwd, ofm_outer_stride), ofm_inner)

                if not is_binary:
                    part_ops.append(create_elem_part_op(op, ofm_cut, None, ofm_cut))
                    continue

                ifm_cut = (flat_offset(broadcast_index(ifm_outer, hwd), ifm_outer_stride), ifm_inner)
                ifm2_cut = (flat_offset(broadcast_index(ifm2_outer, hwd), ifm2_outer_stride), ifm2_inner)
                part_ops.append(create_elem_part_op(op, ifm_cut, ifm2_cut, ofm_cut))

    # The original op is replaced by the part ops, disconnect it from its tensors
    ofm_tens.ops.remove(op)
    ifm_tens.consumer_list.remove(op)
    if is_binary:
        ifm2_tens.consumer_list.remove(op)

    return part_ops

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

699

700

701

def decomp_elementwise(tens, arch, nng):
    """
    Decompose the elementwise ops found in tens.ops:
    - elementwise ops with Rank > 3 (H,W,C) are split by decomp_dims_elementwise
    - every resulting op is handed to decompose_elem_tensors_hwc
      (decompose size of tensors exceeding NPU max size)

    Ops that are not elementwise are left untouched. Returns tens.
    """
    # Walk a snapshot of the op list; decomp_dims_elementwise may remove ops from an ofm's op list
    for producer in tens.ops.copy():
        if not producer.type.is_elementwise_op():
            continue
        for part_op in decomp_dims_elementwise(producer):
            decompose_elem_tensors_hwc(part_op)

    return tens

def reshape_concat_shape(shape, rank, axis):
    """
    Collapse shape to three dimensions around axis.

    Returns [prod(shape[0:axis]), shape[axis], prod(shape[axis + 1:rank])],
    where an empty product is 1.
    """
    leading = 1
    for idx in range(axis):
        leading *= shape[idx]

    # When axis is the last dimension this range is empty and trailing stays 1
    trailing = 1
    for idx in range(axis + 1, rank):
        trailing *= shape[idx]

    return [leading, shape[axis], trailing]

def reshape_concat(op):
    """
    Reshapes concat ops with Rank > 3 (H,W,C).

    Appends one Shape4D per input to op.ifm_shapes and one for the ofm to
    op.ofm_shapes, and stores the 4D concat axis in op.attrs["axis4D"].
    """
    ofm = op.ofm
    rank = len(ofm.shape)
    axis = op.attrs["axis"]
    if axis < 0:
        # Negative axis counts from the end
        axis += rank

    if rank > 3:
        # Reshape so that the axis to be concatenated is the W dimension
        def as_4d(shape):
            return Shape4D(reshape_concat_shape(shape, rank, axis))

        axis_4d = 2
    else:
        # Shapes are used as they are, only the axis is shifted to its 4D position
        def as_4d(shape):
            return Shape4D(shape)

        axis_4d = axis + (4 - rank)

    # Inputs first, then the output
    for inp in op.inputs:
        op.ifm_shapes.append(as_4d(inp.shape))
    op.ofm_shapes.append(as_4d(ofm.shape))
    op.attrs["axis4D"] = axis_4d

754

755

756

def decomp_rewrite_concat(tens, arch, nng):
    """
    Decompose concat ops with Rank > 3 (H,W,C).
    Rewrite of concat to elementwise operations.

    Only acts when tens has exactly one op and that op is a Concat.
    Returns tens in every case.
    """
    if not (len(tens.ops) == 1 and tens.ops[0].type == Op.Concat):
        return tens

    concat_op = tens.ops[0]

    reshape_concat(concat_op)
    rewrite_concat(concat_op)

    # Disconnect the concat op from its output and input tensors
    concat_op.ofm.ops.remove(concat_op)
    for inp in concat_op.inputs:
        inp.consumer_list.remove(concat_op)

    return tens

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

774

def decomp_rewrite_pad(op, arch):
    """
    Decomposition of pad to elementwise operations:
    For each dimension that needs padding:
    -Create a new PAD operator for the dimension to be padded.
        Ifm/ofm are reshaped so that the width dimension is the one to be padded
        (rank for each is 3)
    -Rewrite the new PAD operator so there is:
        -1 Add operator for copying the data
        -1 Add operator for each left/right to be padded

    Returns the op produced by convert_pad_in_width for the last padded
    dimension, or op itself when op is not a Pad or no dimension needs padding
    (in the latter case op.type is changed to Op.Identity).
    """
    # TODO several things would be possible to optimize
    # For instance there are cases when it should be possible to pad 2
    # dimensions at the same time.
    if op.type == Op.Pad:
        ofm_elements = shape_num_elements(op.ofm.shape)
        padding = op.inputs[1].values

        rank = len(op.ifm.shape)
        next_ifm = op.ifm
        next_ifm_shape = next_ifm.shape.copy()

        # NOTE(review): despite the name this is overwritten for every padded
        # dimension, so after the loop it refers to the most recent rewrite
        first_pad_rewrite_op = None
        ifm_quant = op.ifm.quantization.clone()

        for dim in range(padding.shape[0]):
            # Check if padding is to be applied in this dimension
            dim_pad = padding[dim]
            if not (dim_pad == 0).all():
                # Reshape so that width dimension is to be padded
                new_ifm_shape = reshape_concat_shape(next_ifm_shape, rank, dim)
                new_pad_input = np.zeros((4, 2), dtype=np.int32)
                new_pad_input[2] = dim_pad

                pad_op = create_pad_nop(f"{op.name}_dim_{dim}")
                pad_op.add_input_tensor(next_ifm)

                # The padding tensor is built from scratch; the previous clone of
                # op.inputs[1] was never used and has been dropped
                name = op.inputs[1].name + f"_dim_{dim}"
                new_pad_tens = create_const_tensor(name, list(new_pad_input.shape), DataType.int32, new_pad_input)
                pad_op.add_input_tensor(new_pad_tens)

                # The padded dimension grows by the sum of its before/after padding
                pad_total = dim_pad.sum()
                new_ofm_shape = new_ifm_shape.copy()
                new_ofm_shape[-2] = new_ofm_shape[-2] + pad_total
                next_ifm_shape[dim] = next_ifm_shape[dim] + pad_total

                if Shape4D(new_ofm_shape).elements() == ofm_elements:
                    # Last one, use op.ofm
                    ofm = op.ofm
                else:
                    # add a new ofm Tensor
                    ofm = Tensor(new_ofm_shape, op.ofm.dtype, f"{pad_op.name}_tens")
                    ofm.quantization = ifm_quant.clone()

                pad_op.set_output_tensor(ofm)
                pad_op.ifm_shapes.append(Shape4D(new_ifm_shape))
                pad_op.ofm_shapes.append(Shape4D(new_ofm_shape))
                DebugDatabase.add_optimised(op, pad_op)
                # The output of this pad is the input of the next one
                next_ifm = ofm

                # Rewrite the pad op
                converted_pad_op = convert_pad_in_width(pad_op)
                first_pad_rewrite_op = converted_pad_op
            else:
                # Change to Identity operation (will be removed)
                op.type = Op.Identity

        if first_pad_rewrite_op:
            assert op.ofm.shape == next_ifm_shape
            # The original pad op has been replaced, disconnect it from its inputs
            for inp in op.inputs:
                inp.consumer_list.remove(op)
            return first_pad_rewrite_op

    return op

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

850

def fixup_quantization(op, arch, nng):
    """
    Set a missing (None) zero point to 0 on the quantization of op's tensors.

    The ifm and ifm2 are always handled; the ofm is only handled when
    op.forced_output_quantization is not set. Tensors that are unset, or that
    carry no quantization, are skipped. Returns op.
    """

    def _default_zero_point(tens):
        # Same guard for all three tensors: previously only the ofm path checked
        # that a quantization was present before reading its zero point
        if tens and tens.quantization and tens.quantization.zero_point is None:
            tens.quantization.zero_point = 0

    _default_zero_point(op.ifm)
    _default_zero_point(op.ifm2)
    if not op.forced_output_quantization:
        _default_zero_point(op.ofm)
    return op

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

861

def supported_operator_check(op, arch, nng):
    """
    Record on op.run_on_npu whether arch's TOSA supported-operator checker accepts op.

    Asserts that an op that is not supported is a Placeholder, SubgraphInput or
    Const op. Returns op.
    """
    checker = arch.tosa_supported_operators
    op.run_on_npu = checker.is_operator_supported(op)
    # Only these op types are allowed to be unsupported
    assert op.run_on_npu or op.type in (Op.Placeholder, Op.SubgraphInput, Op.Const)
    return op

def tosa_optimise_graph(nng, arch):

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

868

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

869

# TODO the supported operator checking need to be split in semantic and HW checks

870

for idx, sg in enumerate(nng.subgraphs):

871

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[supported_operator_check],

877

rewrite_unsupported=False,

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

878

)

879

880

# Decomposing and rewrite of concat

881

for idx, sg in enumerate(nng.subgraphs):

882

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

883

nng, sg, arch, [decomp_rewrite_concat], [], rewrite_unsupported=False

884

)

885

Patrik Gustavsson

2021-10-05 13:53:34 +0200

[diff] [blame]

886

# Decomposing of pad

887

for idx, sg in enumerate(nng.subgraphs):

888

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [decomp_rewrite_pad])

889

sg.refresh_after_modification()

890

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

891

# Handle sg input output

892

for idx, sg in enumerate(nng.subgraphs):

893

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[fix_sg_input_output_tosa],

899

rewrite_unsupported=True,

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

900

)

901

902

# Removal of reshapes

903

for sg in nng.subgraphs:

Patrik Gustavsson

ef3ebdd

2021-10-01 11:10:25 +0200

[diff] [blame]

904

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_memory_ops])

Patrik Gustavsson

2021-09-24 13:46:42 +0200

[diff] [blame]

905

sg.refresh_after_modification()

906

Patrik Gustavsson

2021-09-23 13:52:34 +0200

[diff] [blame]

907

# Decomposing of elementwise

Patrik Gustavsson

2021-09-20 10:47:47 +0200

[diff] [blame]

908

for idx, sg in enumerate(nng.subgraphs):

909

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

910

nng, sg, arch, [decomp_elementwise], [], rewrite_unsupported=False

911

)

912

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

913

for idx, sg in enumerate(nng.subgraphs):

914

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[set_ifm_ofm_op_shapes],

920

rewrite_unsupported=False,

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

921

)

922

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

923

# Removal of Transpose

924

for idx, sg in enumerate(nng.subgraphs):

925

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[remove_const_transpose],

931

rewrite_unsupported=False,

Patrik Gustavsson

2021-08-23 15:33:59 +0200

[diff] [blame]

932

)

933

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

934

# TODO, when and where to best handle calc_scaling_avgpool

935

for idx, sg in enumerate(nng.subgraphs):

936

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[calc_scaling_avgpool],

942

rewrite_unsupported=False,

Patrik Gustavsson

f366fb1

2021-09-07 13:30:29 +0200

[diff] [blame]

943

)

944

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

945

# Rewrite Operators step

Patrik Gustavsson

f436ada

2021-09-14 14:56:48 +0200

[diff] [blame]

946

op_rewrite_list = [set_tensor_equivalence, rewrite_rescale, convert_depthwise_to_conv, convert_table_to_lut]

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

947

948

for idx, sg in enumerate(nng.subgraphs):

949

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

op_rewrite_list,

rewrite_unsupported=False,

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

956

)

957

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

958

# Post-processing step 1

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

959

for idx, sg in enumerate(nng.subgraphs):

960

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

nng,

sg,

arch,

[],

[rewrite_activation, add_padding_fields],

Patrik Gustavsson

2021-06-28 07:41:58 +0200

[diff] [blame]

966

)

967

Patrik Gustavsson

2021-09-01 12:43:02 +0200

[diff] [blame]

968

# Removal of Slice, need to be done after optimisation has been performed,

969

# since ifm/ofm_shapes are of importance to this function

970

for sg in nng.subgraphs:

971

rewrite_graph.visit_graph_post_order(sg.output_tensors, arch, [], [remove_splitsliceread])

972

sg.refresh_after_modification()

973

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

974

# Post-processing step 2

975

for idx, sg in enumerate(nng.subgraphs):

Jonas Ohlsson

2022-03-30 10:30:25 +0200

[diff] [blame]

976

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(

nng,

sg,

arch,

[],

[fixup_quantization],

982

)

Patrik Gustavsson

2021-08-17 14:26:38 +0200

[diff] [blame]

983

Patrik Gustavsson