Blame - ethosu/vela/pass_packing.py - ml/ethos-u/ethos-u-vela

2023-02-07 13:01:03 +0100

[diff] [blame]

1

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Rickard Bolin

bc6ee58

2022-11-04 08:24:29 +0000

[diff] [blame]

16

#

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

17

# Description:

18

# Packs a subgraph with Neural Network Operations into Passes. Each Pass has one or more Operations.

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

19

import collections

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

20

import enum

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

21

Tim Hall

e6ccd87

2020-11-09 16:46:37 +0000

[diff] [blame]

22

from .debug_database import DebugDatabase

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

23

from .nn_graph import Pass

24

from .nn_graph import PassPlacement

25

from .operation import NpuBlockType

Louis Verhaard

2020-09-30 09:01:52 +0200

[diff] [blame]

26

from .operation import Op

Fredrik Svedberg

d9c2c42

2020-12-01 16:33:45 +0100

[diff] [blame]

27

from .operation_util import create_avgpool_nop

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

28

from .tensor import TensorPurpose

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

29

30

31

class PassFlags(enum.Flag):
    """Bit flags tracking what has been packed into the pass under construction.

    The flags are combined with ``|`` in the ``test_sequence`` rule table and
    tested with ``&`` while a pass is being built, so every member except
    ``Empty`` must be a distinct power of two.
    """

    # No flag set; also used as the "nothing to clear" value in test_sequence
    Empty = 0
    # Set by every rule that adds a main (non-post) operation to the pass
    Main = 1
    # Set when an op from npu_post_ops is added
    Post = 2
    # Set when an op from mac_main_ops is added
    Mac = 4
    # Set when an op from elem_wise_main_ops is added
    ElementWise = 8
    # Placement flags: the pass is placed on the NPU / CPU respectively
    Npu = 16
    Cpu = 32
    # Set when an op from startup_init_ops is added
    StartupInit = 64
    # Set when an op from memory_only_ops is added
    MemoryOnly = 128
    # Set when an op from npu_post_fuse_limited_ops is added
    PostFusingLimited = 256
    # Set when an op from memcpy_ops is added
    Memcpy = 512

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

43

44

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

45

# Operation-type sets used to classify an operation when it is packed into a pass.
# Each set is referenced by one rule in the test_sequence table.

# Main operations that set the Mac flag on a pass
mac_main_ops = set(
    (
        # convolutions
        Op.Conv2DBias,
        Op.Conv2D,
        Op.QuantizedConv2D,
        Op.Conv2DBackpropInputSwitchedBias,
        # depth-wise convolutions
        Op.DepthwiseConv2DBias,
        # FC layers
        Op.QuantizedMatMul,
        Op.MatMul,
        Op.FullyConnected,
        # RNN/LSTM/GRU
        Op.BlockLSTM,
        # pooling
        Op.QuantizedMaxPool,
        Op.QuantizedAvgPool,
        Op.AvgPool,
        Op.MaxPool,
        Op.ReduceSum,
    )
    # resize ops use pooling operations unless explicitly converted to other operations prior to pass packing
) | Op.op_set(Op.is_resize_op)

binary_elem_wise_main_ops = Op.op_set(Op.is_binary_elementwise_op)

unary_elem_wise_main_ops = Op.op_set(Op.is_unary_elementwise_op)

# Main operations that set the ElementWise flag on a pass
elem_wise_main_ops = binary_elem_wise_main_ops | unary_elem_wise_main_ops

activation_ops = Op.op_set(Op.is_relu_op)
npu_post_ops = activation_ops

npu_post_fuse_limited_ops = set(
    # Set of post operators that should not be fused with main/elementwise ops
    (Op.Sigmoid, Op.Tanh, Op.Quantize)
)

# Every op type that still lets a pass be flagged as element wise
elem_wise_ops = elem_wise_main_ops | activation_ops | set((Op.Sigmoid, Op.Tanh))

quantization_ops = set((Op.Dequantize, Op.Max, Op.Min))
# Operations that are always placed in a CPU pass
cpu_ops = set((Op.Softmax, Op.LRN, Op.Shape, Op.Pad, Op.AddN)) | quantization_ops

# Operations collected into the single start-up pass
startup_init_ops = set((Op.Const, Op.Placeholder, Op.SubgraphInput))
# Operations that produce a MemoryOnly pass
memory_only_ops = set(
    (
        Op.Squeeze,
        Op.Reshape,
        Op.QuantizedReshape,
        Op.ExpandDims,
    )
)
# Operations that set the Memcpy flag on an NPU pass
memcpy_ops = set((Op.Memcpy,))

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

# Rule table used when deciding whether an operation can join the pass being built.
# Each entry is a 4-tuple:
#   ops_set                  - operation types the rule applies to (None matches any type)
#   incompatible_pack_flags  - the rule is rejected if the pass already has any of these flags
#   flags_to_set             - flags added to the pass when the rule is applied
#   flags_to_clear           - flags removed from the pass when the rule is applied
# Rules are tried in list order, so the catch-all fallback must stay last.
test_sequence = [
    (
        # ops_set
        npu_post_ops,
        # incompatible_pack_flags
        PassFlags.Cpu | PassFlags.MemoryOnly | PassFlags.Main,
        # flags_to_set
        PassFlags.Npu | PassFlags.Post,
        # flags_to_clear
        PassFlags.Empty,
    ),
    (
        # ops_set
        npu_post_fuse_limited_ops,
        # incompatible_pack_flags
        PassFlags.Cpu | PassFlags.MemoryOnly | PassFlags.Main | PassFlags.PostFusingLimited,
        # flags_to_set
        PassFlags.Npu | PassFlags.PostFusingLimited,
        # flags_to_clear
        PassFlags.Empty,
    ),
    (
        # ops_set
        mac_main_ops,
        # incompatible_pack_flags
        PassFlags.Cpu | PassFlags.MemoryOnly | PassFlags.ElementWise | PassFlags.Main | PassFlags.PostFusingLimited,
        # flags_to_set
        PassFlags.Npu | PassFlags.Mac | PassFlags.Main,
        # flags_to_clear
        PassFlags.Empty,
    ),
    (
        # ops_set
        elem_wise_main_ops,
        # incompatible_pack_flags
        PassFlags.Cpu | PassFlags.MemoryOnly | PassFlags.Mac | PassFlags.Main | PassFlags.PostFusingLimited,
        # flags_to_set
        PassFlags.Npu | PassFlags.ElementWise | PassFlags.Main,
        # flags_to_clear
        PassFlags.Empty,
    ),
    (
        # ops_set
        startup_init_ops,
        # incompatible_pack_flags
        PassFlags.Npu | PassFlags.Cpu | PassFlags.MemoryOnly,
        # flags_to_set
        PassFlags.StartupInit | PassFlags.Main,
        # flags_to_clear
        PassFlags.Empty,
    ),
    (
        # ops_set
        memory_only_ops,
        # incompatible_pack_flags
        PassFlags.Npu | PassFlags.Cpu,
        # flags_to_set
        PassFlags.MemoryOnly | PassFlags.Main,
        # flags_to_clear
        PassFlags.Empty,
    ),
    (
        # ops_set
        memcpy_ops,
        # incompatible_pack_flags
        PassFlags.Cpu | PassFlags.MemoryOnly | PassFlags.Mac | PassFlags.Main | PassFlags.PostFusingLimited,
        # flags_to_set
        PassFlags.Npu | PassFlags.Memcpy | PassFlags.Main,
        # flags_to_clear
        PassFlags.Empty,
    ),
    (
        # ops_set
        cpu_ops,
        # incompatible_pack_flags
        PassFlags.Npu | PassFlags.MemoryOnly | PassFlags.Main,
        # flags_to_set
        PassFlags.Cpu | PassFlags.Main,
        # flags_to_clear
        PassFlags.Empty,
    ),
    (  # This last one is a fallback for unrecognised operations
        # ops_set
        None,
        # incompatible_pack_flags
        PassFlags.Npu | PassFlags.MemoryOnly | PassFlags.Main,
        # flags_to_set
        PassFlags.Cpu | PassFlags.Main,
        # flags_to_clear
        PassFlags.Empty,
    ),
]

# Some sanity checking: a rule must never set and clear the same flag
for (operation_set, incompatible_pack_flags, flags_to_set, flags_to_clear) in test_sequence:
    assert not flags_to_clear & flags_to_set

198

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

199

200

def pack_into_passes(nng, arch, verbose_packing=False):

Johan Alfvén

2022-01-27 06:47:26 +0100

[diff] [blame]

201

def visit_op(op, ignored):

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

202

visit_op_refcount[op] += 1

203

204

if visit_op_refcount[op] == 1: # First-time visit, go and fix up unused output tensors

205

for tens in op.outputs:

206

if len(tens.consumers()) == 0:

207

visit_op_refcount[op] += 1

208

Johan Alfvén

2022-01-27 06:47:26 +0100

[diff] [blame]

209

assert visit_op_refcount[op] <= len(op.outputs)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

210

if visit_op_refcount[op] == len(op.outputs):

211

212

if op.type in startup_init_ops:

213

startup_list.append(op)

214

else:

Louis Verhaard

2020-09-30 09:01:52 +0200

[diff] [blame]

215

ofm_tensor = op.ofm

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

216

if ofm_tensor is None:

217

ofm_tensor = op.outputs[0]

Tim Hall

73e843f

2021-02-04 22:47:46 +0000

[diff] [blame]

218

ofm_shape = op.ofm_shapes[0] if op.run_on_npu else None

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

219

Johan Alfvén

2022-01-27 06:47:26 +0100

[diff] [blame]

220

build_pass((op,), ofm_tensor, ofm_shape)

Patrik Gustavsson

6bb8f67

2020-12-21 14:49:13 +0100

[diff] [blame]

221

Johan Alfvén

2022-01-27 06:47:26 +0100

[diff] [blame]

222

def build_pass(start_ops_to_process, ofm_tensor=None, ofm_shape=None):

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

223

reverse_ops_list = []

224

curr_flags = PassFlags.Empty

225

npu_block_type = NpuBlockType.Default

226

227

reverse_intermediates = []

228

input_set = set()

229

ifm_tensor = None

230

primary_op = None

Patrik Gustavsson

224e99b

2021-01-14 10:55:43 +0100

[diff] [blame]

231

ifm_shapes = None

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

232

233

to_process = collections.deque()

234

for start_op in start_ops_to_process:

235

to_process.append((start_op, None))

236

237

while to_process:

238

curr_op, tens = to_process.popleft()

239

240

if curr_op in reverse_ops_list:

241

continue

242

243

for operation_set, incompatible_pack_flags, flags_to_set, flags_to_clear in test_sequence:

244

if operation_set is None or curr_op.type in operation_set:

245

if not (curr_flags & incompatible_pack_flags):

246

if flags_to_set & PassFlags.Npu:

247

if not curr_op.run_on_npu:

248

continue

249

250

reverse_ops_list.append(curr_op)

Louis Verhaard

2020-09-30 09:01:52 +0200

[diff] [blame]

251

new_block_type = curr_op.type.npu_block_type

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

252

if new_block_type != NpuBlockType.Default:

253

assert npu_block_type == NpuBlockType.Default

254

npu_block_type = new_block_type # Only one major block type per pass

255

assert primary_op is None

256

primary_op = curr_op

257

258

curr_flags &= ~flags_to_clear

259

curr_flags |= flags_to_set

260

261

if flags_to_set & PassFlags.Npu:

262

if flags_to_set & (

Johan Alfven

2023-02-02 09:07:48 +0100

[diff] [blame^]

263

PassFlags.Mac

264

| PassFlags.ElementWise

265

| PassFlags.Post

266

| PassFlags.PostFusingLimited

267

| PassFlags.Memcpy

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

268

):

269

assert len(curr_op.inputs) >= 1

Louis Verhaard

2020-09-30 09:01:52 +0200

[diff] [blame]

270

ifm_tensor = curr_op.ifm

Patrik Gustavsson

224e99b

2021-01-14 10:55:43 +0100

[diff] [blame]

271

ifm_shapes = curr_op.ifm_shapes.copy()

Louis Verhaard

04f8c00

2020-10-09 11:40:21 +0200

[diff] [blame]

272

assert ifm_tensor is not None, "IFM missing in {}".format(curr_op)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

273

assert ifm_tensor.purpose == TensorPurpose.FeatureMap

274

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

275

if operation_set is None:

276

print("Warning:", curr_op.type, "operation is unknown or unsupported, placing on CPU")

277

Charles Xu

600351a

2020-05-18 08:54:47 +0200

[diff] [blame]

278

for inp in reversed(curr_op.inputs):

Andreas Nevalainen

d8c032d

2020-09-11 10:25:09 +0200

[diff] [blame]

279

if inp is None:

280

continue

Patrik Gustavsson

fcb1a00

2021-02-03 09:13:57 +0100

[diff] [blame]

281

if can_pack(inp, curr_op):

282

to_process.append((inp.ops[0], inp))

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

283

else:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

input_set.add(inp)

break

else:

# This operation is not compatible with already packed operations, just register the tensor as an input

290

assert tens is not None

291

input_set.add(tens)

292

293

if curr_flags & PassFlags.Npu and not curr_flags & (PassFlags.ElementWise | PassFlags.Mac):

294

# Make the choice that if we don't have a mac operation, the ambidextrous operations go on the

295

# element wise unit

296

curr_flags |= PassFlags.ElementWise

297

298

is_element_wise = True

299

for op in reverse_ops_list:

Tim Hall

d8339a7

2021-05-27 18:49:40 +0100

[diff] [blame]

300

if op.type not in elem_wise_ops and op.type:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

301

is_element_wise = False

302

break

303

304

placement = PassPlacement.Unknown

305

if curr_flags & PassFlags.Npu:

306

assert placement == PassPlacement.Unknown

307

placement = PassPlacement.Npu

308

if curr_flags & PassFlags.Cpu:

309

assert placement == PassPlacement.Unknown

310

placement = PassPlacement.Cpu

311

if curr_flags & PassFlags.MemoryOnly:

312

assert placement == PassPlacement.Unknown

313

placement = PassPlacement.MemoryOnly

314

if curr_flags & PassFlags.StartupInit:

315

assert placement == PassPlacement.Unknown

316

placement = PassPlacement.StartupInit

317

assert placement != PassPlacement.Unknown

318

319

ops_list = list(reversed(reverse_ops_list))

320

intermediates = list(reversed(reverse_intermediates))

321

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

322

if primary_op is None:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

323

primary_op = create_primary_op(ops_list)

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

324

if primary_op is not None:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

325

visit_tensor_refcount[primary_op.inputs[0]] += 1

Louis Verhaard

2020-09-30 09:01:52 +0200

[diff] [blame]

326

npu_block_type = primary_op.type.npu_block_type

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

327

for input_tens in primary_op.inputs:

328

if input_tens not in input_set:

329

input_set.add(input_tens)

330

331

ordered_input_list = []

Louis Verhaard

0b8268a

2020-08-05 16:11:29 +0200

[diff] [blame]

332

# Keep LUT-s in a separate list and add as inputs at the end

333

# to avoid that they would accidentally be assigned as ifm or ifm2

334

lut_list = []

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

335

input_refcounts = collections.defaultdict(int)

Diqing Zhong

2abd3dd

2020-08-25 10:40:36 +0200

[diff] [blame]

336

input_ops_list = ops_list.copy()

337

338

# Check primary_op first

339

if primary_op is not None:

340

for inp in primary_op.inputs:

Andreas Nevalainen

d8c032d

2020-09-11 10:25:09 +0200

[diff] [blame]

341

if inp is None:

342

continue

Diqing Zhong

2abd3dd

2020-08-25 10:40:36 +0200

[diff] [blame]

343

add_input_list(inp, input_set, input_refcounts, lut_list, ordered_input_list)

344

input_ops_list.remove(primary_op)

345

346

# Check rest of the list

347

for op in input_ops_list:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

348

for inp in op.inputs:

Diqing Zhong

2abd3dd

2020-08-25 10:40:36 +0200

[diff] [blame]

349

add_input_list(inp, input_set, input_refcounts, lut_list, ordered_input_list)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

350

351

name = ops_list[0].name

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

352

ps = Pass(name, placement, is_element_wise, npu_block_type)

353

ps.ops = ops_list

354

ps.primary_op = primary_op

355

ps.inputs = ordered_input_list

356

ps.intermediates = intermediates

357

ps.outputs = list(ops_list[-1].outputs)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

358

359

# ElementWise operation, 2 IFMs

360

if ps.primary_op and ps.primary_op.type in binary_elem_wise_main_ops:

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

361

ps.ifm_tensor = ps.inputs[0]

Fredrik Svedberg

a0c3624

2020-06-03 15:43:31 +0200

[diff] [blame]

362

ps.ifm2_tensor = ps.inputs[-1]

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

363

Fredrik Svedberg

a0c3624

2020-06-03 15:43:31 +0200

[diff] [blame]

364

if len(ps.inputs) > 2:

365

ps.ifm_tensor = ps.inputs[-2]

Patrik Gustavsson

2349d42

2020-12-01 16:02:29 +0100

[diff] [blame]

366

367

# Get the corresponding ifm_shapes

368

for op in input_ops_list + [primary_op]:

Patrik Gustavsson

0a261cd

2020-12-23 08:50:44 +0100

[diff] [blame]

369

if op.run_on_npu:

370

if ps.ifm_tensor == op.ifm:

371

ps.ifm_shapes.append(op.ifm_shapes[0])

372

elif ps.ifm_tensor == op.ifm2:

373

ps.ifm_shapes.append(op.ifm_shapes[1])

Tim Hall

ffe8e28

2021-06-24 18:29:53 +0100

[diff] [blame]

374

Patrik Gustavsson

0a261cd

2020-12-23 08:50:44 +0100

[diff] [blame]

375

if ps.ifm2_tensor == op.ifm:

376

ps.ifm_shapes.append(op.ifm_shapes[0])

377

elif ps.ifm2_tensor == op.ifm2:

378

ps.ifm_shapes.append(op.ifm_shapes[1])

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

379

else:

380

ps.ifm_tensor = ifm_tensor

381

ps.ifm2_tensor = None

Patrik Gustavsson

cc6915c

2020-12-22 09:16:50 +0100

[diff] [blame]

382

if ps.primary_op is not None and ps.primary_op.run_on_npu:

Patrik Gustavsson

224e99b

2021-01-14 10:55:43 +0100

[diff] [blame]

383

ps.ifm_shapes.append(ifm_shapes[0])

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

384

385

ps.ofm_tensor = ofm_tensor

Patrik Gustavsson

6bb8f67

2020-12-21 14:49:13 +0100

[diff] [blame]

386

ps.ofm_shapes.append(ofm_shape)

Patrik Gustavsson

2349d42

2020-12-01 16:02:29 +0100

[diff] [blame]

387

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

388

assert ps.placement != PassPlacement.Npu or ps.ofm_tensor is not None

389

ps.weight_tensor = ps.get_primary_op_ifm_weights()[1]

390

ps.scale_tensor = ps.get_primary_op_ifm_weights_biases_ofm()[2]

Fredrik Svedberg

a0c3624

2020-06-03 15:43:31 +0200

[diff] [blame]

391

ps.lut_tensor = ps.get_primary_op_lut()

Louis Verhaard

0b8268a

2020-08-05 16:11:29 +0200

[diff] [blame]

392

ps.inputs.extend(lut_list)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

393

394

for op in ps.ops:

395

op.scheduled_pass = ps

396

397

reverse_pass_list.append(ps)

398

399

for inp, refcount in input_refcounts.items():

400

for _ in range(refcount):

visit_tensor(inp)

return ps

def visit_tensor(tens):

406

visit_tensor_refcount[tens] += 1

407

assert visit_tensor_refcount[tens] <= len(tens.consumers())

408

if visit_tensor_refcount[tens] == len(tens.consumers()):

Johan Alfvén

2022-01-27 06:47:26 +0100

[diff] [blame]

409

for op in reversed(tens.ops):

410

visit_op(op, tens)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

411

Jacob Bohlin

fb85873

2020-08-17 09:42:35 +0200

[diff] [blame]

412

def create_primary_op(op_list):

Patrik Gustavsson

e3b1b91

2021-02-09 15:38:46 +0100

[diff] [blame]

413

if any(op.type in (npu_post_ops | npu_post_fuse_limited_ops) and op.run_on_npu for op in op_list):

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

414

# Configure a 1x1 AvgPool and attach the op onto it

Jacob Bohlin

fb85873

2020-08-17 09:42:35 +0200

[diff] [blame]

415

op = op_list[0]

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

416

inp = op.inputs[0]

Michael McGeagh

8dbf8cf

2020-09-08 11:09:48 +0100

[diff] [blame]

417

avgpool_op = create_avgpool_nop(op.name + "_avgpool")

418

avgpool_op.add_input_tensor(inp)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

419

avgpool_out = inp.clone("_avgpooled")

420

avgpool_out.consumer_list.append(op)

Michael McGeagh

c5b549b

2020-08-07 11:54:28 +0100

[diff] [blame]

421

avgpool_op.set_output_tensor(avgpool_out)

Patrik Gustavsson

3a26920

2021-01-21 08:28:55 +0100

[diff] [blame]

422

avgpool_op.ifm_shapes = op.ifm_shapes.copy()

423

avgpool_op.ofm_shapes = op.ofm_shapes.copy()

Patrik Gustavsson

2021-02-16 12:57:03 +0100

[diff] [blame]

424

avgpool_op.read_offsets = op.read_offsets.copy()

Tim Hall

3df5b96

2021-12-17 14:09:19 +0000

[diff] [blame]

425

avgpool_op.read_shapes = op.read_shapes.copy()

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

426

427

op.inputs[0] = avgpool_out

Jacob Bohlin

fb85873

2020-08-17 09:42:35 +0200

[diff] [blame]

428

op_list.insert(0, avgpool_op)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

429

Tim Hall

e6ccd87

2020-11-09 16:46:37 +0000

[diff] [blame]

430

DebugDatabase.add_optimised(op, avgpool_op)

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

return avgpool_op

return None

Patrik Gustavsson

2021-02-03 09:13:57 +0100

[diff] [blame]

435

def can_pack(inp, curr_op):

436

if len(inp.ops) == 1:

437

next_op = inp.ops[0]

438

for outp in next_op.outputs:

439

consumers = outp.consumers()

440

if len(consumers) > 1 or (len(consumers) == 1 and consumers[0] != curr_op):

441

return False

442

443

# There cannot be any reshaping between next_op ofm and corresponding curr_op ifm

444

if len(curr_op.ifm_shapes) != 0 and len(next_op.ofm_shapes) != 0:

445

if inp == curr_op.ifm and next_op.ofm_shapes[0] != curr_op.ifm_shapes[0]:

446

return False

447

elif (

448

curr_op.ifm2 is not None and inp == curr_op.ifm2 and next_op.ofm_shapes[0] != curr_op.ifm_shapes[1]

):

return False

else:

return False

return True

Diqing Zhong

2020-08-25 10:40:36 +0200

[diff] [blame]

456

def add_input_list(inp_to_add, inp_set, inp_refcnts, lut_list, ordered_inp_list):

457

if inp_to_add in inp_set:

458

if inp_refcnts[inp_to_add] == 0:

459

if inp_to_add.purpose == TensorPurpose.LUT:

460

lut_list.append(inp_to_add)

461

else:

462

ordered_inp_list.append(inp_to_add)

463

inp_refcnts[inp_to_add] += 1

464

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

465

for sg in nng.subgraphs:

466

reverse_pass_list = []

467

visit_op_refcount = collections.defaultdict(int)

468

visit_tensor_refcount = collections.defaultdict(int)

startup_list = []

for tens in sg.output_tensors:

visit_tensor(tens)

if startup_list:

startup_ps = build_pass(startup_list)

477

startup_ps.outputs = [op.outputs[0] for op in startup_list] # Need to fixup the outputs

478

startup_ps.name = "startup_weight_initialisation"

479

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

480

# Graphs with both CPU and NPU ops might not have an optimal order in

481

# the pass list due to how the graph is traversed (depth first search).

482

# This can result in more context switching between CPU and NPU.

483

# Try to optimize this by moving/grouping CPU ops where that is possible.

484

# Criteria for CPU pass to be moved:

485

#

486

# 1) CPU passes that only depend on sg.input_tensors can be

487

# moved to the top of the list.

Johan Alfven

2023-02-07 13:01:03 +0100

[diff] [blame]

488

# ResourceVariables ops like VarHandle, ReadVariable, CallOnce

489

# can also be moved to the top of list.

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

490

#

491

# 2) A CPU pass X is allowed to be grouped together with CPU pass Y

492

# if there is no NPU pass between pass X and pass Y that depends

493

# on output from pass X or a MemoryOnly pass.

494

#

495

# Criteria 2 will try to move as many CPU passes towards the bottom of

# the list as possible.

pass_list_top = []

pass_list = []

# Filter out early passes from the rest

502

for ps in list(reversed(reverse_pass_list)):

503

if startup_ps == ps:

504

# startup pass belongs in the top

505

pass_list_top.insert(0, ps)

506

continue

507

Johan Alfven

2023-02-07 13:01:03 +0100

[diff] [blame]

508

if ps.placement == PassPlacement.Cpu and (

509

ps.ops[0].ifm in sg.input_tensors

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

510

and (ps.ops[0].ifm2 in sg.input_tensors or ps.ops[0].ifm2 is None)

Johan Alfven

2023-02-07 13:01:03 +0100

[diff] [blame]

511

or (ps.ops[0].type in (Op.VarHandle, Op.ReadVariable, Op.CallOnce))

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

512

):

Johan Alfven

2023-02-07 13:01:03 +0100

[diff] [blame]

513

# This CPU pass only depends on sg.input_tensors or resource variable

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

514

pass_list_top.append(ps)

515

else:

516

# Add pass to the list that will be sorted in the next step

517

pass_list.append(ps)

518

Johan Alfven

2023-02-07 13:01:03 +0100

[diff] [blame]

519

# Sort ops by op_index (same call order as in the original graph)

520

pass_list_top = sorted(pass_list_top, key=lambda ps: -1 if ps.ops[0].op_index is None else ps.ops[0].op_index)

521

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

522

# Sort the rest of the list based on criteria 2.

523

# Search from bottom of list and when a CPU pass is found

524

# search forward in the list and see if it is possible to join another CPU pass.

Johan Alfvén

2022-05-19 07:26:03 +0200

[diff] [blame]

525

last_idx = len(pass_list) - 1

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

526

for cpu_ps in reversed(pass_list):

527

if cpu_ps.placement != PassPlacement.Cpu:

528

continue

529

# CPU pass found, search forward and move pass if possible

530

idx = pass_list.index(cpu_ps)

531

for next_ps in pass_list[idx + 1 :]:

532

if next_ps.placement == PassPlacement.Cpu:

533

# It is possible to move the CPU pass

534

pass_list.remove(cpu_ps)

535

insert_index = pass_list.index(next_ps)

536

pass_list.insert(insert_index, cpu_ps)

537

break

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

538

Johan Alfven

c72cac8

2023-03-09 16:01:00 +0100

[diff] [blame]

539

# Check all outputs from the cpu pass

Johan Alfvén

2022-05-19 07:26:03 +0200

[diff] [blame]

540

if (

Johan Alfven

c72cac8

2023-03-09 16:01:00 +0100

[diff] [blame]

541

any(ofm in [next_ps.ops[0].ifm, next_ps.ops[0].ifm2] for ofm in cpu_ps.ops[0].outputs)

Johan Alfvén

2022-05-19 07:26:03 +0200

[diff] [blame]

542

or next_ps.placement == PassPlacement.MemoryOnly

543

):

Johan Alfven

c72cac8

2023-03-09 16:01:00 +0100

[diff] [blame]

544

# Not possible to move since next pass depends on the output from the cpu pass

Johan Alfvén

2022-05-19 07:26:03 +0200

[diff] [blame]

545

break

546

547

if pass_list.index(next_ps) == last_idx:

548

# Last element, ok to move the CPU pass

549

pass_list.remove(cpu_ps)

550

pass_list.append(cpu_ps)

551

break

552

Johan Alfvén

2022-04-19 16:07:05 +0200

[diff] [blame]

553

pass_list_top.extend(pass_list)

554

555

sg.passes = pass_list_top

Tim Hall