Blame - ethosu/vela/register_command_stream_generator.py - ml/ethos-u/ethos-u-vela

2020-04-27 18:20:16 +0100

[diff] [blame]

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

Tim Hall

2020-10-27 12:43:14 +0000

[diff] [blame]

17

# Register level (low-level) command stream generation for Ethos-U. Takes a list of NPU operations and generates

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

18

# all the register settings. Calculates dependencies between commands, inserts wait operations, and generates a bit

Tim Hall

2020-10-27 12:43:14 +0000

[diff] [blame]

19

# stream suitable for interpretation by the Ethos-U processor.

Louis Verhaard

c629129

2021-03-19 09:35:48 +0100

[diff] [blame]

20

import math

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

21

from collections import defaultdict

Diego Russo

2020-04-21 17:39:10 +0100

[diff] [blame]

22

from enum import Enum

23

from enum import IntEnum

Jonas Ohlsson

2022-03-01 12:39:55 +0100

[diff] [blame]

24

from typing import cast

Dwight Lidman

2020-12-08 17:56:44 +0100

[diff] [blame]

25

from typing import Dict

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

26

from typing import List

27

from typing import Optional

Diego Russo

ea6111a

2020-04-14 18:41:58 +0100

[diff] [blame]

import numpy as np

from . import scaling

Louis Verhaard

aeae567

2020-11-02 18:04:27 +0100

[diff] [blame]

32

from .api import NpuAccelerator

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

33

from .api import NpuActivation

34

from .api import NpuActivationOp

35

from .api import NpuAddressRange

36

from .api import NpuBlockOperation

37

from .api import NpuBlockTraversal

38

from .api import NpuConv2DOperation

Dwight Lidman

2020-12-08 17:56:44 +0100

[diff] [blame]

39

from .api import NpuConvDepthWiseOperation

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

40

from .api import NpuDataType

41

from .api import NpuDmaOperation

42

from .api import NpuElementWiseOp

43

from .api import NpuElementWiseOperation

44

from .api import NpuFeatureMap

45

from .api import NpuKernel

46

from .api import NpuLayout

47

from .api import NpuOperation

48

from .api import NpuOperationType

49

from .api import NpuPadding

50

from .api import NpuPoolingOp

51

from .api import NpuPoolingOperation

52

from .api import NpuQuantization

53

from .api import NpuResamplingMode

54

from .api import NpuRoundingMode

55

from .api import NpuShape3D

56

from .api import NpuTileBox

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

57

from .architecture_allocator import ArchitectureBlockConfig

58

from .architecture_allocator import try_block_config

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

59

from .architecture_features import Accelerator

Diego Russo

2020-04-21 17:39:10 +0100

[diff] [blame]

60

from .architecture_features import ArchitectureFeatures

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

61

from .architecture_features import create_default_arch

Diego Russo

2020-04-21 17:39:10 +0100

[diff] [blame]

62

from .architecture_features import SHRAMElements

erik.andersson@arm.com

1878dab

2021-03-16 09:40:24 +0100

[diff] [blame]

63

from .errors import VelaError

Diego Russo

2020-04-21 17:39:10 +0100

[diff] [blame]

64

from .ethos_u55_regs.ethos_u55_regs import acc_format

65

from .ethos_u55_regs.ethos_u55_regs import activation

66

from .ethos_u55_regs.ethos_u55_regs import cmd0

67

from .ethos_u55_regs.ethos_u55_regs import cmd1

68

from .ethos_u55_regs.ethos_u55_regs import elementwise_mode

Fredrik Svedberg

a0c3624

2020-06-03 15:43:31 +0200

[diff] [blame]

69

from .ethos_u55_regs.ethos_u55_regs import pooling_mode

Jacob Bohlin

cf7da10

2020-05-20 09:03:40 +0200

[diff] [blame]

70

from .ethos_u55_regs.ethos_u55_regs import resampling_mode

Diego Russo

2020-04-21 17:39:10 +0100

[diff] [blame]

71

from .ethos_u55_regs.ethos_u55_regs import rounding

Diego Russo

2020-04-21 17:39:10 +0100

[diff] [blame]

72

from .numeric_util import quantise_float32

73

from .numeric_util import round_away_zero

Diego Russo

2020-04-21 17:39:10 +0100

[diff] [blame]

74

from .numeric_util import round_up_to_int

Patrik Gustavsson

c74682c

2021-08-17 14:26:38 +0200

[diff] [blame]

75

from .operation import ExplicitScaling

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

76

from .operation import NpuBlockType

Dwight Lidman

2020-12-08 17:56:44 +0100

[diff] [blame]

77

from .range_set import MemoryAccessSet

Louis Verhaard

2021-03-17 14:26:34 +0100

[diff] [blame]

78

from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM

Louis Verhaard

2020-11-26 11:42:04 +0100

[diff] [blame]

79

from .register_command_stream_util import calc_blockdep

80

from .register_command_stream_util import get_dma_memory_accesses

81

from .register_command_stream_util import get_op_memory_accesses

82

from .register_command_stream_util import get_strides

83

from .register_command_stream_util import get_wait_dependency

84

from .register_command_stream_util import has_ifm2

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

85

from .register_command_stream_util import shape3d_to_block

Louis Verhaard

2020-11-26 11:42:04 +0100

[diff] [blame]

86

from .register_command_stream_util import to_kernel

87

from .register_command_stream_util import UNARY_ELEMWISE_OPS

88

from .register_command_stream_util import Watermark

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

89

90

91

class RegisterMachine:
    """Remembers the last value written to each register, per bank.

    Used to find out whether a register write would actually change the
    remembered value.
    """

    def __init__(self):
        self.n_banks = 1
        # One mapping per bank; registers never written read back as None
        self.registers = [defaultdict(lambda: None) for _ in range(self.n_banks)]
        self.bank_idx = 0

    def set_register(self, reg, value):
        """Stores value for reg in the active bank.

        Returns True if value differs from what was previously stored for reg.
        """
        bank = self.registers[self.bank_idx]
        previous = bank[reg]
        bank[reg] = value
        # For debugging, returning True unconditionally forces every command to be emitted
        return previous != value

    def switch_bank(self):
        """Makes the next bank the active one, wrapping around after the last bank"""
        next_idx = self.bank_idx + 1
        self.bank_idx = next_idx % self.n_banks

105

106

107

class CmdMode(IntEnum):
    """Bit fields within the lower 16 bits (the command code) of a command word"""

    # Payload mode values, selected by Mask
    NoPayload = 0
    Payload32 = 1 << 14  # 0x4000
    # Mask that extracts the payload mode from the command code
    Mask = 0b11 << 14  # 0xC000
    # Mask that extracts the command opcode from the command code
    CmdOpMask = (1 << 10) - 1  # 0x03FF

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

114

class CommandStreamEmitter:
    """Collects commands as tuples of 32-bit words and tracks the stream size in bytes.

    Register writes (cmd0_with_param/cmd1_with_offset) go through a RegisterMachine,
    so a write that would not change the remembered register value is not emitted.
    """

    WORD_SIZE = 4

    def __init__(self):
        self.cmd_stream = []
        # Index 0 is used for non-DMA commands, index 1 for DMA commands (see get_reg_machine)
        self.reg_machine = [RegisterMachine(), RegisterMachine()]
        self.last_absolute_wait = defaultdict(int)
        # Number of bytes emitted so far
        self.offset = 0

    def get_reg_machine(self, cmd):
        """Returns the register machine for cmd; commands with "DMA" in their name use a separate one"""
        machine_idx = 1 if "DMA" in cmd.name else 0
        return self.reg_machine[machine_idx]

    def size_in_bytes(self):
        """Returns the total size of all emitted commands, in bytes"""
        return sum(len(cmd) * CommandStreamEmitter.WORD_SIZE for cmd in self.cmd_stream)

    def to_list(self) -> List[int]:
        """Returns the command stream flattened to a list of words"""
        words = []
        for cmd in self.cmd_stream:
            words.extend(cmd)
        return words

    def print_cmds(self):
        """Prints one line per emitted command: code, command name, param and payload"""
        print("Code: Command: Param: Payload:")
        for words in self.cmd_stream:
            header = words[0]
            code = header & 0x0000FFFF  # lower 16 bits
            param = header >> 16  # higher 16 bits

            payload_mode = CmdMode(code & CmdMode.Mask)
            opcode = code & CmdMode.CmdOpMask

            # code and command
            s = " 0x%04x " % code
            # Commands without payload are cmd0, all others are decoded as cmd1
            s += str(cmd0(opcode)) if payload_mode == CmdMode.NoPayload else str(cmd1(opcode))
            s = s.ljust(40)
            s += "%5d" % param

            # payload
            if payload_mode == CmdMode.Payload32:
                payload = words[1]
                s += " 0x%08x (%d)" % (payload, payload)
            else:
                s += " -"

            print(s)

    def cmd0_with_param(self, cmd: cmd0, param):
        """Emits a single-word command with a 16-bit param, unless the write is redundant"""
        raw_param = param.value if isinstance(param, Enum) else param
        param = int(raw_param) & 0xFFFF
        command = cmd.value | (param << 16)
        if self.get_reg_machine(cmd).set_register(cmd, (command, param)):
            # This is not a redundant command, actually write it
            self.cmd_stream.append((command,))
            self.offset += CommandStreamEmitter.WORD_SIZE

    def cmd1_with_offset(self, cmd: cmd1, offset, param=0x0):
        """Emits a two-word command (command word + 32-bit payload), unless the write is redundant"""
        offset = int(offset) & 0xFFFFFFFF
        param = int(param) & 0xFFFF
        command = cmd.value | CmdMode.Payload32.value | (param << 16)

        if self.get_reg_machine(cmd).set_register(cmd, (command, offset)):
            # This is not a redundant command, actually write it
            self.cmd_stream.append((command, offset))
            self.offset += CommandStreamEmitter.WORD_SIZE * 2

    def cmd1_with_address(self, cmd: cmd1, offset):
        """Emits a cmd1 with the low 32 bits of offset as payload and the bits above them as param"""
        upper_bits = offset >> 32
        self.cmd1_with_offset(cmd, offset, upper_bits)

    def cmd_wait(self, cmd: cmd0, channel: int, outstanding_count: int):
        """Emits a wait command; always written, it does not go through the register machine"""
        param = (16 * channel) + outstanding_count
        command = cmd.value | ((param & 0xFFFF) << 16)
        self.cmd_stream.append((command,))
        self.offset += CommandStreamEmitter.WORD_SIZE

    def cmd_do_operation(self, cmd: cmd0, param=0):
        """Emits an operation command (always written) and then switches the register bank"""
        param = int(param)
        command = cmd.value | ((param & 0xFFFF) << 16)

        self.cmd_stream.append((command,))
        self.offset += CommandStreamEmitter.WORD_SIZE
        self.get_reg_machine(cmd).switch_bank()

206

207

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

208

# -------------------------------------------------------------------

209

# REGISTER GENERATION

210

# -------------------------------------------------------------------

211

212

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

213

# TODO: Replace with definitions from ethos_u55_regs

214

class IFM2Broadcast(IntEnum):
    """Bit flags that are OR-ed together to form the IFM2_BROADCAST parameter"""

    BroadcastHdim = 0x01  # bit 0
    BroadcastWdim = 0x02  # bit 1
    BroadcastCdim = 0x04  # bit 2
    ReverseOperandOrder = 0x40  # bit 6
    UseIFM2Scalar = 0x80  # bit 7

# Maps a pooling operation to the corresponding pooling_mode value
pooling_op_map = {
    NpuPoolingOp.MAX: pooling_mode.MAX.value,
    NpuPoolingOp.AVERAGE: pooling_mode.AVERAGE.value,
    NpuPoolingOp.REDUCE_SUM: pooling_mode.REDUCE_SUM.value,
}

# Maps an elementwise operation to the corresponding elementwise_mode value
elementwise_op_map = {
    NpuElementWiseOp.MUL: elementwise_mode.MUL.value,
    NpuElementWiseOp.ADD: elementwise_mode.ADD.value,
    NpuElementWiseOp.SUB: elementwise_mode.SUB.value,
    NpuElementWiseOp.MIN: elementwise_mode.MIN.value,
    NpuElementWiseOp.MAX: elementwise_mode.MAX.value,
    NpuElementWiseOp.LRELU: elementwise_mode.LRELU.value,
    NpuElementWiseOp.ABS: elementwise_mode.ABS.value,
    NpuElementWiseOp.CLZ: elementwise_mode.CLZ.value,
    NpuElementWiseOp.SHR: elementwise_mode.SHR.value,
    NpuElementWiseOp.SHL: elementwise_mode.SHL.value,
}

# Maps an activation operation to the corresponding activation register enum.
# There is no entry for NpuActivationOp.TABLE_LOOKUP.
activation_op_map = {
    NpuActivationOp.NONE_OR_RELU: activation.NONE,
    NpuActivationOp.TANH: activation.TANH,
    NpuActivationOp.SIGMOID: activation.SIGMOID,
}

# Maps a SHRAMElements accumulator type (Acc16/Acc32/Acc40) to the corresponding acc_format value
acc_format_map = {
    SHRAMElements.Acc16: acc_format.FP_S5_10.value,
    SHRAMElements.Acc32: acc_format.INT_32BIT.value,
    SHRAMElements.Acc40: acc_format.INT_40BIT.value,
}

# Maps an IFM resampling mode to the corresponding resampling_mode register enum
resampling_mode_map = {
    NpuResamplingMode.NONE: resampling_mode.NONE,
    NpuResamplingMode.NEAREST: resampling_mode.NEAREST,
    NpuResamplingMode.TRANSPOSE: resampling_mode.TRANSPOSE,
}

# Maps data type size in bits to activation precision
precision_map = {
    8: 0,
    16: 1,
    32: 2,
}

# Maps rounding mode to the corresponding value
rounding_mode_map = {
    NpuRoundingMode.TFL: rounding.TFL.value,
    NpuRoundingMode.TRUNCATE: rounding.TRUNCATE.value,
    NpuRoundingMode.NATURAL: rounding.NATURAL.value,
}

Louis Verhaard

2021-03-17 14:26:34 +0100

[diff] [blame]

271

def check_mem_limits(memory_accesses: MemoryAccessSet, mem_limits: Dict[int, int]):
    """Checks that an operation's memory accesses respect the boundaries imposed by mem_limits

    mem_limits maps a region to the largest offset allowed in that region.

    Raises VelaError if an accessed region has no entry in mem_limits, or if the start
    or end of any accessed range is negative or larger than the limit of its region.
    """
    for mem_access in memory_accesses.accesses:
        for region, range_set in mem_access.regions.items():
            if region not in mem_limits:
                raise VelaError(f"Invalid region: {region}")
            # Named max_offset rather than max, so that the builtin max() is not shadowed
            max_offset = mem_limits[region]
            for start, end in range_set.ranges:
                for offset in (start, end):
                    if offset < 0:
                        raise VelaError(f"Negative address offset: {offset}, region: {region}")
                    if offset > max_offset:
                        raise VelaError(
                            f"Address offset out of range: {offset}, region: {region}, max: {max_offset}"
                        )

284

285

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

286

def quantise(value: float, quant: Optional[NpuQuantization]) -> int:
    """Quantizes the given value

    Without quantization info, scale 1 and zero point 0 are used; a missing scale_f32
    is replaced by scale 1.
    """
    if quant is None:
        return quantise_float32(value, 1, 0)
    scale = quant.scale_f32 if quant.scale_f32 is not None else 1
    return quantise_float32(value, scale, quant.zero_point)

291

292

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

293

def generate_padding(emit: CommandStreamEmitter, padding: NpuPadding):
    """Generates IFM_PAD registers"""
    pad_settings = (
        (cmd0.NPU_SET_IFM_PAD_TOP, padding.top),
        (cmd0.NPU_SET_IFM_PAD_LEFT, padding.left),
        (cmd0.NPU_SET_IFM_PAD_BOTTOM, padding.bottom),
        (cmd0.NPU_SET_IFM_PAD_RIGHT, padding.right),
    )
    for pad_cmd, amount in pad_settings:
        emit.cmd0_with_param(pad_cmd, amount)

299

300

301

def generate_activation(emit: CommandStreamEmitter, activation: Optional[NpuActivation], ofm: NpuFeatureMap):
    """Generates ACTIVATION registers

    A missing activation is treated as NpuActivationOp.NONE_OR_RELU. The min/max limits
    are clamped to both the int16 range and the range of the OFM data type.
    """
    act = NpuActivation(NpuActivationOp.NONE_OR_RELU) if activation is None else activation
    dtype = ofm.data_type
    int16_range = np.iinfo(np.int16)

    # Limits that are not given default to the full range of the OFM data type
    lower = dtype.min_value() if act.min is None else quantise(act.min, ofm.quantization)
    upper = dtype.max_value() if act.max is None else quantise(act.max, ofm.quantization)
    lower = max(lower, int16_range.min, dtype.min_value())
    upper = min(upper, int16_range.max, dtype.max_value())

    if act.op_type == NpuActivationOp.TABLE_LOOKUP:
        assert 0 <= act.lookup_table_index < 8
        activation_value = 16 + act.lookup_table_index
        if dtype == NpuDataType.INT32:
            activation_value |= 3 << 12  # Force I8 range
            lower = max(-128, lower)
            upper = min(127, upper)
    else:
        activation_value = cast(int, activation_op_map[act.op_type])

    emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION, activation_value)
    emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION_MIN, lower)
    emit.cmd0_with_param(cmd0.NPU_SET_ACTIVATION_MAX, upper)

327

328

329

def generate_addresses(emit: CommandStreamEmitter, ptr_cmds: List[cmd1], addresses: List[int], layout: NpuLayout):
    """Generates xFM_BASE registers

    Emits the first four entries of addresses using the first four commands in ptr_cmds.
    """
    if layout == NpuLayout.NHCWB16:
        # Check that all BasePointer addresses are aligned to 16 bytes
        assert not any(int(addr) % 16 for addr in addresses)
    for tile_idx in range(4):
        base_cmd = ptr_cmds[tile_idx]
        emit.cmd1_with_address(base_cmd, addresses[tile_idx])

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

336

337

338

def generate_tiles(emit: CommandStreamEmitter, tile_cmds: List[cmd0], tiles: NpuTileBox):
    """Generates xFM_HEIGHT0/HEIGHT1/WIDTH0 registers

    tile_cmds holds the commands for height_0, height_1 and width_0, in that order.
    Each register is written with the tile size minus one.
    """
    tile_sizes = (tiles.height_0, tiles.height_1, tiles.width_0)
    for idx, size in enumerate(tile_sizes):
        emit.cmd0_with_param(tile_cmds[idx], size - 1)

343

344

345

def generate_strides(
    emit: CommandStreamEmitter, fm: NpuFeatureMap, stride_c_cmd: cmd1, stride_y_cmd: cmd1, stride_x_cmd: cmd1
):
    """Generates STRIDE_C/Y/X registers"""
    strides = get_strides(fm)
    stride_settings = (
        (stride_c_cmd, strides.depth),  # stride between 16-byte channel blocks (C)
        (stride_y_cmd, strides.height),  # stride between vertical values (H)
        (stride_x_cmd, strides.width),  # stride between horizontal values (W)
    )
    for stride_cmd, stride in stride_settings:
        emit.cmd1_with_address(stride_cmd, stride)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

353

354

355

def generate_ifm_precision(emit: CommandStreamEmitter, fm: NpuFeatureMap, op_to_scale: int, precision_cmd: cmd0):
    """Generates IFM/IFM2_PRECISION register

    Bit 0 is set for signed data types, the activation precision starts at bit 2,
    bit 6 is set for NHCWB16 layout and op_to_scale starts at bit 8.
    """
    dtype = fm.data_type
    signed_bit = int(bool(dtype.is_signed()))
    precision_bits = precision_map[dtype.size_in_bits()] << 2
    layout_bit = (1 << 6) if fm.layout == NpuLayout.NHCWB16 else 0

    prec = signed_bit | precision_bits | layout_bit | (op_to_scale << 8)
    emit.cmd0_with_param(precision_cmd, prec)

367

368

369

def generate_ofm_precision(emit: CommandStreamEmitter, npu_op: NpuBlockOperation, use_global_scale: bool):
    """Generates OFM_PRECISION register

    Bit 0 is set for signed data types, the activation precision starts at bit 1,
    bit 6 is set for NHCWB16 layout, bit 8 selects global scale and the rounding mode
    starts at bit 14.
    """
    ofm = npu_op.ofm
    dtype = ofm.data_type
    signed_bit = int(bool(dtype.is_signed()))
    precision_bits = precision_map[dtype.size_in_bits()] << 1
    # Set global scale bit, as opposed to using per channel scale
    global_scale_bit = (1 << 8) if use_global_scale else 0
    layout_bit = (1 << 6) if ofm.layout == NpuLayout.NHCWB16 else 0
    rounding_bits = rounding_mode_map[npu_op.rounding_mode] << 14

    prec = signed_bit | precision_bits | global_scale_bit | layout_bit | rounding_bits
    emit.cmd0_with_param(cmd0.NPU_SET_OFM_PRECISION, prec)

383

384

385

def generate_ifm2_broadcast(emit: CommandStreamEmitter, npu_op: NpuElementWiseOperation):
    """Generates IFM2_BROADCAST register for binary elementwise operations"""
    flags = 0
    if npu_op.reversed_operands:
        flags |= IFM2Broadcast.ReverseOperandOrder

    if npu_op.ifm2_scalar is not None:
        # IFM2 is a constant, set UseIFM2Scalar bit to IFM2_BROADCAST
        flags |= IFM2Broadcast.UseIFM2Scalar
    else:
        ifm_shape = npu_op.ifm.shape
        ifm2_shape = npu_op.ifm2.shape
        # A dimension that differs between IFM and IFM2 is broadcast, which requires IFM2 to have size 1 in it
        broadcast_dims = (
            ("height", IFM2Broadcast.BroadcastHdim),  # Broadcast in 'H' dimension
            ("width", IFM2Broadcast.BroadcastWdim),  # Broadcast in 'W' dimension
            ("depth", IFM2Broadcast.BroadcastCdim),  # Broadcast in 'C' dimension
        )
        for dim, flag in broadcast_dims:
            ifm2_size = getattr(ifm2_shape, dim)
            if getattr(ifm_shape, dim) != ifm2_size:
                assert ifm2_size == 1
                flags |= flag

    emit.cmd0_with_param(cmd0.NPU_SET_IFM2_BROADCAST, flags)

412

413

414

def generate_ifm(emit: CommandStreamEmitter, ifm: NpuFeatureMap):
    """Generates general IFM registers

    Emits region, base addresses, tile sizes, depth, strides and zero point.
    """
    base_cmds = [cmd1.NPU_SET_IFM_BASE0, cmd1.NPU_SET_IFM_BASE1, cmd1.NPU_SET_IFM_BASE2, cmd1.NPU_SET_IFM_BASE3]
    tile_cmds = [cmd0.NPU_SET_IFM_HEIGHT0_M1, cmd0.NPU_SET_IFM_HEIGHT1_M1, cmd0.NPU_SET_IFM_WIDTH0_M1]

    emit.cmd0_with_param(cmd0.NPU_SET_IFM_REGION, ifm.region)
    generate_addresses(emit, base_cmds, ifm.tiles.addresses, ifm.layout)
    generate_tiles(emit, tile_cmds, ifm.tiles)
    emit.cmd0_with_param(cmd0.NPU_SET_IFM_DEPTH_M1, ifm.shape.depth - 1)
    generate_strides(emit, ifm, cmd1.NPU_SET_IFM_STRIDE_C, cmd1.NPU_SET_IFM_STRIDE_Y, cmd1.NPU_SET_IFM_STRIDE_X)
    zero_point = int(ifm.quantization.zero_point)
    emit.cmd0_with_param(cmd0.NPU_SET_IFM_ZERO_POINT, zero_point)

429

430

431

def generate_ifm2(emit: CommandStreamEmitter, ifm2: NpuFeatureMap, has_scalar: bool):
    """Generates general IFM2 registers

    Region, base addresses and tile sizes are only emitted when IFM2 is not a scalar.
    """
    # NOTE(review): the extent of this `if` body is reconstructed from a listing without
    # indentation; confirm that strides and zero point are emitted for scalar IFM2 too.
    if not has_scalar:
        base_cmds = [
            cmd1.NPU_SET_IFM2_BASE0,
            cmd1.NPU_SET_IFM2_BASE1,
            cmd1.NPU_SET_IFM2_BASE2,
            cmd1.NPU_SET_IFM2_BASE3,
        ]
        tile_cmds = [cmd0.NPU_SET_IFM2_HEIGHT0_M1, cmd0.NPU_SET_IFM2_HEIGHT1_M1, cmd0.NPU_SET_IFM2_WIDTH0_M1]
        emit.cmd0_with_param(cmd0.NPU_SET_IFM2_REGION, ifm2.region)
        generate_addresses(emit, base_cmds, ifm2.tiles.addresses, ifm2.layout)
        generate_tiles(emit, tile_cmds, ifm2.tiles)
    generate_strides(emit, ifm2, cmd1.NPU_SET_IFM2_STRIDE_C, cmd1.NPU_SET_IFM2_STRIDE_Y, cmd1.NPU_SET_IFM2_STRIDE_X)
    zero_point = int(ifm2.quantization.zero_point)
    emit.cmd0_with_param(cmd0.NPU_SET_IFM2_ZERO_POINT, zero_point)

446

447

448

def generate_ofm(emit: CommandStreamEmitter, ofm: NpuFeatureMap):
    """Generates general OFM registers

    Emits region, base addresses, tile sizes, height/width/depth, strides and zero point.
    """
    base_cmds = [cmd1.NPU_SET_OFM_BASE0, cmd1.NPU_SET_OFM_BASE1, cmd1.NPU_SET_OFM_BASE2, cmd1.NPU_SET_OFM_BASE3]
    tile_cmds = [cmd0.NPU_SET_OFM_HEIGHT0_M1, cmd0.NPU_SET_OFM_HEIGHT1_M1, cmd0.NPU_SET_OFM_WIDTH0_M1]

    emit.cmd0_with_param(cmd0.NPU_SET_OFM_REGION, ofm.region)
    generate_addresses(emit, base_cmds, ofm.tiles.addresses, ofm.layout)
    generate_tiles(emit, tile_cmds, ofm.tiles)
    # The *_M1 registers hold the size minus one
    shape = ofm.shape
    emit.cmd0_with_param(cmd0.NPU_SET_OFM_HEIGHT_M1, shape.height - 1)
    emit.cmd0_with_param(cmd0.NPU_SET_OFM_WIDTH_M1, shape.width - 1)
    emit.cmd0_with_param(cmd0.NPU_SET_OFM_DEPTH_M1, shape.depth - 1)
    generate_strides(emit, ofm, cmd1.NPU_SET_OFM_STRIDE_C, cmd1.NPU_SET_OFM_STRIDE_Y, cmd1.NPU_SET_OFM_STRIDE_X)
    zero_point = int(ofm.quantization.zero_point)
    emit.cmd0_with_param(cmd0.NPU_SET_OFM_ZERO_POINT, zero_point)

465

466

467

def generate_kernel(emit: CommandStreamEmitter, kernel: NpuKernel, block_traversal: NpuBlockTraversal):
    """Generates KERNEL related registers

    The kernel height/width registers are written with dilation * (size - 1).
    KERNEL_STRIDE packs the strides, dilations and block traversal into one value.
    """
    emit.cmd0_with_param(cmd0.NPU_SET_KERNEL_HEIGHT_M1, kernel.dilation_y * (kernel.height - 1))
    emit.cmd0_with_param(cmd0.NPU_SET_KERNEL_WIDTH_M1, kernel.dilation_x * (kernel.width - 1))

    stride_x_m1 = kernel.stride_x - 1
    stride_y_m1 = kernel.stride_y - 1
    stride = (
        (stride_x_m1 & 1)  # kernel x stride low bit
        | ((stride_y_m1 & 1) << 1)  # kernel y stride low bit
        | ((kernel.dilation_x - 1) << 3)
        | ((kernel.dilation_y - 1) << 4)
        | ((stride_x_m1 >> 1) << 6)  # kernel x stride extension bits
        | ((stride_y_m1 >> 1) << 9)  # kernel y stride extension bits
    )
    if block_traversal == NpuBlockTraversal.PART_KERNEL_FIRST:
        stride |= 1 << 2
    emit.cmd0_with_param(cmd0.NPU_SET_KERNEL_STRIDE, stride)

484

485

486

def generate_weights(emit: CommandStreamEmitter, weights: List[NpuAddressRange], arch: ArchitectureFeatures):
    """Generates WEIGHT registers

    Nothing is emitted if weights is empty. The region is taken from the first entry.
    """
    if not weights:
        return
    emit.cmd0_with_param(cmd0.NPU_SET_WEIGHT_REGION, weights[0].region)
    # Set weights sources for active and present cores
    core_cmds = (
        (cmd1.NPU_SET_WEIGHT_BASE, cmd1.NPU_SET_WEIGHT_LENGTH),
        (cmd1.NPU_SET_WEIGHT1_BASE, cmd1.NPU_SET_WEIGHT1_LENGTH),
    )
    for core, (base_cmd, length_cmd) in enumerate(core_cmds):
        if core < len(weights):
            weight_range = weights[core]
            emit.cmd1_with_address(base_cmd, weight_range.address)
            emit.cmd1_with_offset(length_cmd, weight_range.length)
        elif core < arch.ncores:
            # Core is present but has no weights of its own: first address, length 0
            emit.cmd1_with_address(base_cmd, weights[0].address)
            emit.cmd1_with_offset(length_cmd, 0)

504

505

506

def generate_biases(emit: CommandStreamEmitter, biases: List[NpuAddressRange], arch: ArchitectureFeatures):
    """Generates SCALE registers

    Nothing is emitted if biases is empty. The region is taken from the first entry.
    """
    if not biases:
        return
    emit.cmd0_with_param(cmd0.NPU_SET_SCALE_REGION, biases[0].region)
    # Set scale sources for active and present cores
    core_cmds = (
        (cmd1.NPU_SET_SCALE_BASE, cmd1.NPU_SET_SCALE_LENGTH),
        (cmd1.NPU_SET_SCALE1_BASE, cmd1.NPU_SET_SCALE1_LENGTH),
    )
    for core, (base_cmd, length_cmd) in enumerate(core_cmds):
        if core < len(biases):
            bias_range = biases[core]
            emit.cmd1_with_address(base_cmd, bias_range.address)
            emit.cmd1_with_offset(length_cmd, bias_range.length)
        elif core < arch.ncores:
            # Core is present but has no entry of its own: first address, length 0
            emit.cmd1_with_address(base_cmd, biases[0].address)
            emit.cmd1_with_offset(length_cmd, 0)

521

522

523

def generate_block_config(
    emit: CommandStreamEmitter,
    block_config: NpuShape3D,
):
    """Generates OFM_BLK_HEIGHT/WIDTH/DEPTH registers"""
    # The *_M1 registers hold the size minus one
    block_settings = (
        (cmd0.NPU_SET_OFM_BLK_HEIGHT_M1, block_config.height),
        (cmd0.NPU_SET_OFM_BLK_WIDTH_M1, block_config.width),
        (cmd0.NPU_SET_OFM_BLK_DEPTH_M1, block_config.depth),
    )
    for blk_cmd, size in block_settings:
        emit.cmd0_with_param(blk_cmd, size - 1)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

531

532

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

533

def generate_shram_registers(
    emit: CommandStreamEmitter,
    npu_op: NpuBlockOperation,
    arch_block_config: ArchitectureBlockConfig,
):
    """Generates IB_END/IB_START/AB_START/ACC_FORMAT registers

    IFM2_IB_START is only emitted for operations that have an IFM2.
    """
    shram_layout = arch_block_config.layout
    emit.cmd0_with_param(cmd0.NPU_SET_IFM_IB_END, shram_layout.ib_end)
    emit.cmd0_with_param(cmd0.NPU_SET_AB_START, shram_layout.ab_start)
    if has_ifm2(npu_op):
        emit.cmd0_with_param(cmd0.NPU_SET_IFM2_IB_START, shram_layout.ib_start2)
    acc_format_value = acc_format_map[arch_block_config.acc_type]
    emit.cmd0_with_param(cmd0.NPU_SET_ACC_FORMAT, acc_format_value)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

544

545

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

546

def get_block_config_for_npu_op(
    arch, npu_op: NpuBlockOperation, npu_block_type: NpuBlockType, is_partkernel: bool, ifm_resampling: resampling_mode
) -> Optional[ArchitectureBlockConfig]:
    """
    Given npu_op.block_config, returns a corresponding ArchitectureBlockConfig.
    Returns None if the block_config does not fit.
    """
    # NOTE(review): this function has no implementation beyond its docstring, so it
    # always returns None - confirm whether it is still needed.
    return None

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

553

554

Tim Hall

2021-05-27 18:49:40 +0100

[diff] [blame]

555

def get_arch_block_config(
    npu_op: NpuBlockOperation, block_traversal: NpuBlockTraversal, arch: ArchitectureFeatures
) -> ArchitectureBlockConfig:
    """Creates shared buffer allocation for the given operation

    Asserts that npu_op.block_config is set and that try_block_config accepts it.
    """
    assert npu_op.block_config is not None, "block_config has not been set"

    # Derive the block type from the kind of operation
    block_type = NpuBlockType.Default
    if isinstance(npu_op, NpuConv2DOperation):
        block_type = NpuBlockType.ConvolutionMxN
    elif isinstance(npu_op, NpuConvDepthWiseOperation):
        block_type = NpuBlockType.ConvolutionDepthWise
    elif isinstance(npu_op, NpuPoolingOperation):
        if npu_op.sub_op_type == NpuPoolingOp.REDUCE_SUM:
            block_type = NpuBlockType.ReduceSum
        else:
            block_type = NpuBlockType.Pooling
    elif isinstance(npu_op, NpuElementWiseOperation):
        block_type = NpuBlockType.ElementWise
    else:
        assert 0, "Unsupported operation"

    resampling = resampling_mode_map[npu_op.ifm_upscale]
    part_kernel_first = block_traversal == NpuBlockTraversal.PART_KERNEL_FIRST

    # Two LUT banks are requested when the activation is a table lookup
    act = npu_op.activation
    lut_banks = 2 if (act is not None and act.op_type == NpuActivationOp.TABLE_LOOKUP) else 0

    # The operation counts as scaled only if every feature map has a quantization with a scale
    feature_maps = [npu_op.ifm, npu_op.ofm]
    if npu_op.ifm2 is not None:
        feature_maps.append(npu_op.ifm2)
    scaled = all(fm.quantization is not None and fm.quantization.scale_f32 is not None for fm in feature_maps)

    ifm_bits = npu_op.ifm.data_type.size_in_bits()
    ifm_shape = shape3d_to_block(npu_op.ifm.shape)
    ifm2_shape = shape3d_to_block(npu_op.ifm2.shape) if has_ifm2(npu_op) else None
    uses_scalar = npu_op.ifm2_scalar is not None

    arch_block_config = try_block_config(
        shape3d_to_block(npu_op.block_config),
        arch,
        block_type,
        shape3d_to_block(npu_op.ofm.shape),
        ifm_shape,
        ifm2_shape,
        uses_scalar,
        ifm_bits,
        is_partkernel=part_kernel_first,
        kernel=to_kernel(npu_op.kernel),
        lut_banks=lut_banks,
        scaled=scaled,
        ifm_resampling=resampling,
    )
    assert arch_block_config is not None, f"block_config {npu_op.block_config} does not fit"
    return arch_block_config

Louis Verhaard

2020-11-25 14:10:30 +0100

[diff] [blame]

604

605

Louis Verhaard

2020-11-26 11:42:04 +0100

[diff] [blame]

606

def generate_cmd_waits(emit: CommandStreamEmitter, cmd_waits: Watermark):
    """Generates KERNEL_WAIT/DMA_WAIT

    A wait is only emitted for a watermark value that is not negative.
    """
    pending_waits = (
        (cmd0.NPU_OP_KERNEL_WAIT, cmd_waits.npu),
        (cmd0.NPU_OP_DMA_WAIT, cmd_waits.dma),
    )
    for wait_cmd, outstanding in pending_waits:
        if outstanding >= 0:
            emit.cmd_wait(wait_cmd, 0, outstanding)

613

614

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

615

def generate_common(
    emit: CommandStreamEmitter,
    npu_op: NpuBlockOperation,
    block_traversal: NpuBlockTraversal,
    arch: ArchitectureFeatures,
    use_global_scale: bool = False,
    op_to_scale: int = 0,
):
    """Generate the registers shared by most block operations.

    Emits, in order: IFM, IFM precision, IFM upscale, padding (if set), OFM,
    OFM precision, kernel (skipped for elementwise operations), weights, biases,
    activation, block config and SHRAM registers.

    use_global_scale is forwarded to the OFM precision generation and op_to_scale
    to the IFM precision generation.
    """
    ifm = npu_op.ifm
    ofm = npu_op.ofm
    assert ifm is not None and ofm is not None

    # Input feature map
    generate_ifm(emit, ifm)
    generate_ifm_precision(emit, ifm, op_to_scale, cmd0.NPU_SET_IFM_PRECISION)
    upscale_mode = resampling_mode_map[npu_op.ifm_upscale]
    emit.cmd0_with_param(cmd0.NPU_SET_IFM_UPSCALE, upscale_mode)
    padding = npu_op.padding
    if padding is not None:
        generate_padding(emit, padding)

    # Output feature map
    generate_ofm(emit, ofm)
    generate_ofm_precision(emit, npu_op, use_global_scale)

    # Kernel registers are not generated for elementwise operations
    if npu_op.op_type != NpuOperationType.ElementWise:
        kernel = npu_op.kernel
        assert kernel is not None
        generate_kernel(emit, kernel, block_traversal)

    generate_weights(emit, npu_op.weights, arch)
    generate_biases(emit, npu_op.biases, arch)
    generate_activation(emit, npu_op.activation, ofm)

    # The architecture block config is resolved before the block config register is
    # written, and is then used for the SHRAM registers
    arch_block_config = get_arch_block_config(npu_op, block_traversal, arch)
    generate_block_config(emit, npu_op.block_config)
    generate_shram_registers(emit, npu_op, arch_block_config)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

641

642

643

# -------------------------------------------------------------------

644

# SCALING

645

# -------------------------------------------------------------------

646

647

648

def generate_ofm_scaling_for_pooling(emit: CommandStreamEmitter, pool_op: NpuPoolingOperation):
    """Generates OFM_SCALE register for pooling operations.

    The (scale, shift) pair written to NPU_SET_OFM_SCALE is chosen by the first
    matching case:

    1. The activation is SIGMOID or TANH: the IFM scale is multiplied by 0x3000,
       with dedicated handling for INT16 input.
    2. pool_op.fused_quantize is set: the ratio IFM scale / OFM scale is quantised.
    3. pool_op.rescale is set: either an ExplicitScaling (multiplier/shift used
       directly) or a plain rescale factor.
    4. Otherwise: rescale is IFM scale / OFM scale when both are available,
       else scale=1, shift=0.
    """
    # For valid padding vela has to output scaling values
    kernel = pool_op.kernel
    ifm_quant = pool_op.ifm.quantization
    ofm_quant = pool_op.ofm.quantization
    if pool_op.activation is not None and pool_op.activation.op_type in (NpuActivationOp.SIGMOID, NpuActivationOp.TANH):
        assert ifm_quant.scale_f32 is not None
        rescale = 0x3000 * ifm_quant.scale_f32
        if pool_op.ifm.data_type == NpuDataType.INT16:
            # Calculate scale and shift for the output scale of 1/(3*4096)
            x_log2 = math.log2(ifm_quant.scale_f32)
            rounded_log2 = int(round(x_log2))
            is_power_of_two = abs(x_log2 - rounded_log2) < 0.001
            shift = rounded_log2 + 12
            if is_power_of_two and (
                (pool_op.activation.op_type == NpuActivationOp.TANH and shift in (0, 1))
                or (pool_op.activation.op_type == NpuActivationOp.SIGMOID and shift == 0)
            ):
                # Special handling if input scale is 1/2048 (tanh/sigmoid) or 1/4096 (tanh)
                scale = 3 << shift
                shift = 0
            else:
                # Double rescale (and count the doublings in shift) until it exceeds
                # half the int16 maximum or the shift limit is reached
                shift = 0
                max_rescale = np.iinfo(np.int16).max / 2
                while rescale <= max_rescale and shift <= 30:
                    shift += 1
                    rescale *= 2
                scale = int(rescale)
        else:
            # Number of bits in the rounded-up rescale value, plus one
            rescale_bits = len(bin(round_up_to_int(rescale))) - 2 + 1
            scale, shift = scaling.quantise_pooling_scale(kernel.height * kernel.width, rescale_bits)
            scale = int(round_away_zero(scale * rescale))
    elif pool_op.fused_quantize:
        # Quantize op requires different scaling
        ifm_scale_f64 = np.double(ifm_quant.scale_f32)
        ofm_scale_f64 = np.double(ofm_quant.scale_f32)
        scale, shift = scaling.quantise_scale(ifm_scale_f64 / ofm_scale_f64)
    elif pool_op.rescale is not None:
        # isinstance() rather than an exact type() comparison, so the check follows
        # normal Python type semantics
        if isinstance(pool_op.rescale, ExplicitScaling):
            # Note: reuse of rescale for explicit scaling to not expose this in the external API
            explicit_scaling = pool_op.rescale
            assert explicit_scaling.per_channel is False
            scale = explicit_scaling.multiplier[0]
            shift = explicit_scaling.shift[0]
        else:
            # for ResizeBilinear operations with rescale
            rescale = pool_op.rescale
            rescale_bits = len(bin(round_up_to_int(rescale))) - 2 + 1
            scale, shift = scaling.quantise_pooling_scale(kernel.height * kernel.width, rescale_bits)
            scale = int(round_away_zero(scale * rescale))
    else:
        # In case avg pool fused with concat or other memory operation, rescaling might be needed.
        # kernel height == kernel width == 1 is always true in this case
        # Normally the scale is maximised, to get maximum precision, which means that
        # if rescale != 1, scale need to consider the number of bits needed for rescaling
        if ofm_quant.scale_f32 is not None and ifm_quant.scale_f32 is not None:
            rescale = ifm_quant.scale_f32 / ofm_quant.scale_f32
            rescale_bits = 0
            if kernel.height == kernel.width == 1:
                if rescale > 1:
                    rescale_bits = len(bin(round_up_to_int(rescale))) - 2 + 1
                elif rescale < 1:
                    rescale_bits = -(len(bin(round_up_to_int(1 / rescale))) - 2 - 1)
            scale, shift = scaling.quantise_pooling_scale(kernel.height * kernel.width, rescale_bits)
            scale = int(round_away_zero(scale * rescale))
        else:
            scale = 1
            shift = 0

    emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, scale, shift)

719

720

721

def generate_scaling_for_elementwise(emit: CommandStreamEmitter, npu_op: NpuElementWiseOperation) -> int:
    """
    Generates OFM/OPA/OPB_SCALE registers for elementwise operators.
    Returns the operand to scale (0 unless advanced add/sub scaling selects one)
    """
    sub_op = npu_op.sub_op_type

    if sub_op not in (NpuElementWiseOp.ADD, NpuElementWiseOp.MUL, NpuElementWiseOp.SUB):
        if sub_op in (NpuElementWiseOp.LRELU, NpuElementWiseOp.ABS):
            # Only the OFM scale is needed, derived from the output quantization
            output_scale = npu_op.ofm.quantization.scale_f32
            ofm_scale, shift = scaling.quantise_scale(output_scale)
            emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, ofm_scale, shift)
        else:
            emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, 1, 0)
        return 0

    # ADD, SUB and MUL: collect the scales of both inputs and of the output
    ifm_quant = npu_op.ifm.quantization
    ifm2_quant = npu_op.ifm2.quantization
    ofm_quant = npu_op.ofm.quantization
    input_scale = ifm_quant.scale_f32 if ifm_quant else None
    input2_scale = ifm2_quant.scale_f32 if ifm2_quant else None
    output_scale = ofm_quant.scale_f32 if ofm_quant else None

    activation = npu_op.activation
    if activation is not None and activation.op_type in (NpuActivationOp.SIGMOID, NpuActivationOp.TANH):
        # Sigmoid/tanh activations use a fixed output scale
        output_scale = 1 / 0x3000

    if sub_op == NpuElementWiseOp.MUL:
        if npu_op.rescale:
            ofm_scale, shift = npu_op.rescale
        elif None in (input_scale, input2_scale, output_scale):
            ofm_scale, shift = 1, 0
        else:
            ofm_scale, shift = scaling.elementwise_mul_scale(input_scale, input2_scale, output_scale)
        emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, ofm_scale, shift)
        return 0

    # Add/Sub
    op_to_scale = 0
    opa_scale: float
    opb_scale: float
    bitdepth = npu_op.ifm.data_type.size_in_bits()
    use_advanced_scaling = False
    if None in (input_scale, input2_scale, output_scale):
        opa_scale = opb_scale = ofm_scale = 1
        opa_shift = shift = 0
        if npu_op.rescale is not None:
            ofm_scale, shift = npu_op.rescale
    elif input_scale == input2_scale:
        opa_scale, opb_scale, ofm_scale, shift = scaling.simplified_elementwise_add_sub_scale(
            input_scale, input2_scale, output_scale
        )
        opa_shift = 0  # Unused for this case
        if bitdepth == 16:
            # align the double rounding with that of advanced scaling
            opa_scale /= 2
            opb_scale /= 2
            shift -= 1
        else:
            # For 8 bit we can't guarantee double rounding with simplified scaling will always be
            # the same as with advanced scaling due to different shifts. When the ofm scale fulfils
            # the following we know that double rounding will have no effect for advanced scaling
            # no matter the input, so we can safely use simplified scaling with double rounding disabled.
            use_advanced_scaling = int(ofm_scale) & 0xFFF != 0
    else:
        use_advanced_scaling = True

    if use_advanced_scaling:
        # Use advanced implementation only when input/output scales differ,
        # or when we can't guarantee the absence of rounding errors
        advanced = scaling.advanced_elementwise_add_sub_scale(input_scale, input2_scale, output_scale, bitdepth)
        opa_scale, opa_shift, ofm_scale, shift, op_to_scale = advanced
        opb_scale = 0  # Unused for this case
        if npu_op.reversed_operands:
            # If the operand order is reversed we also have to swap which operand is scaled
            scale_opa = op_to_scale == scaling.OperandToScale.OPa
            op_to_scale = scaling.OperandToScale.OPb if scale_opa else scaling.OperandToScale.OPa

    emit.cmd1_with_offset(cmd1.NPU_SET_OPA_SCALE, opa_scale, opa_shift)
    emit.cmd1_with_offset(cmd1.NPU_SET_OPB_SCALE, opb_scale)
    emit.cmd1_with_offset(cmd1.NPU_SET_OFM_SCALE, ofm_scale, shift)
    return op_to_scale

# -------------------------------------------------------------------

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

808

# PRINT

809

# -------------------------------------------------------------------

Jacob Bohlin

e99b893

2020-07-13 16:01:51 +0200

[diff] [blame]

810

811

Jonas Ohlsson

2022-03-01 12:39:55 +0100

[diff] [blame]

812

def print_feature_map(fm: Optional[NpuFeatureMap], name: str):
    """Prints a description of the given feature map, labelled with name.

    Prints shape, region, layout, data type, size in bytes and quantization,
    followed by strides, tile information and the feature map's name.
    Nothing is printed when fm is None.
    """
    if fm is None:
        return

    if fm.quantization is None:
        q = "no quantization"
    else:
        q = f"scale: {fm.quantization.scale_f32}, zero: {fm.quantization.zero_point}"

    h, w, c = fm.shape
    # Size in bytes: number of elements times the element size
    sz = h * w * c * fm.data_type.size_in_bytes()
    print(f" {name}: h={h},w={w},c={c}, region={fm.region}, {fm.layout}, {fm.data_type}, size={sz}, {q}")

    strides = get_strides(fm)
    stride_str = f"Stride y/x/c: {strides.height}/{strides.width}/{strides.depth}"
    t = fm.tiles
    addresses = list(map(hex, t.addresses))
    print(f" {stride_str}, tiles: w0={t.width_0}, h0={t.height_0}, h1={t.height_1}, base={addresses}")
    print(f" name={fm.name}")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

828

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

829

Dwight Lidman

2020-12-08 17:56:44 +0100

[diff] [blame]

830

def print_operation(npu_op: NpuOperation, index: int = 0, cmd=None):
    """Prints a description of a single NPU operation.

    index is printed in front of the operation; cmd, when truthy, is appended to
    the header line. DMA operations and operations that are neither DMA nor block
    operations only get a header line. Other operations additionally get their
    feature maps, kernel, padding, weights, biases, activation and block config
    printed.
    """
    pass_info = f" {cmd}" if cmd else ""

    if isinstance(npu_op, NpuDmaOperation):
        print(f"{index} {npu_op.op_type.name} name={npu_op.name}, src={npu_op.src}, dest={npu_op.dest}:{pass_info}")
        return
    if isinstance(npu_op, NpuOperation) and not isinstance(npu_op, NpuBlockOperation):
        print(f"{index} {npu_op.op_type.name} name={npu_op.name}:{pass_info}")
        return

    if npu_op.kernel is None:
        k = None
    else:
        k = to_kernel(npu_op.kernel)

    # Pooling and elementwise operations have a sub operation type and a rescale attribute
    has_sub_op = isinstance(npu_op, (NpuPoolingOperation, NpuElementWiseOperation))
    is_conv2d = isinstance(npu_op, NpuConv2DOperation)

    # Header line
    if has_sub_op:
        print(f"{index} {npu_op.sub_op_type.name} {npu_op.op_type.name} name={npu_op.name}:{pass_info}")
    else:
        # A Conv2D whose kernel elements, strides and dilations multiply to 1 is shown as FullyConnected
        is_fully_connected = (
            is_conv2d and k.elements_wh() * k.stride.x * k.stride.y * k.dilation.x * k.dilation.y == 1
        )
        fc = "FullyConnected " if is_fully_connected else ""
        print(f"{index} {fc}{npu_op.op_type.name} name={npu_op.name}:{pass_info}")

    # Feature maps
    print_feature_map(npu_op.ifm, "IFM")
    if npu_op.ifm2_scalar is None:
        print_feature_map(npu_op.ifm2, "IFM2")
    else:
        quant_val = quantise(npu_op.ifm2_scalar, npu_op.ifm2.quantization)
        print(f" IFM2: Scalar={npu_op.ifm2_scalar} (quantized: {quant_val}), {npu_op.ifm2.quantization}")
    print_feature_map(npu_op.ofm, "OFM")

    # Kernel, padding, weights and biases
    if k is not None and npu_op.op_type != NpuOperationType.ElementWise:
        print(f" Kernel: {k}")
    if npu_op.padding is not None:
        print(f" {npu_op.padding}")
    for weights in npu_op.weights:
        print(f" Weights: {weights}")
    for bias in npu_op.biases:
        print(f" Scales: {bias}")

    # The activation is only printed when it is something other than a NONE_OR_RELU without min/max
    act = npu_op.activation
    if act is not None and (
        act.op_type != NpuActivationOp.NONE_OR_RELU or act.min is not None or act.max is not None
    ):
        lut = f", lut index={act.lookup_table_index}" if act.op_type == NpuActivationOp.TABLE_LOOKUP else ""
        print(f" Activation: {act.op_type.name}, min={act.min}, max={act.max}{lut}")

    if is_conv2d:
        print(f" {npu_op.block_traversal}")

    bh, bw, bc = npu_op.block_config
    if has_sub_op:
        rescale = f", rescale={npu_op.rescale}"
    else:
        rescale = ""
    print(f" Block config: h={bh},w={bw},c={bc}, {npu_op.ifm_upscale}, {npu_op.rounding_mode}{rescale}")

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

877

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

878

Dwight Lidman

2020-12-08 17:56:44 +0100

[diff] [blame]

879

def print_operations(npu_op_list: List[NpuOperation], npu_op_to_cmd=None):
    """Prints every operation in npu_op_list, prefixed by its index in the list.

    npu_op_to_cmd is an optional mapping from operation to an object that is
    printed alongside the operation; operations missing from it get no extra info.
    """
    cmd_lookup = npu_op_to_cmd if npu_op_to_cmd is not None else {}
    for position, operation in enumerate(npu_op_list):
        print_operation(operation, position, cmd_lookup.get(operation))

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

883

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

884

885

# -------------------------------------------------------------------

886

# OPERATIONS

887

# -------------------------------------------------------------------

888

889

890

def generate_operation_code(emit: CommandStreamEmitter, npu_op: NpuOperation):
    """Generates the NPU_OP_* command that matches the type of npu_op.

    DMA operations get a parameter of channel * 16 + mode; pooling and elementwise
    operations get a parameter looked up from their sub operation type.
    Any other operation type triggers an assertion.
    """
    if isinstance(npu_op, NpuDmaOperation):
        dma_param = npu_op.channel * 16 + npu_op.mode
        emit.cmd_do_operation(cmd0.NPU_OP_DMA_START, dma_param)
        return
    if isinstance(npu_op, NpuConv2DOperation):
        emit.cmd_do_operation(cmd0.NPU_OP_CONV)
        return
    if isinstance(npu_op, NpuConvDepthWiseOperation):
        emit.cmd_do_operation(cmd0.NPU_OP_DEPTHWISE)
        return
    if isinstance(npu_op, NpuPoolingOperation):
        emit.cmd_do_operation(cmd0.NPU_OP_POOL, param=pooling_op_map[npu_op.sub_op_type])
        return
    if isinstance(npu_op, NpuElementWiseOperation):
        emit.cmd_do_operation(cmd0.NPU_OP_ELEMENTWISE, param=elementwise_op_map[npu_op.sub_op_type])
        return
    assert 0, "Unsupported operation"

904

905

Louis Verhaard

2020-11-25 14:10:30 +0100

[diff] [blame]

906

def generate_conv2d_op(emit: CommandStreamEmitter, npu_op: NpuConv2DOperation, arch: ArchitectureFeatures):
    """Generates register commands for a Conv2D operation, using the operation's own block traversal"""
    traversal = npu_op.block_traversal
    generate_common(emit, npu_op, traversal, arch)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

909

910

Dwight Lidman

2020-12-08 17:56:44 +0100

[diff] [blame]

911

def generate_conv_depthwise_op(
    emit: CommandStreamEmitter, npu_op: NpuConvDepthWiseOperation, arch: ArchitectureFeatures
):
    """Generates register commands for a depthwise convolution; always uses depth-first block traversal"""
    traversal = NpuBlockTraversal.DEPTH_FIRST
    generate_common(emit, npu_op, traversal, arch)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

916

917

918

def generate_pooling_op(emit: CommandStreamEmitter, npu_op: NpuPoolingOperation, arch: ArchitectureFeatures):
    """Generates register commands for pooling operations.

    Global OFM scaling is used for AVERAGE and REDUCE_SUM pooling when all padding
    values sum to zero. When the operation carries an ExplicitScaling in its
    rescale attribute, that decision is overridden: global scaling is used exactly
    when the explicit scaling is not per-channel.
    """
    use_global_scale = (
        npu_op.sub_op_type in (NpuPoolingOp.AVERAGE, NpuPoolingOp.REDUCE_SUM) and sum(npu_op.padding) == 0
    )
    # Note: reuse of rescale for explicit scaling to not expose this in the external API
    # isinstance() also covers the rescale-is-None case (it is simply False then)
    if isinstance(npu_op.rescale, ExplicitScaling):
        use_global_scale = not npu_op.rescale.per_channel
    generate_common(emit, npu_op, NpuBlockTraversal.DEPTH_FIRST, arch, use_global_scale=use_global_scale)
    # Pooling op specific
    if use_global_scale:
        generate_ofm_scaling_for_pooling(emit, npu_op)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

930

931

932

def generate_elementwise_op(emit: CommandStreamEmitter, npu_op: NpuElementWiseOperation, arch: ArchitectureFeatures):
    """Generates register commands for elementwise operations.

    The scaling registers are generated first, then the common registers. For
    operations that are not unary, the IFM2 registers (and the quantized scalar,
    if IFM2 is a scalar) are generated as well.
    """
    globally_scaled_ops = (
        NpuElementWiseOp.ADD,
        NpuElementWiseOp.SUB,
        NpuElementWiseOp.MUL,
        NpuElementWiseOp.LRELU,
        NpuElementWiseOp.ABS,
    )
    use_global_scale = npu_op.sub_op_type in globally_scaled_ops
    op_to_scale = generate_scaling_for_elementwise(emit, npu_op)
    generate_common(
        emit, npu_op, NpuBlockTraversal.DEPTH_FIRST, arch, use_global_scale=use_global_scale, op_to_scale=op_to_scale
    )

    # Elementwise op specific
    if npu_op.sub_op_type in UNARY_ELEMWISE_OPS:
        # Unary operation; there is no second input to describe
        return

    # Binary operation; generate IFM2 registers
    ifm2 = npu_op.ifm2
    assert ifm2 is not None
    has_scalar = npu_op.ifm2_scalar is not None
    generate_ifm2(emit, ifm2, has_scalar)
    generate_ifm_precision(emit, ifm2, 0, cmd0.NPU_SET_IFM2_PRECISION)
    generate_ifm2_broadcast(emit, npu_op)
    if not has_scalar:
        return

    # The scalar is written in quantized form and must fit in the IFM2 data type
    quantized_scalar = quantise(npu_op.ifm2_scalar, npu_op.ifm2.quantization)
    assert npu_op.ifm2.data_type.min_value() <= quantized_scalar <= npu_op.ifm2.data_type.max_value()
    emit.cmd0_with_param(cmd0.NPU_SET_IFM2_SCALAR, quantized_scalar)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

957

958

959

def generate_dma_op(emit: CommandStreamEmitter, dma_op: NpuDmaOperation):
    """Generates register commands for DMA operations.

    Writes source region and address, destination region and address, and the
    transfer length, which is taken from the source.
    """
    source = dma_op.src
    destination = dma_op.dest

    # Source
    emit.cmd0_with_param(cmd0.NPU_SET_DMA0_SRC_REGION, source.region)
    emit.cmd1_with_address(cmd1.NPU_SET_DMA0_SRC, source.address)
    # Destination
    emit.cmd0_with_param(cmd0.NPU_SET_DMA0_DST_REGION, destination.region)
    emit.cmd1_with_address(cmd1.NPU_SET_DMA0_DST, destination.address)
    # Length
    emit.cmd1_with_address(cmd1.NPU_SET_DMA0_LEN, source.length)

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

967

968

Louis Verhaard

2020-11-25 14:10:30 +0100

[diff] [blame]

969

def generate_registers_for_op(emit: CommandStreamEmitter, npu_op: NpuOperation, arch: ArchitectureFeatures):
    """
    Generates register commands for the given operation, but not the final NPU_OP_... command.
    Dispatches on the type of npu_op; an unsupported operation type triggers an assertion.
    """
    if isinstance(npu_op, NpuConv2DOperation):
        generate_conv2d_op(emit, npu_op, arch)
        return
    if isinstance(npu_op, NpuConvDepthWiseOperation):
        generate_conv_depthwise_op(emit, npu_op, arch)
        return
    if isinstance(npu_op, NpuPoolingOperation):
        generate_pooling_op(emit, npu_op, arch)
        return
    if isinstance(npu_op, NpuElementWiseOperation):
        generate_elementwise_op(emit, npu_op, arch)
        return
    if isinstance(npu_op, NpuDmaOperation):
        # DMA register generation does not depend on the architecture
        generate_dma_op(emit, npu_op)
        return
    assert 0, "Unsupported operation"

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

986

987

988

def generate_command_stream(
    npu_op_list: List[NpuOperation],
    arch: ArchitectureFeatures,
    verbose: bool,
    mem_limits: Dict[int, int],
    add_to_debug_db=None,
    npu_op_to_cmd=None,
) -> List[int]:
    """
    Generates register commands for the given list of NPU operations.
    Returns Ethos-U instructions, as a list of 32-bit integers.

    For every operation the memory accesses are checked against mem_limits, wait
    dependencies are calculated, and the registers, BLOCKDEP (block operations only),
    wait commands and the NPU_OP command are emitted. If add_to_debug_db is given,
    it is called with each operation and the emitter offset after that operation.
    A VelaError raised while processing an operation is re-raised with the operation
    index and type added; a VelaError is also raised when the resulting stream is
    16 MiB or larger.
    """
    emit = CommandStreamEmitter()
    if verbose:
        print_operations(npu_op_list, npu_op_to_cmd)

    # Calculate memory accesses for every operation
    memory_accesses: Dict[NpuOperation, MemoryAccessSet] = {}
    for op in npu_op_list:
        if isinstance(op, NpuDmaOperation):
            accesses = get_dma_memory_accesses(op)
        elif isinstance(op, NpuBlockOperation):
            accesses = get_op_memory_accesses(op, arch)
        else:
            assert 0, "Invalid operation type"
        memory_accesses[op] = accesses

    if arch.is_ethos_u65_system:
        emit.cmd0_with_param(cmd0.NPU_SET_PARALLEL_MODE, arch.ncores - 1)

    dep_watermark = Watermark(0, 0)
    prev_op = None
    # Generate register commands for all operations
    for op_index, npu_op in enumerate(npu_op_list):
        try:
            check_mem_limits(memory_accesses[npu_op], mem_limits)
            dep_watermark, cmd_waits = get_wait_dependency(arch, npu_op_list, memory_accesses, op_index, dep_watermark)
            generate_registers_for_op(emit, npu_op, arch)
        except VelaError as e:
            # Add operation info and rethrow
            raise VelaError(f"{e.error_msg}, in operation {op_index}:{npu_op.op_type.name}") from None

        is_block_op = not isinstance(npu_op, NpuDmaOperation) and isinstance(npu_op, NpuBlockOperation)
        if is_block_op:
            # Generate BLOCKDEP, capped at the maximum the architecture supports
            blockdep = min(calc_blockdep(arch, prev_op, npu_op), arch.max_blockdep)
            emit.cmd0_with_param(cmd0.NPU_SET_BLOCKDEP, blockdep)
            prev_op = npu_op

        generate_cmd_waits(emit, cmd_waits)
        # Generate the actual NPU_OP command
        generate_operation_code(emit, npu_op)
        if add_to_debug_db is not None:
            add_to_debug_db(npu_op, emit.offset)

    # Fill in final part of command stream:
    emit.cmd_do_operation(cmd0.NPU_OP_STOP, param=0xFFFF)
    stream_words = emit.to_list()

    if emit.size_in_bytes() >= 1 << 24:
        raise VelaError(
            f"The command stream size exceeds the hardware limit of 16 MiB. "
            f"The current stream size is {emit.size_in_bytes()/2**20:.2F} MiB."
        )

    if verbose:
        emit.print_cmds()
        print("number of commands", len(emit.cmd_stream))
        print("command stream length in words", len(stream_words))
    return stream_words

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

1055

def generate_register_command_stream(npu_op_list: List[NpuOperation], npu_accelerator: NpuAccelerator) -> List[int]:

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

1056

"""

Louis Verhaard

aeae567

2020-11-02 18:04:27 +0100

[diff] [blame]

1057

Internal implementation of the public facing API for generating an Ethos-U register command stream.

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

1058

Calculates dependencies between commands and inserts wait operations if needed.

1059

1060

:param npu_op_list: List[NpuOperation] list of high level NPU operations

Tim Hall

2020-10-27 12:43:14 +0000

[diff] [blame]

1061

:param accelerator: architecture_features.Accelerator enum to pick the correct Ethos-U accelerator

1062

:return Ethos-U instructions, as a list of 32-bit integers

Louis Verhaard

2020-11-02 18:04:27 +0100

[diff] [blame]

1063

"""

Louis Verhaard

aeae567

2020-11-02 18:04:27 +0100

[diff] [blame]

1064

accelerator = Accelerator.from_npu_accelerator(npu_accelerator)

Louis Verhaard

5207830

2020-11-18 13:35:06 +0100

[diff] [blame]

1065

arch = create_default_arch(accelerator)

Louis Verhaard