# SPDX-FileCopyrightText: Copyright 2021-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Common functions and definitions used during the graph optimization.
from typing import Tuple

import numpy as np

from .architecture_features import Accelerator
from .data_type import DataType
from .debug_database import DebugDatabase
from .errors import UnsupportedFeatureError
from .errors import VelaError
from .operation import Op
from .operation import Operation
from .operation_util import create_avgpool_nop
from .shape4d import Shape4D
from .tensor import Tensor

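# Operators that only move or alias data in memory and perform no computation;
# the graph optimiser either bypasses them or converts them into Memcpy operations.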
memory_only_ops = (
    Op.Reshape,
    Op.QuantizedReshape,
    Op.Squeeze,
    Op.ExpandDims,
    Op.Identity,
)


def _avoid_nhcwb16_for_concat(tens):
    # If axis corresponds to C-dimension, NHCWB16 can only be used in the output if all the concat_start's are a
    # multiple of 16. This is because only then will the address offset for the OFM be 16 byte aligned for all
    # operations. For other values of axis the address offsets will be 16 byte aligned, as they are all based on
    # c = 0 and those addresses are always 16 byte aligned due to the NHCWB16 format.
    return any(op.write_offset.depth % 16 != 0 for op in tens.ops if op.write_offset is not None)


def _avoid_nhcwb16_for_split(tens):
    # If the read offset is not a multiple of 16 in the C-dimension, NHCWB16 needs to be avoided in the input

    # Return True if NHCWB16 needs to be avoided
    def offset_not_aligned(read_offset):
        return read_offset is not None and (read_offset.depth % 16) != 0

    for cons_op in tens.consumer_list:
        if cons_op.ifm == tens:
            if offset_not_aligned(cons_op.read_offsets[0]):
                return True
        if cons_op.ifm2 is not None and cons_op.ifm2 == tens:
            if offset_not_aligned(cons_op.read_offsets[1]):
                return True
    return False


def _avoid_nhcwb16_for_shapes(tens):
    # check all producers/consumers to see if any op shape is preventing NHCWB16
    for cons_op in tens.consumer_list:
        if cons_op.ifm == tens:
            cons_op_shape = cons_op.ifm_shapes[0]
        elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == tens:
            cons_op_shape = cons_op.ifm_shapes[1]
        else:
            assert False
        if Shape4D(tens.shape) != cons_op_shape:
            return True

    for prod_op in tens.ops:
        if Shape4D(tens.shape) != prod_op.ofm_shapes[0]:
            return True

    return False


def _avoid_nhcwb16_for_memory_only(tens):
    # check all producers/consumers to see if any op is preventing NHCWB16
    return any(op.type == Op.Memcpy for op in (tens.consumer_list + tens.ops))


# Check if non linear format can be used
def check_format_restrictions(tens: Tensor, arch):
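    """Determine whether the non-linear NHCWB16 format can be used for tens.

    Every early return below leaves the tensor's current format selection untouched;
    only when all checks pass is force_linear_format cleared at the end.
    """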
    if tens.force_linear_format:
        return
    if len(tens.ops) < 1:
        return
    if tens.ops[0].type in (Op.Placeholder, Op.SubgraphInput, Op.Const) or any(
        cons is None for cons in tens.consumer_list
    ):
        return

    # Writing to the buffer of a variable tensor needs to be linear format
    if tens.ops[0].memory_function == Op.VariableTensorWrite:
        return

    # Check if any of the producers/consumers is run on CPU
    if not all(cons.run_on_npu for cons in tens.consumer_list):
        return
    if not all(prod.run_on_npu for prod in tens.ops):
        return

    # "Concat" ofm exception:
    if _avoid_nhcwb16_for_concat(tens):
        return

    # "Split" ifm exception:
    if _avoid_nhcwb16_for_split(tens):
        return

    # Shapes checking: check all producers/consumers are NHCWB16 compatible with tens.shape
    if _avoid_nhcwb16_for_shapes(tens):
        return

    # Memory only ifm/ofm exception: DMA ops must use NHCW
    if _avoid_nhcwb16_for_memory_only(tens):
        return

    # Resize bilinear half pixel center implementation requires OFM with linear format to
    # allow stride modification in H/W dimensions.
    for op in tens.ops:
        if op.original_type == Op.ResizeBilinear and op.type == Op.DepthwiseConv2DBias:
            return

    for op in tens.consumer_list:
        if op.type == Op.ReduceSum and (
            tens.dtype == DataType.int32 or arch.accelerator_config == Accelerator.Ethos_U65_512
        ):
            # ReduceSum requires NHWC input
            return
        if op.type == Op.Reshape:
            # Using NHCWB16 format for a no-op reshape is only an option if subsequent
            # consumers do not also need to perform a reshape or if the OFM is going to
            # be processed by CPU operations. No-op reshape consumers with empty lists
            # (those that have no consumers, or null-consumers used as list terminators)
            # must use normal NHWC output.

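            # Walks the chain of Reshape consumers; yields True for any consumer that
            # is missing or not run on the NPU (checked with any() below)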
            def incompatible_consumers(oper):
                if oper and oper.type == Op.Reshape:
                    for consumer in oper.outputs[0].consumer_list:
                        yield from incompatible_consumers(consumer)
                yield not oper or not oper.run_on_npu

            if not any(incompatible_consumers(op)):

                def get_rewrites(oper):
                    if oper and oper.type == Op.Reshape:
                        for consumer in oper.outputs[0].consumer_list:
                            yield from get_rewrites(consumer)
                        yield oper

                # Detect no-op reshapes by comparing their full input and output tensor shapes.
                inshape = op.ifm_shapes[0]
                compatible_shape = [(inshape == oper.ofm_shapes[0]) for oper in get_rewrites(op)]
                if not (compatible_shape and all(compatible_shape)):
                    return
            else:
                return

    tens.force_linear_format = False


def calc_explicit_padding(input_size, stride, filter_size, pad_before, pad_after) -> Tuple[int, int]:
    """
    Based on explicit padding provided in a PAD operation, returns the corresponding hardware padding
    that provides equivalent results.
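
    Example (illustrative values): input_size=10, stride=2, filter_size=3 with an
    explicit padding of (pad_before=1, pad_after=1) gives a total padding of 1, so
    the bottom/right padding is adjusted down and (1, 0) is returned.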
    """
    total_padding = needed_total_padding(input_size, stride, filter_size)

    # The bottom/right padding might need downward adjustment depending on stride/input size
    total_minus_before = total_padding - pad_before
    output_pad_after = pad_after
    while output_pad_after > 0 and output_pad_after % stride != total_minus_before % stride:
        output_pad_after -= 1
    return pad_before, output_pad_after


def needed_total_padding(input_size, stride, filter_size):
    """Compute hardware padding."""
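    # e.g. input_size=10, stride=2, filter_size=3 -> max(3 - 2, 0) = 1
    #      input_size=9,  stride=2, filter_size=3 -> max(3 - (9 % 2), 0) = 2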
    if input_size % stride == 0:
        return max(filter_size - stride, 0)

    return max(filter_size - (input_size % stride), 0)


def set_tensor_equivalence(op: Operation, arch, nng) -> Operation:
    """Set input/output tensor equivalence to the same id for memory operations."""
    if op.type in memory_only_ops:
        eid = op.outputs[0].equivalence_id
        for inp in op.inputs:
            inp.equivalence_id = eid
    return op


def set_ifm_ofm_op_shapes(op, arch, nng):
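    """Set the op's ifm_shapes/ofm_shapes for NPU ops that need shape information, unless already set."""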
    if op.run_on_npu and op.type.needs_shapes():
        if op.ifm_shapes or op.ofm_shapes:
            # Shapes already set
            return op
        op.set_ifm_ofm_shapes()
    return op


def check_splitsliceread_to_consumer_shape(op, cons_op):
    assert op.type == Op.SplitSliceRead
    # SplitSliceRead ofm shape must fit within the consumer ifm shape
    if cons_op.ifm == op.ofm:
        cons_shape = cons_op.ifm_shapes[0].as_list()
        read_shape = op.ofm_shapes[0].as_list()
    elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == op.ofm:
        cons_shape = cons_op.ifm_shapes[1].as_list()
        read_shape = op.ofm_shapes[0].as_list()
    else:
        return False

    # All read shape values <= consumer shape values
    return all(read_shape[idx] <= x for idx, x in enumerate(cons_shape))


def move_splitsliceread_to_consumer(op, cons_op):
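    """Fold the SplitSliceRead into cons_op by moving its read offset/shape onto the consumer's ifm (or ifm2)."""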
    assert op.type == Op.SplitSliceRead

    if cons_op.ifm == op.ofm:
        cons_op.read_offsets[0] = op.read_offsets[0]
        cons_op.read_shapes[0] = op.read_shapes[0]
        cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[0])
        cons_op.ifm_shapes[0] = op.ifm_shapes[0]
    elif cons_op.type.is_binary_elementwise_op() and cons_op.ifm2 == op.ofm:
        cons_op.read_offsets[1] = op.read_offsets[0]
        cons_op.read_shapes[1] = op.read_shapes[0]
        cons_op.set_input_tensor(op.ifm, cons_op.type.info.indices.ifms[1])
        cons_op.ifm_shapes[1] = op.ifm_shapes[0]
    op.ofm.consumer_list.remove(cons_op)
    op.ofm.ops = []
    if op in op.ifm.consumer_list:
        op.ifm.consumer_list.remove(op)


def check_memory_only_removed(op, arch):
    if op.run_on_npu and op.type in memory_only_ops:
        # Memory only operators should have been removed
        raise VelaError(f"Memory only {op.type} op {op} expected to have been removed, still remains")


def record_optimised(op, arch):
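    """Record the op in the debug database (constants and placeholders are skipped)."""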
    if op.type not in (Op.Const, Op.Placeholder):
        DebugDatabase.add_optimised(op, op)


def bypass_memory_only_ops(op, arch, nng):
    if not op.run_on_npu or op.type not in memory_only_ops:
        return op

    # Memory only operators can be completely removed if there is a one to one
    # connection. The reshape OFM can be connected to the previous op.
    #
    #                 Bypassed to
    #                    --->
    #   1x6x6x10                  1x6x6x10
    #     ADD                       ADD
    #      |        ------->         |
    #   1x6x6x10    |              1x20x3x6
    #    RESHAPE    |                MEAN
    #      |    ----+
    #   1x20x3x6
    #     MEAN
    #
    # In the above the ADD OFM = RESHAPE IFM is removed and replaced by
    # the RESHAPE OFM.
    #
    # Then there are two cases when bypassing is not possible. One is when
    # the IFM is produced by the CPU. This tensor must be preserved. It
    # cannot be removed from the graph. The other case is when the IFM has
    # multiple consumers; then it is not possible to just bypass the op and
    # there is a need for a DMA (nop).
    #
    #                 Converts to
    #                    --->
    #    1x6x6x10                   1x6x6x10
    #  -----ADD-----              -----ADD-----
    #  |           |              |           |
    # 1x6x6x10  1x6x6x10        1x6x6x10  1x6x6x10
    #  RESHAPE     MEAN          DMA OP      MEAN
    #    |                         |
    # 1x20x3x6                  1x20x3x6
    #   MEAN                      MEAN
    #
    # If the DMA IFM and DMA OFM end up in the same memory area
    # the DMA op will be removed when the cmd stream is generated.

    ifm_has_multiple_cons = len(op.ifm.consumer_list) > 1
    ifm_is_cpu_produced = any(ifm_prod is not None and not ifm_prod.run_on_npu for ifm_prod in op.ifm.ops)

    if ifm_has_multiple_cons or ifm_is_cpu_produced:
        # Convert to a memcpy op
        op.type = Op.Memcpy
        DebugDatabase.add_optimised(op, op)
    else:
        # Bypass op
        ofm = op.ofm
        ifm = op.ifm
        ofm.ops = []
        for prev_op in ifm.ops:
            prev_op.outputs = [ofm]
            ofm.ops.append(prev_op)

    return op


def convert_depthwise_to_conv(op: Operation, arch, nng) -> Operation:
    """Convert DepthwiseConv2DBias to Conv2D to allow support for DepthwiseConv2DBias ops with 'depth multiplier' > 1,
    as long as IFM depth = 1 and OFM depth is equal to the depth multiplier.
    """
    if op.type == Op.DepthwiseConv2DBias and (op.attrs["depth_multiplier"] != 1):
        ifm_shape = op.ifm_shapes[0]
        weight_tensor = op.inputs[1]
        ofm_shape = op.ofm_shapes[0]
        # Depthwise is equivalent to a single conv2d if the ifm depth is 1 and
        # the ofm depth equals the depth multiplier.
        if (ifm_shape.depth == 1) and (ofm_shape.depth == op.attrs["depth_multiplier"]):
            # Change op type to Conv2d
            op.type = Op.Conv2DBias
            del op.attrs["channel_multiplier"]
            del op.attrs["depth_multiplier"]

            weight_tensor.values = np.transpose(weight_tensor.values, (0, 1, 3, 2))
            weight_tensor.set_all_shapes(list(weight_tensor.values.shape))
            DebugDatabase.add_optimised(op, op)
        else:
            raise UnsupportedFeatureError(
                f"Unsupported 'DEPTHWISE_CONV_2D' with depth_multiplier = {op.attrs['depth_multiplier']},"
                f" ifm channels = {ifm_shape.depth}, ofm channels = {ofm_shape.depth}"
            )
    return op


def create_avg_pool_for_concat(concat_op, name, ifm, ifm_shape: Shape4D, write_offset: Shape4D):
    """Creates an average pool for the given concat op/input feature map"""
    ofm = concat_op.ofm
    avgpool_op = create_avgpool_nop(name)
    # Enforce original type since this is used in pass packing to group concat ops
    avgpool_op._original_type = concat_op.type
    avgpool_op.inputs = [ifm]
    avgpool_op.outputs = [ofm]

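    # Write this input feature map into the concat OFM at the given offset (ConcatSliceWrite)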
    avgpool_op.write_offset = write_offset
    avgpool_op.write_shape = ifm_shape
    ofm.ops.append(avgpool_op)
    avgpool_op.ifm_shapes.append(ifm_shape)
    avgpool_op.ofm_shapes.append(concat_op.ofm_shapes[0])
    avgpool_op.memory_function = Op.ConcatSliceWrite
    DebugDatabase.add_optimised(concat_op, avgpool_op)
    return avgpool_op