Blame - ethosu/vela/graph_optimiser.py - ml/ethos-u/ethos-u-vela

2020-04-27 18:20:16 +0100

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Early optimisation of the network graph, using the rewrite_graph module to do the traversal of the graph. These are

18

# split into two parts optimise_graph_a and optimise_graph_b.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

19

import math

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

import numpy as np

from . import rewrite_graph

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

24

from .data_type import DataType

Louis Verhaard

2020-05-25 15:05:26 +0200

[diff] [blame]

25

from .errors import UnsupportedFeatureError

Dwight Lidman

2020-05-29 09:37:03 +0200

[diff] [blame]

26

from .ethos_u55_regs.ethos_u55_regs import resampling_mode

Louis Verhaard

2020-06-03 08:56:44 +0200

[diff] [blame]

27

from .numeric_util import full_shape

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

28

from .operation import NpuBlockType

29

from .operation import Operation

Charles Xu

9a03fdf

2020-07-02 15:12:40 +0200

[diff] [blame^]

30

from .tensor import QuantizationParameters

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

31

from .tensor import Tensor

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

32

33

# Operator types that forward their single input unchanged; tensors produced by
# such operators are bypassed by remove_passthrough_tensor.
passthrough_nodes = {"Identity"}

34

35

36

def remove_passthrough_tensor(tens, arch):
    """Return the tensor that should replace `tens` in the graph.

    If `tens` has exactly one producing op and that op's type is listed in
    `passthrough_nodes`, the op's single input tensor is returned; otherwise
    `tens` itself is returned.  `arch` is unused.
    """
    producers = tens.ops
    if len(producers) != 1:
        return tens

    producer = producers[0]
    if producer.type not in passthrough_nodes:
        return tens

    # A passthrough op is expected to have exactly one input
    assert len(producer.inputs) == 1
    return producer.inputs[0]

def rewrite_concat(tens, arch):
    """Replace a concat op producing `tens` with one ConcatSliceWrite op per input.

    Nothing is changed unless `tens` has a single producer, that producer
    reports `is_concat_op()`, `tens` is its first output and the op is marked
    `run_on_npu`.  Each new op writes one concat input into the slice
    [concat_start, concat_end) of `tens` along the concat axis.  `arch` is
    unused.  Always returns `tens`.
    """
    if len(tens.ops) != 1 or not tens.ops[0].is_concat_op():
        return tens

    concat_op = tens.ops[0]
    if tens != concat_op.outputs[0]:
        # don't attempt to rewrite the min/max outputs of QuantizedConcat
        return tens

    if not concat_op.run_on_npu:
        # Not supported so leave it and run on CPU
        return tens

    inputs, axis = concat_op.get_concat_inputs_axis()

    tens.ops = []
    start = 0
    for idx, inp in enumerate(inputs):
        end = start + inp.shape[axis]

        slice_op = Operation("ConcatSliceWrite", concat_op.name + str(idx))
        slice_op.inputs = [inp]
        slice_op.outputs = [tens]
        slice_op.attrs["concat_axis"] = axis
        slice_op.attrs["concat_start"] = start
        slice_op.attrs["concat_end"] = end
        slice_op.run_on_npu = True
        tens.ops.append(slice_op)

        start = end

    # The slices must exactly cover the output along the concat axis
    assert tens.shape[axis] == start
    return tens

def rewrite_split(tens, arch):
    """Replace a split op producing `tens` with a single SplitSliceRead op.

    Nothing is changed unless `tens` has a single producer, that producer
    reports `is_split_op()` and it is marked `run_on_npu`.  The new op reads
    the region [split_start, split_end) of the split input.  `arch` is unused.
    Always returns `tens`.
    """
    if len(tens.ops) != 1 or not tens.ops[0].is_split_op():
        return tens

    split_op = tens.ops[0]
    if not split_op.run_on_npu:
        # Not supported so leave it and run on CPU
        return tens

    inp, outputs, axis, offset_start, offset_end = split_op.get_split_inputs_axis()

    tens.ops = []
    read_op = Operation("SplitSliceRead", split_op.name)
    read_op.inputs = [inp]
    read_op.outputs = [tens]

    # For Split the offset cannot be extracted from the tensor so it has to
    # be calculated from the index of the output tensor
    if axis is not None:
        rank = len(tens.shape)
        offset_start = [0] * rank
        offset_end = [0] * rank
        # Accumulate the sizes of all outputs that precede this tensor
        for out in outputs:
            if out == tens:
                break
            offset_start[axis] += out.shape[axis]

        offset_end[axis] = offset_start[axis] + tens.shape[axis]

    read_op.attrs["split_start"] = offset_start
    read_op.attrs["split_end"] = offset_end
    read_op.run_on_npu = True
    tens.ops.append(read_op)

    return tens

def needed_total_padding(input_size, stride, filter_size):
    """Return the total padding required along one dimension.

    The number of output positions is `input_size` divided by `stride`,
    rounded up.  The result is how many elements must be added to
    `input_size` so the last filter window fits, never less than zero.
    """
    num_outputs = (input_size + stride - 1) // stride
    span = (num_outputs - 1) * stride + filter_size
    return max(0, span - input_size)

def calc_padding_and_skirt(padding_type, kernel_size, stride, input_dims):
    """Compute the explicit padding and skirt for a kernel operation.

    `kernel_size` is indexed as (height, width); `stride` and `input_dims`
    are indexed at positions 1 (height) and 2 (width) -- presumably NHWC
    ordering, confirm against callers.

    Returns a pair of (top, left, bottom, right) tuples: the padding and the
    skirt.  Raises UnsupportedFeatureError for a padding type other than
    b"SAME" or b"VALID".
    """
    total_y = needed_total_padding(int(input_dims[1]), int(stride[1]), int(kernel_size[0]))
    total_x = needed_total_padding(int(input_dims[2]), int(stride[2]), int(kernel_size[1]))

    if padding_type == b"SAME":
        # Any odd padding element goes to the bottom/right edge
        top, bottom = total_y // 2, (total_y + 1) // 2
        left, right = total_x // 2, (total_x + 1) // 2
    elif padding_type == b"VALID":
        top = bottom = left = right = 0
    else:
        raise UnsupportedFeatureError("Unknown padding {}".format(str(padding_type)))

    padding = (top, left, bottom, right)
    # The skirt always accounts for the full needed padding, even for VALID
    skirt = (top, left, total_y - top, total_x - left)
    return padding, skirt

134

Tim Hall

2020-06-15 20:47:35 +0100

[diff] [blame]

135

Jacob Bohlin

2020-07-07 17:15:22 +0200

[diff] [blame]

136

def calc_upscaled_padding_and_skirt(padding_type, kernel_size, stride, input_dims, upscaling_factor):
    """Compute the explicit padding and skirt for an upscaling (transpose) convolution.

    `kernel_size` is indexed as (height, width); `stride` and `input_dims`
    are indexed at positions 1 (height) and 2 (width) -- presumably NHWC
    ordering, confirm against callers.  `upscaling_factor` multiplies the
    input height/width before the SAME padding is derived.

    Returns (padding, skirt), both the same (top, left, bottom, right) tuple.
    Raises UnsupportedFeatureError for a padding type other than b"SAME" or
    b"VALID", matching calc_padding_and_skirt.
    """
    kernel_height, kernel_width = kernel_size[0], kernel_size[1]
    if padding_type == b"SAME":
        ypad = needed_total_padding(int(input_dims[1]) * upscaling_factor, int(stride[1]), int(kernel_height))
        xpad = needed_total_padding(int(input_dims[2]) * upscaling_factor, int(stride[2]), int(kernel_width))

        right_pad = ((xpad + 1) // upscaling_factor) - 1
        bottom_pad = ((ypad + 1) // upscaling_factor) - 1
        left_pad = max(kernel_width - 1 - right_pad, 0)
        top_pad = max(kernel_height - 1 - bottom_pad, 0)

    elif padding_type == b"VALID":
        right_pad = max(kernel_width - 2, 0)
        bottom_pad = max(kernel_height - 2, 0)
        left_pad = kernel_width - 1
        top_pad = kernel_height - 1
    else:
        # An assert would be stripped under "python -O" and the code below
        # would then fail on unbound locals, so raise explicitly instead.
        raise UnsupportedFeatureError("Unknown padding {}".format(str(padding_type)))

    padding = (top_pad, left_pad, bottom_pad, right_pad)
    skirt = padding
    return padding, skirt

158

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

159

160

def fixup_conv2d_backprop(op, arch):
    """Normalise a Conv2DBackpropInput op in place.

    Inputs 0 and 2 are swapped, the type becomes
    "Conv2DBackpropInputSwitchedBias", a zero-filled int32 bias tensor is
    appended when the op has fewer than four inputs, and the stride attributes
    are forced to 1.  Other op types are returned untouched.  `arch` is unused.
    """
    if op.type != "Conv2DBackpropInput":
        return op

    # flip the inputs
    first, third = op.inputs[0], op.inputs[2]
    op.inputs[0] = third
    op.inputs[2] = first
    op.type = "Conv2DBackpropInputSwitchedBias"

    # The bias needs one entry per element of the last weight dimension
    weight_sets = op.inputs[1].shape[3]

    if len(op.inputs) < 4:
        # Add bias/scale tensor filled with zeros
        bias_op = Operation("Const", op.name + "_bias")
        bias_tens = Tensor([weight_sets], DataType.int32, op.name + "_bias_tens")
        bias_tens.values = [0] * weight_sets
        bias_tens.quant_values = [0] * weight_sets
        bias_tens.ops = [bias_op]
        bias_op.outputs = [bias_tens]
        bias_tens.consumer_list = [op]
        op.inputs.append(bias_tens)

    # Update strides
    op.attrs["stride_w"] = 1
    op.attrs["stride_h"] = 1
    op.attrs["strides"] = (1, 1, 1, 1)

    return op

Charles Xu

2020-07-02 15:12:40 +0200

[diff] [blame^]

185

# Convert the op to an elementwise add

186

def convert_resizebilinear_1x1_to_add(op):
    """Turn `op` into an elementwise "AddAct" with an all-zero first input.

    A zero-valued tensor with the shape of the op's first output is created
    and takes over the producer of the original second input; the original
    first input becomes the second operand of the add.  Returns `op`.
    """
    op.type = "AddAct"
    op.name = op.name + "_add"
    op.attrs["npu_block_type"] = NpuBlockType.ElementWise
    op.attrs["resizebilinear"] = True

    ifm = op.inputs[0]
    second_input = op.inputs[1]

    # Create an input tensor filled with zeros
    out_shape = op.outputs[0].shape
    zeros = Tensor(out_shape, ifm.dtype, second_input.name + "_add")
    zeros.values = np.zeros(out_shape)
    zeros.quant_values = np.zeros(out_shape, np.uint8)
    quant = QuantizationParameters(0.0, 255.0)
    zeros.quantization = quant
    zeros.quantization.scale_f32 = 1.0
    zeros.quantization.zero_point = 0
    zeros.consumer_list = [op]

    # Re-use the producer of the original second input for the zero tensor
    producer = second_input.ops[0]
    producer.outputs = [zeros]
    zeros.ops = [producer]

    # Set the add inputs
    op.inputs[1] = ifm
    op.inputs[0] = zeros

    return op

def fixup_resizebilinear(op, arch):
    """Convert a ResizeBilinear whose input is 1x1 (dims 1 and 2) to an add.

    Other ops are returned unchanged.  `arch` is unused.
    """
    if op.type != "ResizeBilinear":
        return op

    ifm_shape = op.inputs[0].shape
    if ifm_shape[1] == 1 and ifm_shape[2] == 1:
        convert_resizebilinear_1x1_to_add(op)

    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

218

def fixup_fully_connected_input(op, arch):
    """Ensure the input of a FullyConnectedAct op is [batch, n_in_elems].

    `n_in_elems` is taken from the second-to-last weight dimension.  When the
    input tensor's shape differs, a Reshape op (with its constant shape
    tensor) is inserted in front of the op.  Other op types are returned
    unchanged.  `arch` is unused.
    """
    if op.type != "FullyConnectedAct":
        return op

    ifm, weights = op.inputs[0], op.inputs[1]

    n_in_elems = weights.shape[-2]
    total_elems = ifm.elements()
    batch_size, leftover = divmod(total_elems, n_in_elems)
    # The input must split evenly into rows of n_in_elems
    assert leftover == 0

    target_shape = [batch_size, n_in_elems]
    if ifm.shape != target_shape:
        # mismatch, insert a reshape to fix this.
        reshape_name = op.name + "_reshape"

        shape_tens = Tensor([1], DataType.int32, reshape_name + "_shape")
        shape_tens.values = np.array(target_shape)
        shape_const = Operation("Const", shape_tens.name + "_const")
        shape_tens.ops = [shape_const]
        shape_const.outputs = [shape_tens]

        reshape_op = Operation("Reshape", reshape_name)
        reshape_op.inputs = [ifm, shape_tens]
        reshape_op.attrs["new_shape"] = target_shape

        reshaped = ifm.clone("_reshaped")
        reshaped.set_all_shapes(target_shape)
        reshaped.ops = [reshape_op]
        reshape_op.outputs = [reshaped]

        op.inputs[0] = reshaped

    return op

def fixup_pack_input(op, arch):
    """Insert a Reshape in front of every input of a Pack op.

    Each input is reshaped to the first input's shape with a size-1 dimension
    inserted at the pack axis, and the op type becomes "PackReshaped".  Other
    op types are returned unchanged.  `arch` is unused.
    """
    if op.type != "Pack":
        return op

    # Pack is also referred to as Stack
    # Requires the rewrite_concat function to be called on the op afterwards
    axis = int(op.attrs["axis"])
    first_shape = op.inputs[0].shape
    expanded_shape = first_shape[:axis] + [1] + first_shape[axis:]

    # Construct 1 shape tensor to be used by all inserted reshape ops
    shape_tens = Tensor([1], DataType.int32, op.name + "_reshape_shape")
    shape_tens.values = np.array(expanded_shape)
    shape_const = Operation("Const", shape_tens.name + "_const")
    shape_tens.ops = [shape_const]
    shape_const.outputs = [shape_tens]

    for idx, inp in enumerate(op.inputs):
        reshape_op = Operation("Reshape", op.name + str(idx) + "_reshape")
        reshape_op.inputs = [inp, shape_tens]
        reshape_op.attrs["new_shape"] = expanded_shape

        reshaped = inp.clone("_reshaped")
        reshaped.set_all_shapes(expanded_shape)
        reshaped.ops = [reshape_op]
        reshape_op.outputs = [reshaped]

        op.inputs[idx] = reshaped

    op.type = "PackReshaped"
    return op

def fixup_unpack_output(tens, arch):
    """Insert Reshape ops after an Unpack or rank-changing StridedSlice.

    The first producer of `tens` is inspected.  For "Unpack", and for
    "StridedSlice" with a non-zero shrink_axis_mask or new_axis_mask, every
    output of the op is replaced by a clone with shape `reshape_input_shape`,
    followed by a Reshape op that produces the original output tensor.
    StridedSlice ops that use an ellipsis mask, or both new-axis and
    shrink-axis masks, are left untouched.  `arch` is unused.
    Always returns `tens`.
    """
    op = tens.ops[0]
    if op.type in set(("Unpack", "StridedSlice")):
        # Unpack is also referred to as Unstack
        # Requires the rewrite_split function to be called on the op afterwards

        reshape_input_shape = tens.shape
        if op.type == "StridedSlice":
            new_axis_mask = op.attrs["new_axis_mask"]
            shrink_axis_mask = op.attrs["shrink_axis_mask"]
            ellipsis_mask = op.attrs["ellipsis_mask"]

            if (new_axis_mask != 0 and shrink_axis_mask != 0) or ellipsis_mask != 0:
                # Not supported, will be put on CPU
                return tens
            if shrink_axis_mask == 0 and new_axis_mask == 0:
                # Equal Rank StridedSlice, no need to insert reshape
                return tens
            elif shrink_axis_mask != 0:
                n = 0  # number of set bits processed so far
                axis = 0
                while shrink_axis_mask:
                    prev_mask = shrink_axis_mask
                    n += 1
                    # Clear the lowest set bit; the difference to prev_mask is
                    # that bit's value, so log2 gives its bit index (the axis)
                    shrink_axis_mask &= shrink_axis_mask - 1
                    axis = int(math.log2(prev_mask - shrink_axis_mask))
                    # Re-insert a size-1 dimension at that axis
                    reshape_input_shape = reshape_input_shape[:axis] + [1] + reshape_input_shape[axis:]

                assert len(tens.shape) == (len(op.inputs[0].shape) - n)
                op.attrs["shrink_axis_mask"] = 0

            elif new_axis_mask != 0:
                n = 0  # number of set bits processed so far
                axis = 0
                while new_axis_mask:
                    prev_mask = new_axis_mask
                    n += 1
                    # Same lowest-set-bit extraction as in the shrink case
                    new_axis_mask &= new_axis_mask - 1
                    axis = int(math.log2(prev_mask - new_axis_mask))
                    # Drop the dimension at that axis
                    reshape_input_shape = reshape_input_shape[:axis] + reshape_input_shape[(axis + 1) :]
                    # NOTE(review): presumably the shift compensates for the
                    # remaining axes moving down by one after the removal - confirm
                    new_axis_mask >>= 1

                assert len(tens.shape) == (len(op.inputs[0].shape) + n)
                op.attrs["new_axis_mask"] = 0
        else:
            axis = int(op.attrs["axis"])
            op.type = "UnpackReshaped"
            reshape_input_shape = tens.shape[:axis] + [1] + tens.shape[axis:]

        # Construct 1 shape tensor to be used by all inserted reshape ops
        new_shape_name = op.name + "_reshape_shape"
        new_shape_tens = Tensor([1], DataType.int32, new_shape_name)
        new_shape_tens.values = np.array(tens.shape)
        new_shape_tens_const = Operation("Const", new_shape_tens.name + "_const")
        new_shape_tens.ops = [new_shape_tens_const]
        new_shape_tens_const.outputs = [new_shape_tens]

        for idx, out_tens in enumerate(op.outputs):
            reshape_name = op.name + str(idx) + "_reshape"
            reshape_op = Operation("Reshape", reshape_name)
            reshape_op.outputs = [out_tens]
            # The clone becomes the op's new output; the Reshape maps it back
            # to the original output tensor
            reshape_in = out_tens.clone("_reshaped")
            reshape_in.set_all_shapes(reshape_input_shape)
            reshape_in.ops = [op]
            out_tens.ops = [reshape_op]
            reshape_op.inputs = [reshape_in, new_shape_tens]

            op.outputs[idx] = reshape_in

    return tens

def add_padding_fields(op, arch):
    """Add "explicit_padding" and "skirt" attributes to ops that have "padding".

    The kernel size comes from the weight tensor shape for ops with "Conv" in
    their type, and from the "ksize"/"ksizes" attribute for pooling,
    ResizeBilinear and ExtractImagePatches ops.  Raises
    UnsupportedFeatureError for any other op type that has a "padding"
    attribute.  `arch` is unused.  Returns `op`.
    """
    if "padding" not in op.attrs:
        return op

    op_type = op.type
    if "Conv" in op_type:
        kernel_size = op.inputs[1].shape[:2]
    elif "Pool" in op_type or "ResizeBilinear" == op_type:
        kernel_size = op.attrs["ksize"][1:3]
    elif op_type == "ExtractImagePatches":
        kernel_size = op.attrs["ksizes"][1:3]
    else:
        raise UnsupportedFeatureError("Unknown operation that uses padding: {}".format(op.type))
    input_shape = op.inputs[0].shape

    if op_type == "Conv2DBackpropInputSwitchedBias":
        # Ratio of output to input size along dimension 1
        upscaling_factor = op.outputs[0].shape[1] // input_shape[1]
        padding, skirt = calc_upscaled_padding_and_skirt(
            op.attrs["padding"], kernel_size, op.attrs["strides"], input_shape, upscaling_factor
        )
    else:
        dilation_h, dilation_w = op.get_dilation_h_w()
        # Effective kernel extent once dilation is applied
        dilated_kernel_size = [
            dilation_h * (kernel_size[0] - 1) + 1,
            dilation_w * (kernel_size[1] - 1) + 1,
        ]
        padding, skirt = calc_padding_and_skirt(
            op.attrs["padding"], dilated_kernel_size, op.attrs["strides"], input_shape
        )

    op.attrs["explicit_padding"] = padding
    op.attrs["skirt"] = skirt
    return op

Jacob Bohlin

2020-05-20 09:03:40 +0200

[diff] [blame]

387

# Operator type groups used to classify ops (see mark_npu_block_type,
# get_prepend_op, fixup_act_reorder and fixup_elementwise_with_scalars).
conv_op = {"Conv2D", "QuantizedConv2D", "Conv2DBackpropInputSwitchedBias", "Conv2DBiasAct"}
fc_op = {
    "MatMul",
    "QuantizedMatMul",
    "BlockLSTM",
    "RnnAct",
    "UnidirectionalSequenceRnnAct",
    "BidirectionalSequenceRnnAct",
    "LstmAct",
    "UnidirectionalSequenceLstmAct",
    "BidirectionalSequenceLstmAct",
    "FullyConnectedAct",
}
depthwise_op = {"DepthwiseConv2dNative", "DepthwiseConv2dBiasAct"}
pool_op = {
    "AvgPool",
    "MaxPool",
    "QuantizedAvgPool",
    "QuantizedMaxPool",
    "AvgPoolAct",
    "MaxPoolAct",
    "ResizeBilinear",
}
elementwise_op = {"AddAct", "MulAct", "SubAct", "Maximum", "Minimum", "LeakyRelu", "Abs"}
binary_elementwise_op = {"AddAct", "MulAct", "SubAct", "Maximum", "Minimum"}
activation_ops = {"Relu", "Relu6", "ReluN1To1", "Sigmoid", "Tanh"}
memory_only_ops = {"Reshape"}

410

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

411

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

412

# Check if the op can be reordered

413

def get_prepend_op(op):
    """Find the memory-only op in front of which `op` could be moved.

    Starting from the last producer of the first input, the chain of
    memory-only ops (each with one output that has one consumer) is followed
    upwards.  The topmost such op is returned, provided the op above it also
    has a single output with a single consumer; otherwise None is returned.
    None is also returned when there is no memory-only op in the chain.
    """
    producer = op.inputs[0].ops[-1]
    topmost_memory_op = None

    # The op should be reordered between producer and topmost_memory_op
    while (
        producer.type in memory_only_ops
        and len(producer.outputs) == 1
        and len(producer.outputs[0].consumers()) == 1
    ):
        topmost_memory_op = producer
        producer = producer.inputs[0].ops[-1]

    feeds_single_consumer = (
        producer is not None and len(producer.outputs) == 1 and len(producer.outputs[0].consumers()) == 1
    )
    return topmost_memory_op if feeds_single_consumer else None

def mark_npu_block_type(op, arch):
    """Store the NPU block type matching the op's type in op.attrs.

    The first operator group containing `op.type` decides the block type;
    ops in no group get NpuBlockType.Default.  `arch` is unused.  Returns `op`.
    """
    # Checked in order; the first matching group wins
    groups = (
        (conv_op, NpuBlockType.ConvolutionMxN),
        (fc_op, NpuBlockType.VectorProduct),
        (depthwise_op, NpuBlockType.ConvolutionDepthWise),
        (pool_op, NpuBlockType.Pooling),
        (elementwise_op, NpuBlockType.ElementWise),
    )

    block_type = NpuBlockType.Default
    for op_types, candidate in groups:
        if op.type in op_types:
            block_type = candidate
            break

    op.attrs["npu_block_type"] = block_type
    return op

def convert_depthwise_to_conv(op, arch):
    """Convert a depthwise convolution with depth_multiplier != 1 to a Conv2D.

    Depthwise is equivalent to a single conv2d if the ifm depth is 1 and the
    ofm depth equals the depth multiplier.  When both hold, the operator type
    is switched and the last two weight axes are swapped; otherwise
    UnsupportedFeatureError is raised.  Ops that are not depthwise, or have a
    depth multiplier of 1, are returned unchanged.  `arch` is unused.
    """
    if "DepthwiseConv2d" not in op.type or op.attrs["depth_multiplier"] == 1:
        return op

    ifm = op.inputs[0]
    weights = op.inputs[1]
    ofm = op.outputs[0]

    depth_multiplier = op.attrs["depth_multiplier"]
    ifm_depth = ifm.shape[3]
    ofm_depth = ofm.shape[3]

    if not (ifm_depth == 1 and ofm_depth == depth_multiplier):
        raise UnsupportedFeatureError(
            "Unsupported DepthwiseConv2d with depth_multiplier = {}, ifm channels = {}, ofm channels = {}".format(
                depth_multiplier, ifm_depth, ofm_depth
            )
        )

    # Change op type to Conv2d
    op.type = op.type.replace("DepthwiseConv2d", "Conv2D")
    del op.attrs["channel_multiplier"]
    del op.attrs["depth_multiplier"]

    # Swap the last two weight axes and propagate the new shape
    weights.quant_values = np.transpose(weights.quant_values, (0, 1, 3, 2))
    weights.set_all_shapes(list(weights.quant_values.shape))
    return op

Jacob Bohlin

2020-06-23 12:12:56 +0200

[diff] [blame]

472

def reorder_depthwise_weights(op, arch):
    """Swap the last two weight axes of a depthwise convolution.

    The weight tensor's shapes are updated to match and the tensor is flagged
    with `weight_transpose_depthwise`.  Ops without "DepthwiseConv2d" in their
    type are returned unchanged.  `arch` is unused.
    """
    if "DepthwiseConv2d" not in op.type:
        return op

    weights = op.inputs[1]
    weights.quant_values = np.transpose(weights.quant_values, (0, 1, 3, 2))
    new_shape = list(weights.quant_values.shape)
    weights.set_all_shapes(new_shape)
    weights.weight_transpose_depthwise = True
    return op

Michael McGeagh

2020-07-29 13:11:43 +0100

[diff] [blame]

482

def convert_conv_to_fc(op, arch):
    """Rewrite a 1x1 Conv2DBiasAct on a 1x1 input as a FullyConnectedAct.

    Conv 1x1 can be equivalent to Fully Connected.  By representing certain
    convs as fully connected layers, Vela can better determine whether or not
    to use caching/double buffering for the weights (weights don't need to be
    reloaded for convs when IFM H and W are 1).

    The weights are squeezed to 2D, the op's output becomes a 2D clone of the
    original output, and a Reshape op is appended to restore the original
    output tensor.  Non-matching ops are returned unchanged.  `arch` is unused.
    """
    if op.type != "Conv2DBiasAct":
        return op

    _, ifm_h, ifm_w, _ = op.inputs[0].shape
    kernel_h, kernel_w, _, _ = op.inputs[1].shape
    if any(dim != 1 for dim in (ifm_h, ifm_w, kernel_h, kernel_w)):
        return op

    # Overwrite this op as a Fully Connected Op
    op.name += "_fc"
    op.type = "FullyConnectedAct"
    activation = op.attrs.get("fused_activation_function", None)
    op.attrs = {
        "fused_activation_function": activation,
        "weights_format": 0,
        "npu_block_type": NpuBlockType.VectorProduct,
    }

    # Reshape Weights to be 2D. HWIO becomes just IO (as H and W are 1, they can just be dropped)
    weights = op.inputs[1]
    weights.quant_values = weights.quant_values.squeeze(axis=(0, 1))
    weights.set_all_shapes(list(weights.quant_values.shape))

    # The output from a fully connected is expected to be 2D so we need to add a reshape layer to convert it
    # back to 4D afterwards as the next layer is expecting that shape
    ofm_4d = op.outputs[0]

    # Reshape this ops output to be 2D: {(N*H*W), C} (We know N H and W are all 1 so this becomes {1, C})
    ofm_2d = ofm_4d.clone("_fc")
    ofm_2d.set_all_shapes([1, ofm_2d.shape[-1]])
    ofm_2d.ops = [op]

    # Add a reshape after the new OFM to convert it back to the original 4D shape
    reshape_name = op.name + "_reshape_post"
    shape_tens = Tensor([1], DataType.int32, reshape_name + "_shape")
    shape_tens.values = np.array(ofm_4d.shape)
    shape_const = Operation("Const", shape_tens.name + "_const")
    shape_tens.ops = [shape_const]
    shape_const.outputs = [shape_tens]

    reshape_op = Operation("Reshape", reshape_name)
    reshape_op.inputs = [ofm_2d, shape_tens]
    reshape_op.attrs["new_shape"] = ofm_4d.shape
    ofm_4d.ops = [reshape_op]
    reshape_op.outputs = [ofm_4d]

    # Replace this ops OFM to point to the 2D tensor
    op.outputs[0] = ofm_2d
    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

528

# Reorder activation op if it's after the memory only operations

529

def fixup_act_reorder(op, arch):
    """Move an activation op in front of the memory-only ops that precede it.

    When get_prepend_op finds a memory-only op to move in front of, a clone of
    the activation is inserted before that op and the original activation is
    turned into an "Identity".  Non-activation ops, and activations that
    cannot be reordered, are returned unchanged.  `arch` is unused.
    """
    if op.type not in activation_ops:
        return op

    memory_op = get_prepend_op(op)
    if memory_op is None:
        return op

    # Cloned activation consumes what the memory-only op used to consume
    moved_act = op.clone("_reordered")
    moved_act.inputs = [memory_op.inputs[0]]

    moved_out = moved_act.inputs[0].clone("_acted")
    moved_out.quantization = op.outputs[0].quantization.clone()
    moved_out.ops = [moved_act]
    moved_act.outputs = [moved_out]

    # The memory-only op now reads the activated tensor and inherits its quantization
    memory_op.inputs[0] = moved_out
    memory_op.outputs[0].quantization = moved_out.quantization.clone()

    # Mark the op so that it will be removed as passthrough later on
    op.type = "Identity"
    return op

Louis Verhaard

2020-06-03 08:56:44 +0200

[diff] [blame]

546

Charles Xu

2020-05-13 10:15:26 +0200

[diff] [blame]

547

def fixup_elementwise_with_scalars(op, arch):
    """Equalise the ranks of the two inputs of a binary elementwise op.

    When both inputs have a non-empty shape, the lower-rank one has its shape
    expanded via full_shape to the rank of the other.  When one input has an
    empty shape and no quant_values, its shape and storage_shape become a list
    of ones with the rank of the other input.  Other ops are returned
    unchanged.  `arch` is unused.
    """
    if op.type not in binary_elementwise_op:
        return op

    ifm, ifm2, _, _ = op.get_ifm_ifm2_weights_ofm()

    if ifm2.shape != [] and ifm.shape != []:
        ifm_rank = len(ifm.shape)
        ifm2_rank = len(ifm2.shape)
        if ifm_rank > ifm2_rank:
            ifm2.shape = full_shape(ifm_rank, ifm2.shape, 1)
        elif ifm_rank < ifm2_rank:
            ifm.shape = full_shape(ifm2_rank, ifm.shape, 1)
    elif ifm.shape == [] and ifm.quant_values is None:
        # IFM is marked as a scalar, but is a result of an operation; change it to a shape of size 1
        ifm.shape = len(ifm2.shape) * [1]
        ifm.storage_shape = ifm.shape
    elif ifm2.shape == [] and ifm2.quant_values is None:
        # IFM2 is marked as a scalar, but is a result of an operation; change it to a shape of size 1
        ifm2.shape = len(ifm.shape) * [1]
        ifm2.storage_shape = ifm2.shape

    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

565

Louis Verhaard

2020-06-03 08:56:44 +0200

[diff] [blame]

566

Tim Hall

4e12776

2020-05-15 16:05:49 +0100

[diff] [blame]

567

# Set input/output tensor equivalence to the same id for memory operations

568

def set_tensor_equivalence(op, arch):
    """Give every input of a Reshape op the equivalence id of its first output.

    Other op types are returned unchanged.  `arch` is unused.
    """
    if op.type != "Reshape":
        return op

    shared_id = op.outputs[0].equivalence_id
    for tensor in op.inputs:
        tensor.equivalence_id = shared_id
    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

576

def convert_mul_max_to_abs_or_lrelu(op, arch):
    r"""Whenever there is a subgraph with this topology:

       Input    X   For X = -1 or X >= 0
       |   \   /    This subgraph can be replaced with either
       |    Mul     an Abs (if X = -1) or a LeakyReLU (if X >= 0)
       |   /
       Max

    `op` is the candidate Max.  On a match its type, name and first output
    name have "Maximum" replaced by the new operator name, its inputs are
    reduced to the shared input, and for LeakyRelu the scalar is stored in
    op.attrs["alpha"].  In every other case `op` is returned unchanged.
    `arch` is unused.
    """
    if op.type != "Maximum":
        return op

    # finds the Mul input(s) to the Max (inputs without a producer cannot be Muls)
    muls = [i for i in op.inputs if i.ops and i.ops[0].type == "MulAct"]
    if len(muls) == 1:
        mul = muls[0].ops[0]
    elif len(muls) == 2:
        # In the case both inputs are Muls, find the one that consumes the other
        # input of the Max.  The previous test required the union of the Max
        # inputs and the Mul inputs to be a single tensor, which cannot happen,
        # so indexing the empty result always raised IndexError.
        max_inputs = set(op.inputs)
        candidates = [m.ops[0] for m in muls if max_inputs & set(m.ops[0].inputs)]
        if not candidates:
            return op
        mul = candidates[0]
    else:
        # No Mul inputs
        return op

    # make sure the Mul doesn't have any other consumers
    if len(mul.outputs[0].consumers()) != 1:
        return op
    # make sure the Mul doesn't have a faf
    if mul.attrs.get("fused_activation_function"):
        return op

    # finds the branched input that goes to both the Max and the Mul
    shared = set(op.inputs) & set(mul.inputs)
    if len(shared) != 1:
        return op
    shared_in = shared.pop()

    # find the constant scalar input to the Mul
    other_inputs = set(mul.inputs) - {shared_in}
    if len(other_inputs) != 1:
        # e.g. Mul(x, x): there is no separate scalar operand
        return op
    const_tens = other_inputs.pop()
    # check that it is a scalar
    if const_tens.shape != []:
        return op
    const = const_tens.ops[0]
    # check that it is a constant
    if const.type != "Const":
        return op

    val = const.outputs[0].values

    if val >= 0:
        new_op = "LeakyRelu"
        op.attrs["alpha"] = val
    elif val == -1:
        new_op = "Abs"
    else:
        return op

    op.type = op.type.replace("Maximum", new_op)
    op.name = op.name.replace("Maximum", new_op)
    op.outputs[0].name = op.outputs[0].name.replace("Maximum", new_op)
    op.inputs = [shared_in]

    return op

Dwight Lidman

2020-05-29 09:37:03 +0200

[diff] [blame]

637

def add_attrs_to_resizebilinear(op, arch):
    """Add padding, stride and kernel attributes to an NPU ResizeBilinear op.

    Handles two cases: an output of exactly twice the input height/width
    without align_corners (SAME padding), and an output of twice the input
    size minus one with align_corners (VALID padding).  In both cases the
    input tensor's resampling mode is set to NEAREST.  Any other op or
    configuration is returned unchanged.  `arch` is unused.
    """
    if not (op.type == "ResizeBilinear" and op.run_on_npu):
        return op

    ifm = op.inputs[0]
    doubled_hw = [ifm.shape[1] * 2, ifm.shape[2] * 2]
    ofm_hw = op.outputs[0].shape[1:3]
    align_corners = op.attrs["align_corners"]

    if not align_corners and ofm_hw == doubled_hw:
        # this means the output is supposed to be a x2 upscale,
        # so we need to do SAME padding
        padding = b"SAME"
    elif align_corners and ofm_hw == [doubled_hw[0] - 1, doubled_hw[1] - 1]:
        # here we can just run the avg pool without padding and
        # produce a (M * 2 - 1, N * 2 - 1) sized output
        padding = b"VALID"
    else:
        return op

    op.attrs["padding"] = padding
    ifm.resampling_mode = resampling_mode.NEAREST
    op.attrs.update({"strides": (1, 1, 1, 1), "ksize": (1, 2, 2, 1)})
    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

657

def supported_operator_check(op, arch):
    """Set op.run_on_npu from the architecture's supported-operator check and return `op`."""
    checker = arch.supported_operators
    op.run_on_npu = checker.is_operator_supported(op)
    return op

def optimise_graph_a(nng, arch, verbose_graph=False):
    """Run the first set of graph rewrites over every subgraph of `nng`.

    Two pre-order passes are made: the first applies fixup_unpack_output to
    tensors and the functions in `op_rewrite_list` (in list order) to
    operators; the second removes passthrough tensors.  When `verbose_graph`
    is set the graph is printed before and after.  Returns `nng`.
    """
    if verbose_graph:
        nng.print_graph()

    op_rewrite_list = [
        # mark block type and check if the operations are supported
        mark_npu_block_type,
        set_tensor_equivalence,
        supported_operator_check,
        # then do any rewrites of supported operators
        convert_depthwise_to_conv,
        convert_conv_to_fc,
        fixup_fully_connected_input,
        fixup_pack_input,
        fixup_conv2d_backprop,
        fixup_act_reorder,
        add_attrs_to_resizebilinear,
        add_padding_fields,
        # block type is marked a second time, after the rewrites above
        mark_npu_block_type,
        fixup_elementwise_with_scalars,
        reorder_depthwise_weights,
        fixup_resizebilinear,
        # convert_mul_max_to_abs_or_lrelu # TODO: enable optimisation once quantisation issues are resolved
    ]

    for idx, sg in enumerate(nng.subgraphs):
        # rewrite graph pass
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(
            sg, arch, [fixup_unpack_output], op_rewrite_list, rewrite_unsupported=False
        )

    for idx, sg in enumerate(nng.subgraphs):
        # remove passthrough tensors
        nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [remove_passthrough_tensor], [])

    if verbose_graph:
        nng.print_graph()
    return nng

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

701

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

702

def optimise_graph_b(nng, arch, verbose_graph=False):

if verbose_graph:

nng.print_graph()

for idx, sg in enumerate(nng.subgraphs):

707

# combined rewrite graph pass

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

708

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [rewrite_concat, rewrite_split], [])

Tim Hall