Blame - ethosu/vela/graph_optimiser.py - ml/ethos-u/ethos-u-vela

2020-04-27 18:20:16 +0100

[diff] [blame]

1

2

#

3

# SPDX-License-Identifier: Apache-2.0

4

#

5

# Licensed under the Apache License, Version 2.0 (the License); you may

6

# not use this file except in compliance with the License.

7

# You may obtain a copy of the License at

8

#

9

# www.apache.org/licenses/LICENSE-2.0

10

#

11

# Unless required by applicable law or agreed to in writing, software

12

# distributed under the License is distributed on an AS IS BASIS, WITHOUT

13

# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

14

# See the License for the specific language governing permissions and

15

# limitations under the License.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

16

# Description:

17

# Early optimisation of the network graph, using the rewrite_graph module to do the traversal of the graph. These are

18

# split into two parts optimise_graph_a and optimise_graph_b.

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

19

import math

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

import numpy as np

from . import rewrite_graph

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

24

from .data_type import DataType

Louis Verhaard

2020-05-25 15:05:26 +0200

[diff] [blame]

25

from .errors import UnsupportedFeatureError

Dwight Lidman

2020-05-29 09:37:03 +0200

[diff] [blame]

26

from .ethos_u55_regs.ethos_u55_regs import resampling_mode

Louis Verhaard

2020-06-03 08:56:44 +0200

[diff] [blame]

27

from .numeric_util import full_shape

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

28

from .operation import NpuBlockType

29

from .operation import Operation

Fredrik Svedberg

2020-06-03 15:43:31 +0200

[diff] [blame]

30

from .softmax import SoftMax

Michael McGeagh

2020-08-07 11:54:28 +0100

[diff] [blame^]

31

from .tensor import create_const_tensor

32

from .tensor import create_reshape_tensor

Charles Xu

2020-07-02 15:12:40 +0200

[diff] [blame]

33

from .tensor import QuantizationParameters

Diego Russo

e8a1045

2020-04-21 17:39:10 +0100

[diff] [blame]

34

from .tensor import Tensor

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

35

36

# Operator types that remove_passthrough_tensor() bypasses.
passthrough_nodes = {"Identity"}

37

38

39

def remove_passthrough_tensor(tens, arch):
    """Return the tensor feeding a passthrough op instead of the op's output.

    If ``tens`` is produced by exactly one operation whose type is listed in
    ``passthrough_nodes``, the single input of that operation is returned.
    In every other case ``tens`` is returned untouched.  ``arch`` is unused.
    """
    producers = tens.ops
    if len(producers) != 1:
        return tens

    producer = producers[0]
    if producer.type not in passthrough_nodes:
        return tens

    # A passthrough op is expected to forward exactly one tensor
    assert len(producer.inputs) == 1
    return producer.inputs[0]

def rewrite_concat(tens, arch):
    """Replace the concat op producing ``tens`` with per-input slice writes.

    Only applies when ``tens`` has a single producer that is a concat op, is
    that op's first output and the op is marked to run on the NPU.  Each
    concat input then gets its own "ConcatSliceWrite" operation that records
    the axis and the [start, end) range it occupies in ``tens``.

    ``tens`` is always returned; ``arch`` is unused.
    """
    if len(tens.ops) != 1 or not tens.ops[0].is_concat_op():
        return tens

    concat_op = tens.ops[0]
    if tens != concat_op.outputs[0]:
        return tens  # don't attempt to rewrite the min/max outputs of QuantizedConcat

    # Not supported so leave it and run on CPU
    if not concat_op.run_on_npu:
        return tens

    inputs, axis = concat_op.get_concat_inputs_axis()

    tens.ops = []
    write_pos = 0
    for idx, inp in enumerate(inputs):
        slice_start = write_pos
        write_pos += inp.shape[axis]

        slice_write = Operation("ConcatSliceWrite", concat_op.name + str(idx))
        slice_write.inputs = [inp]
        slice_write.outputs = [tens]
        slice_write.attrs["concat_axis"] = axis
        slice_write.attrs["concat_start"] = slice_start
        slice_write.attrs["concat_end"] = write_pos
        slice_write.run_on_npu = True
        tens.ops.append(slice_write)

    # The slices must tile the output exactly along the concat axis
    assert tens.shape[axis] == write_pos
    return tens

def rewrite_split(tens, arch):
    """Replace the split op producing ``tens`` with a "SplitSliceRead" op.

    Only applies when ``tens`` has a single producer that is a split op
    marked to run on the NPU.  The new op reads from the split's input and
    records the start/end offsets of the region that becomes ``tens``.

    ``tens`` is always returned; ``arch`` is unused.
    """
    if len(tens.ops) != 1 or not tens.ops[0].is_split_op():
        return tens

    split_op = tens.ops[0]

    # Not supported so leave it and run on CPU
    if not split_op.run_on_npu:
        return tens

    inp, outputs, axis, offset_start, offset_end = split_op.get_split_inputs_axis()

    tens.ops = []
    slice_read = Operation("SplitSliceRead", split_op.name)
    slice_read.inputs = [inp]

    # For Split the offset cannot be extracted from the tensor so it has to
    # be calculated from the index of the output tensor
    if axis is not None:
        rank = len(tens.shape)
        offset_start = [0] * rank
        offset_end = [0] * rank
        # Accumulate the extents of all outputs that precede this tensor
        for out in outputs:
            if out == tens:
                break
            offset_start[axis] += out.shape[axis]

        offset_end[axis] = offset_start[axis] + tens.shape[axis]

    slice_read.attrs["split_start"] = offset_start
    slice_read.attrs["split_end"] = offset_end
    slice_read.run_on_npu = True
    slice_read.set_output_tensor(tens)

    return tens

def needed_total_padding(input_size, stride, filter_size):
    """Return the total padding needed along one dimension.

    The output size is ``input_size / stride`` rounded up; the padding is the
    amount by which the input span covered by that many filter applications
    exceeds ``input_size`` (never negative).
    """
    num_outputs = (input_size + stride - 1) // stride
    covered_input = filter_size + stride * (num_outputs - 1)
    return max(0, covered_input - input_size)

def calc_padding_and_skirt(padding_type, kernel_size, stride, input_dims):
    """Compute the explicit padding and skirt for a kernel operation.

    ``kernel_size`` is indexed as (height, width); ``stride`` and
    ``input_dims`` are indexed with height at position 1 and width at
    position 2.

    Returns a ``(padding, skirt)`` pair, each a 4-tuple ordered
    (top, left, bottom, right).  Raises UnsupportedFeatureError when
    ``padding_type`` is neither ``b"SAME"`` nor ``b"VALID"``.
    """
    ypad = needed_total_padding(int(input_dims[1]), int(stride[1]), int(kernel_size[0]))
    xpad = needed_total_padding(int(input_dims[2]), int(stride[2]), int(kernel_size[1]))

    if padding_type == b"SAME":
        # Split the total padding; the odd element goes to the bottom/right
        top_pad, bottom_pad = ypad // 2, (ypad + 1) // 2
        left_pad, right_pad = xpad // 2, (xpad + 1) // 2
    elif padding_type == b"VALID":
        top_pad = bottom_pad = left_pad = right_pad = 0
    else:
        raise UnsupportedFeatureError("Unknown padding {}".format(str(padding_type)))

    padding = (top_pad, left_pad, bottom_pad, right_pad)
    # The skirt is derived from the total padding even when the explicit padding is zero
    skirt = (top_pad, left_pad, ypad - top_pad, xpad - left_pad)
    return padding, skirt

136

Tim Hall

2020-06-15 20:47:35 +0100

[diff] [blame]

137

Jacob Bohlin

2020-07-07 17:15:22 +0200

[diff] [blame]

138

def calc_upscaled_padding_and_skirt(padding_type, kernel_size, stride, input_dims, upscaling_factor):
    """Compute the padding and skirt for an operation on an upscaled input.

    ``kernel_size`` is indexed as (height, width); ``stride`` and
    ``input_dims`` are indexed with height at position 1 and width at
    position 2.  For ``b"SAME"`` padding the input height/width are
    multiplied by ``upscaling_factor`` before the total padding is computed.

    Returns a ``(padding, skirt)`` pair; both are the same 4-tuple ordered
    (top, left, bottom, right).

    Raises UnsupportedFeatureError when ``padding_type`` is neither
    ``b"SAME"`` nor ``b"VALID"``, matching calc_padding_and_skirt().
    """
    kernel_height, kernel_width = kernel_size[0], kernel_size[1]
    if padding_type == b"SAME":
        ypad = needed_total_padding(int(input_dims[1]) * upscaling_factor, int(stride[1]), int(kernel_height))
        xpad = needed_total_padding(int(input_dims[2]) * upscaling_factor, int(stride[2]), int(kernel_width))

        right_pad = ((xpad + 1) // upscaling_factor) - 1
        bottom_pad = ((ypad + 1) // upscaling_factor) - 1
        left_pad = max(kernel_width - 1 - right_pad, 0)
        top_pad = max(kernel_height - 1 - bottom_pad, 0)

    elif padding_type == b"VALID":
        right_pad = max(kernel_width - 2, 0)
        bottom_pad = max(kernel_height - 2, 0)
        left_pad = kernel_width - 1
        top_pad = kernel_height - 1
    else:
        # An assert would be stripped under "python -O" and the code below would
        # then fail with an unrelated UnboundLocalError; raise explicitly instead.
        raise UnsupportedFeatureError("Unknown padding {}".format(str(padding_type)))

    padding = (top_pad, left_pad, bottom_pad, right_pad)
    skirt = padding
    return padding, skirt

160

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

161

162

def fixup_conv2d_backprop(op, arch):
    """Rewrite a "Conv2DBackpropInput" op in place.

    The first and third inputs are swapped, the type becomes
    "Conv2DBackpropInputSwitchedBias", a zero-filled int32 bias tensor is
    appended when the op has fewer than four inputs, and all stride
    attributes are reset to 1.  Other op types are returned unchanged.
    ``arch`` is unused.
    """
    if op.type != "Conv2DBackpropInput":
        return op

    # flip the inputs
    first, third = op.inputs[0], op.inputs[2]
    op.inputs[0] = third
    op.inputs[2] = first
    op.type = "Conv2DBackpropInputSwitchedBias"

    # The bias needs one entry per element of the weights' last dimension
    weight_sets = op.inputs[1].shape[3]

    if len(op.inputs) < 4:
        # Add bias/scale tensor filled with zeros
        bias_tens = Tensor([weight_sets], DataType.int32, op.name + "_bias_tens")
        bias_tens.values = [0] * weight_sets
        bias_tens.quant_values = [0] * weight_sets
        bias_op = Operation("Const", op.name + "_bias")
        bias_op.set_output_tensor(bias_tens)
        op.add_input_tensor(bias_tens)

    # Update strides
    op.attrs["stride_w"] = 1
    op.attrs["stride_h"] = 1
    op.attrs["strides"] = (1, 1, 1, 1)

    return op

Charles Xu

2020-07-02 15:12:40 +0200

[diff] [blame]

185

# Convert the op to an elementwise add

186

def convert_resizebilinear_1x1_to_add(op):
    """Turn ``op`` into an elementwise "AddAct" with an all-zero operand.

    A new zero-filled tensor shaped like the op's output becomes input 0 and
    the original first input becomes input 1.  The producer of the original
    second input is given the new tensor as its output.  The op is modified
    in place and returned.
    """
    op.type = "AddAct"
    op.name = op.name + "_add"
    op.attrs["npu_block_type"] = NpuBlockType.ElementWise
    op.attrs["resizebilinear"] = True

    # Create an input tensor filled with zeros
    out_shape = op.outputs[0].shape
    zeros_tens = Tensor(out_shape, op.inputs[0].dtype, op.inputs[1].name + "_add")
    zeros_tens.values = np.zeros(out_shape)
    zeros_tens.quant_values = np.zeros(out_shape, np.uint8)

    quant = QuantizationParameters(0.0, 255.0)
    zeros_tens.quantization = quant
    quant.scale_f32 = 1.0
    quant.zero_point = 0

    zeros_tens.consumer_list = [op]
    # Re-use the producer of the old second input as producer of the zeros
    op.inputs[1].ops[0].set_output_tensor(zeros_tens)

    # Set the add inputs
    op.inputs[1] = op.inputs[0]
    op.inputs[0] = zeros_tens

    return op

def fixup_resizebilinear(op, arch):
    """Simplify trivial "ResizeBilinear" ops in place.

    * Input with height and width both 1: converted to an elementwise add.
    * Input shape equal to output shape: the op becomes an "Identity" that
      keeps only its first input.

    Any other op is returned unchanged.  ``arch`` is unused.
    """
    if op.type != "ResizeBilinear":
        return op

    ifm_shape = op.inputs[0].shape
    if ifm_shape[1] == 1 and ifm_shape[2] == 1:
        convert_resizebilinear_1x1_to_add(op)
    elif ifm_shape == op.outputs[0].shape:
        # Bypass nop resizebilinear
        op.inputs = op.inputs[:1]
        op.type = "Identity"

    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

221

def fixup_fully_connected_input(op, arch):
    """Ensure a "FullyConnectedAct" op has a 2D [batch, n_in_elems] input.

    ``n_in_elems`` is the second-to-last dimension of the weights and the
    batch size is the input's element count divided by it (which must divide
    exactly).  When the input shape differs, input 0 is replaced by a
    reshaped tensor.  The op is returned; ``arch`` is unused.
    """
    if op.type != "FullyConnectedAct":
        return op

    ifm, weights = op.inputs[0], op.inputs[1]

    n_in_elems = weights.shape[-2]
    batch_size, leftover = divmod(ifm.elements(), n_in_elems)
    assert leftover == 0

    wanted_shape = [batch_size, n_in_elems]
    if ifm.shape != wanted_shape:
        # mismatch, insert a reshape to fix this.
        op.inputs[0] = create_reshape_tensor(ifm, wanted_shape)

    return op

def fixup_pack_input(op, arch):
    """Insert a Reshape in front of every input of a "Pack" op.

    Each input is reshaped to its own shape with a size-1 dimension inserted
    at the pack axis, and the op type becomes "PackReshaped".  Other op types
    are returned unchanged.  ``arch`` is unused.
    """
    if op.type != "Pack":
        return op

    # Pack is also referred to as Stack
    # Requires the rewrite_concat function to be called on the op afterwards
    axis = int(op.attrs["axis"])
    first_shape = op.inputs[0].shape
    expanded_shape = first_shape[:axis] + [1] + first_shape[axis:]

    # Construct 1 shape tensor to be used by all inserted reshape ops
    shape_tens = create_const_tensor(op.name + "_reshape_shape", [1], DataType.int32, expanded_shape)

    for idx, inp in enumerate(op.inputs):
        reshaped = inp.clone("_reshaped")
        reshaped.set_all_shapes(expanded_shape)

        reshape_op = Operation("Reshape", "{}{}_reshape".format(op.name, idx))
        reshape_op.attrs["new_shape"] = expanded_shape
        reshape_op.inputs = [inp, shape_tens]
        reshape_op.set_output_tensor(reshaped)

        op.inputs[idx] = reshaped

    op.type = "PackReshaped"
    return op

def fixup_unpack_output(tens, arch):
    """Insert Reshape ops after an "Unpack" or rank-changing "StridedSlice".

    The first producer of ``tens`` is inspected.  For a StridedSlice that
    uses an ellipsis mask, or both new-axis and shrink-axis masks, or neither
    of those two masks, nothing is changed.  Otherwise every output of the op
    is replaced by a clone with the equal-rank shape, followed by a Reshape
    back to the original output tensor.  An Unpack op is additionally renamed
    to "UnpackReshaped".

    ``tens`` is always returned; ``arch`` is unused.
    """
    op = tens.ops[0]
    if op.type not in {"Unpack", "StridedSlice"}:
        return tens

    # Unpack is also referred to as Unstack
    # Requires the rewrite_split function to be called on the op afterwards

    reshape_input_shape = tens.shape
    if op.type == "StridedSlice":
        new_axis_mask = op.attrs["new_axis_mask"]
        shrink_axis_mask = op.attrs["shrink_axis_mask"]
        ellipsis_mask = op.attrs["ellipsis_mask"]

        if (new_axis_mask != 0 and shrink_axis_mask != 0) or ellipsis_mask != 0:
            # Not supported, will be put on CPU
            return tens
        if shrink_axis_mask == 0 and new_axis_mask == 0:
            # Equal Rank StridedSlice, no need to insert reshape
            return tens

        if shrink_axis_mask != 0:
            shrunk_dims = 0
            while shrink_axis_mask:
                prev_mask = shrink_axis_mask
                shrunk_dims += 1
                # Clear the lowest set bit; the difference isolates that bit
                shrink_axis_mask &= shrink_axis_mask - 1
                axis = int(math.log2(prev_mask - shrink_axis_mask))
                reshape_input_shape = reshape_input_shape[:axis] + [1] + reshape_input_shape[axis:]

            assert len(tens.shape) == (len(op.inputs[0].shape) - shrunk_dims)
            op.attrs["shrink_axis_mask"] = 0

        elif new_axis_mask != 0:
            added_dims = 0
            while new_axis_mask:
                prev_mask = new_axis_mask
                added_dims += 1
                # Clear the lowest set bit; the difference isolates that bit
                new_axis_mask &= new_axis_mask - 1
                axis = int(math.log2(prev_mask - new_axis_mask))
                reshape_input_shape = reshape_input_shape[:axis] + reshape_input_shape[(axis + 1) :]
                new_axis_mask >>= 1

            assert len(tens.shape) == (len(op.inputs[0].shape) + added_dims)
            op.attrs["new_axis_mask"] = 0
    else:
        axis = int(op.attrs["axis"])
        op.type = "UnpackReshaped"
        reshape_input_shape = tens.shape[:axis] + [1] + tens.shape[axis:]

    # Construct 1 shape tensor to be used by all inserted reshape ops
    shape_tens = create_const_tensor(op.name + "_reshape_shape", [1], DataType.int32, tens.shape)

    for idx, out_tens in enumerate(op.outputs):
        # The op now produces this intermediate tensor ...
        reshape_in = out_tens.clone("_reshaped")
        reshape_in.set_all_shapes(reshape_input_shape)
        reshape_in.ops = [op]

        # ... and a Reshape produces the original output tensor from it
        reshape_op = Operation("Reshape", "{}{}_reshape".format(op.name, idx))
        reshape_op.attrs["new_shape"] = reshape_input_shape
        reshape_op.inputs = [reshape_in, shape_tens]
        reshape_op.set_output_tensor(out_tens)

        op.outputs[idx] = reshape_in

    return tens

def add_padding_fields(op, arch):
    """Add "explicit_padding" and "skirt" attributes to ops that use padding.

    Ops without a "padding" attribute are returned unchanged.  The kernel
    size is taken from the weights for convolutions, from "ksize" for
    pooling/ResizeBilinear/ReduceSum and from "ksizes" for
    ExtractImagePatches.  Raises UnsupportedFeatureError for any other op
    type that carries a "padding" attribute.  ``arch`` is unused.
    """
    if "padding" not in op.attrs:
        return op

    op_type = op.type
    if "Conv" in op_type:
        kernel_size = op.inputs[1].shape[:2]
    elif "Pool" in op_type or op_type in ("ResizeBilinear", "ReduceSum"):
        kernel_size = op.attrs["ksize"][1:3]
    elif op_type == "ExtractImagePatches":
        kernel_size = op.attrs["ksizes"][1:3]
    else:
        raise UnsupportedFeatureError("Unknown operation that uses padding: {}".format(op.type))
    input_shape = op.inputs[0].shape

    if op_type == "Conv2DBackpropInputSwitchedBias":
        # Ratio of output height to input height
        upscaling_factor = op.outputs[0].shape[1] // input_shape[1]
        padding, skirt = calc_upscaled_padding_and_skirt(
            op.attrs["padding"], kernel_size, op.attrs["strides"], input_shape, upscaling_factor
        )
    else:
        dilation_h, dilation_w = op.get_dilation_h_w()
        # Effective kernel extent once dilation is taken into account
        dilated_kernel_size = [dilation_h * (kernel_size[0] - 1) + 1, dilation_w * (kernel_size[1] - 1) + 1]
        padding, skirt = calc_padding_and_skirt(
            op.attrs["padding"], dilated_kernel_size, op.attrs["strides"], input_shape
        )

    op.attrs["explicit_padding"] = padding
    op.attrs["skirt"] = skirt

    return op

Jacob Bohlin

2020-05-20 09:03:40 +0200

[diff] [blame]

364

# Operator type names grouped by the kind of NPU block that handles them.
conv_op = {"Conv2D", "QuantizedConv2D", "Conv2DBackpropInputSwitchedBias", "Conv2DBiasAct"}
fc_op = {
    "MatMul",
    "QuantizedMatMul",
    "BlockLSTM",
    "RnnAct",
    "UnidirectionalSequenceRnnAct",
    "BidirectionalSequenceRnnAct",
    "LstmAct",
    "UnidirectionalSequenceLstmAct",
    "BidirectionalSequenceLstmAct",
    "FullyConnectedAct",
}
depthwise_op = {"DepthwiseConv2dNative", "DepthwiseConv2dBiasAct"}
pool_op = {
    "AvgPool",
    "MaxPool",
    "QuantizedAvgPool",
    "QuantizedMaxPool",
    "AvgPoolAct",
    "MaxPoolAct",
    "ResizeBilinear",
}
reduce_sum_ops = {"ReduceSum"}
elementwise_op = {"AddAct", "MulAct", "SubAct", "Maximum", "Minimum", "LeakyRelu", "Abs", "CLZ", "SHL", "SHR"}
binary_elementwise_op = {"AddAct", "MulAct", "SubAct", "Maximum", "Minimum"}
# Activation functions considered by fixup_act_reorder()
activation_ops = {"Relu", "Relu6", "ReluN1To1", "Sigmoid", "Tanh"}
# Operators that get_prepend_op() is allowed to step back through
memory_only_ops = {"Reshape"}

388

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

389

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

390

# Check if the op can be reordered

391

def get_prepend_op(op):
    """Find the memory-only op that ``op`` could be moved in front of.

    Starting from the last producer of the first input, the chain of
    memory-only ops (each with one output that has one consumer) is followed
    backwards.  The earliest such op is returned, provided the op preceding
    it also has a single output with a single consumer; otherwise None.
    None is also returned when no memory-only op precedes ``op``.
    """

    def feeds_single_consumer(candidate):
        # True when the op has one output and that output has one consumer
        return len(candidate.outputs) == 1 and len(candidate.outputs[0].consumers()) == 1

    # The op should be reordered between prev_op and prep_op
    prev_op = op.inputs[0].ops[-1]
    prep_op = None
    while prev_op.type in memory_only_ops and feeds_single_consumer(prev_op):
        prep_op = prev_op
        prev_op = prev_op.inputs[0].ops[-1]

    if prev_op is not None and feeds_single_consumer(prev_op):
        return prep_op

    return None

def mark_npu_block_type(op, arch):
    """Store the NPU block type matching ``op.type`` in the op's attributes.

    The type is looked up in the module-level operator sets; ops found in
    none of them get ``NpuBlockType.Default``.  The result is written to
    ``op.attrs["npu_block_type"]`` and the op is returned.  ``arch`` is
    unused.
    """
    op_type = op.type
    if op_type in conv_op:
        block_type = NpuBlockType.ConvolutionMxN
    elif op_type in fc_op:
        block_type = NpuBlockType.VectorProduct
    elif op_type in depthwise_op:
        block_type = NpuBlockType.ConvolutionDepthWise
    elif op_type in pool_op:
        block_type = NpuBlockType.Pooling
    elif op_type in elementwise_op:
        block_type = NpuBlockType.ElementWise
    elif op_type in reduce_sum_ops:
        block_type = NpuBlockType.ReduceSum
    else:
        block_type = NpuBlockType.Default

    op.attrs["npu_block_type"] = block_type
    return op

def convert_depthwise_to_conv(op, arch):
    """Convert a depthwise convolution with a depth multiplier to a Conv2D.

    Depthwise is equivalent to a single conv2d if the ifm depth is 1 and
    the ofm depth equals the depth multiplier.  If those conditions are
    true, the operator type is switched and the last two weight axes are
    swapped.

    Ops that are not depthwise, or have a depth multiplier of 1, are returned
    unchanged.  Raises UnsupportedFeatureError for a depth multiplier other
    than 1 when the conditions above do not hold.  ``arch`` is unused.
    """
    if "DepthwiseConv2d" not in op.type or op.attrs["depth_multiplier"] == 1:
        return op

    ifm_tensor = op.inputs[0]
    weight_tensor = op.inputs[1]
    ofm_tensor = op.outputs[0]

    convertible = (ifm_tensor.shape[3] == 1) and (ofm_tensor.shape[3] == op.attrs["depth_multiplier"])
    if not convertible:
        raise UnsupportedFeatureError(
            "Unsupported DepthwiseConv2d with depth_multiplier = {}, ifm channels = {}, ofm channels = {}".format(
                op.attrs["depth_multiplier"], ifm_tensor.shape[3], ofm_tensor.shape[3]
            )
        )

    # Change op type to Conv2d
    op.type = op.type.replace("DepthwiseConv2d", "Conv2D")
    del op.attrs["channel_multiplier"]
    del op.attrs["depth_multiplier"]

    # Swap the last two weight axes and keep the tensor shapes in sync
    swapped = np.transpose(weight_tensor.quant_values, (0, 1, 3, 2))
    weight_tensor.quant_values = swapped
    weight_tensor.set_all_shapes(list(swapped.shape))

    return op

Jacob Bohlin

2020-06-23 12:12:56 +0200

[diff] [blame]

452

def reorder_depthwise_weights(op, arch):
    """Swap the last two weight axes of depthwise convolution ops.

    The weight tensor (input 1) gets transposed quantised values, matching
    shapes and its ``weight_transpose_depthwise`` flag set.  Other ops are
    returned unchanged.  ``arch`` is unused.
    """
    if "DepthwiseConv2d" not in op.type:
        return op

    weights = op.inputs[1]
    transposed = np.transpose(weights.quant_values, (0, 1, 3, 2))
    weights.quant_values = transposed
    weights.set_all_shapes(list(transposed.shape))
    weights.weight_transpose_depthwise = True

    return op

Michael McGeagh

2020-07-29 13:11:43 +0100

[diff] [blame]

462

def convert_conv_to_fc(op, arch):
    """Rewrite a 1x1 "Conv2DBiasAct" on a 1x1 input as "FullyConnectedAct".

    Conv 1x1 can be equivalent to Fully Connected.  By representing certain
    convs as fully connected layers, Vela can better determine whether or not
    to use caching/double buffering for the weights.  (Weights don't need to
    be reloaded for convs when IFM H and W are 1.)

    The op is modified in place: its attributes are replaced, the weights
    become 2D, its output becomes a 2D clone and a Reshape restores the
    original output tensor.  Non-matching ops are returned unchanged.
    ``arch`` is unused.
    """
    if op.type != "Conv2DBiasAct":
        return op

    _, ifm_h, ifm_w, _ = op.inputs[0].shape
    kernel_h, kernel_w, _, _ = op.inputs[1].shape
    if not (ifm_h == 1 and ifm_w == 1 and kernel_h == 1 and kernel_w == 1):
        return op

    # Overwrite this op as a Fully Connected Op
    op.name += "_fc"
    op.type = "FullyConnectedAct"
    faf = op.attrs.get("fused_activation_function", None)
    op.attrs = {
        "fused_activation_function": faf,
        "weights_format": 0,
        "npu_block_type": NpuBlockType.VectorProduct,
    }

    # Reshape Weights to be 2D. HWIO becomes just IO (as H and W are 1, they can just be dropped)
    weights = op.inputs[1]
    weights.quant_values = weights.quant_values.squeeze(axis=(0, 1))
    weights.set_all_shapes(list(weights.quant_values.shape))

    # The output from a fully connected is expected to be 2D so we need to add a reshape layer to convert it
    # back to 4D afterwards as the next layer is expecting that shape
    ofm_4d = op.outputs[0]

    # Reshape this ops output to be 2D: {(N*H*W), C}, written here as {1, C}
    # NOTE(review): only H and W are checked above; this assumes N is 1 - confirm
    ofm_2d = ofm_4d.clone("_fc")
    ofm_2d.set_all_shapes([1, ofm_2d.shape[-1]])
    ofm_2d.ops = [op]

    # Add a reshape after the new OFM to convert it back to the original 4D shape
    reshape_name = op.name + "_reshape"
    shape_tens = create_const_tensor(reshape_name + "_shape", [1], DataType.int32, ofm_4d.shape)
    reshape_op = Operation("Reshape", reshape_name)
    reshape_op.attrs["new_shape"] = ofm_4d.shape
    reshape_op.inputs = [ofm_2d, shape_tens]
    reshape_op.set_output_tensor(ofm_4d)

    # Replace this ops OFM to point to the 2D tensor
    op.outputs[0] = ofm_2d
    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

503

# Reorder activation op if it's after the memory only operations

504

def fixup_act_reorder(op, arch):
    """Move an activation op in front of the memory-only ops preceding it.

    When get_prepend_op() finds a suitable op, a clone of the activation is
    inserted ahead of it with a freshly cloned output tensor that carries the
    original activation's output quantisation.  The original op is turned
    into an "Identity".  The original op is returned; ``arch`` is unused.
    """
    if op.type not in activation_ops:
        return op

    prep_op = get_prepend_op(op)
    if prep_op is None:
        return op

    # The clone consumes what the memory-only op used to consume
    moved_act = op.clone("_reordered")
    moved_act.inputs = [prep_op.inputs[0]]

    moved_out = moved_act.inputs[0].clone("_acted")
    moved_out.quantization = op.outputs[0].quantization.clone()
    moved_act.set_output_tensor(moved_out)

    # Feed the memory-only op from the activation and propagate quantisation
    prep_op.inputs[0] = moved_out
    prep_op.outputs[0].quantization = moved_out.quantization.clone()

    # Mark the op so that it will be removed as passthrough later on
    op.type = "Identity"
    return op

Louis Verhaard

2020-06-03 08:56:44 +0200

[diff] [blame]

520

Charles Xu

2020-05-13 10:15:26 +0200

[diff] [blame]

521

def fixup_elementwise_with_scalars(op, arch):
    """Align the input ranks of a binary elementwise op.

    * Both inputs non-scalar: the lower-rank shape is expanded with
      full_shape() to the rank of the other.
    * One input has shape ``[]`` and no quantised values: its shape and
      storage shape become all-ones with the rank of the other input.

    The op is returned; ``arch`` is unused.
    """
    if op.type not in binary_elementwise_op:
        return op

    ifm, ifm2, _, _ = op.get_ifm_ifm2_weights_ofm()

    if ifm2.shape != [] and ifm.shape != []:
        ifm_rank, ifm2_rank = len(ifm.shape), len(ifm2.shape)
        if ifm_rank > ifm2_rank:
            ifm2.shape = full_shape(ifm_rank, ifm2.shape, 1)
        elif ifm_rank < ifm2_rank:
            ifm.shape = full_shape(ifm2_rank, ifm.shape, 1)
    elif ifm.shape == [] and ifm.quant_values is None:
        # IFM is marked as a scalar, but is a result of an operation; change it to a shape of size 1
        ifm.shape = len(ifm2.shape) * [1]
        ifm.storage_shape = ifm.shape
    elif ifm2.shape == [] and ifm2.quant_values is None:
        # IFM2 is marked as a scalar, but is a result of an operation; change it to a shape of size 1
        ifm2.shape = len(ifm.shape) * [1]
        ifm2.storage_shape = ifm2.shape

    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

539

Louis Verhaard

2020-06-03 08:56:44 +0200

[diff] [blame]

540

Tim Hall

4e12776

2020-05-15 16:05:49 +0100

[diff] [blame]

541

# Set input/output tensor equivalence to the same id for memory operations

542

def set_tensor_equivalence(op, arch):
    """Give every input of a "Reshape" op the equivalence id of its output.

    Other op types are returned unchanged.  ``arch`` is unused.
    """
    if op.type != "Reshape":
        return op

    shared_id = op.outputs[0].equivalence_id
    for in_tens in op.inputs:
        in_tens.equivalence_id = shared_id

    return op

Fredrik Svedberg

2020-06-03 15:43:31 +0200

[diff] [blame]

550

def convert_softmax(op, arch):
    """Replace an NPU "Softmax" op with the graph built by SoftMax.

    Returns ``SoftMax(op).get_graph()`` for Softmax ops marked to run on the
    NPU and the op itself otherwise.  ``arch`` is unused.
    """
    if op.type != "Softmax" or not op.run_on_npu:
        return op

    return SoftMax(op).get_graph()

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

557

def convert_mul_max_to_abs_or_lrelu(op, arch):
    r"""Whenever there is a subgraph with this topology:

       Input    X   For X = -1 or X > 0
       |   \   /    This subgraph can be replaced with either
       |    Mul     an Abs (if X = -1) or a LeakyReLU (if X > 0)
       |   /
       Max

    The "Maximum" op is rewritten in place and returned; ops that do not
    match the pattern are returned unchanged.  ``arch`` is unused.
    """
    if op.type != "Maximum":
        return op

    # finds the Mul input(s) to the Max
    mul_outputs = [inp for inp in op.inputs if inp.ops[0].type == "MulAct"]
    num_muls = len(mul_outputs)
    if num_muls == 1:
        mul = mul_outputs[0].ops[0]
    elif num_muls == 2:
        # In the case both inputs are Muls, find the one with the same input as the Max
        matching = [m for m in mul_outputs if len(set(op.inputs + m.ops[0].inputs)) == 1]
        mul = matching[0].ops[0]
    else:
        # No Mul inputs
        return op

    # make sure the Mul doesn't have any other consumers
    if len(mul.outputs[0].consumers()) != 1:
        return op
    # make sure the Mul doesn't have a faf
    if mul.attrs["fused_activation_function"]:
        return op

    # finds the branched input that goes to both the Max and the Mul
    shared = set(op.inputs) & set(mul.inputs)
    if len(shared) != 1:
        return op
    shared_in = shared.pop()

    # find the constant scalar input to the Mul
    const_tens = (set(mul.inputs) - {shared_in}).pop()
    # check that it is a scalar
    if const_tens.shape != []:
        return op
    const = const_tens.ops[0]
    # check that it is a constant
    if const.type != "Const":
        return op

    val = const.outputs[0].values
    if val >= 0:
        new_op = "LeakyRelu"
        op.attrs["alpha"] = val
    elif val == -1:
        new_op = "Abs"
    else:
        return op

    # Rename the op and its output, then drop the Mul branch
    op.type = op.type.replace("Maximum", new_op)
    op.name = op.name.replace("Maximum", new_op)
    op.outputs[0].name = op.outputs[0].name.replace("Maximum", new_op)
    op.inputs = [shared_in]

    return op

Dwight Lidman

2020-05-29 09:37:03 +0200

[diff] [blame]

618

def add_attrs_to_resizebilinear(op, arch):
    """Add padding, stride and kernel attributes to an NPU "ResizeBilinear".

    Only ops whose output height/width are exactly twice the input (without
    "align_corners") or twice the input minus one (with "align_corners") are
    updated; the input tensor's resampling mode is then set to NEAREST.  All
    other ops are returned unchanged.  ``arch`` is unused.
    """
    if op.type != "ResizeBilinear" or not op.run_on_npu:
        return op

    input_tensor = op.inputs[0]
    doubled = [input_tensor.shape[1] * 2, input_tensor.shape[2] * 2]
    out_shape = op.outputs[0].shape[1:3]

    if not op.attrs["align_corners"] and out_shape == doubled:
        # this means the output is supposed to be a x2 upscale,
        # so we need to do SAME padding
        padding = b"SAME"
    elif op.attrs["align_corners"] and out_shape == [doubled[0] - 1, doubled[1] - 1]:
        # here we can just run the avg pool without padding and
        # produce a (M * 2 - 1, N * 2 - 1) sized output
        padding = b"VALID"
    else:
        return op

    op.attrs["padding"] = padding
    input_tensor.resampling_mode = resampling_mode.NEAREST
    op.attrs["strides"] = (1, 1, 1, 1)
    op.attrs["ksize"] = (1, 2, 2, 1)

    return op

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

638

def supported_operator_check(op, arch):
    """Set ``op.run_on_npu`` from the architecture's supported-operator check.

    The op is returned after its flag has been updated.
    """
    checker = arch.supported_operators
    op.run_on_npu = checker.is_operator_supported(op)
    return op

def optimise_graph_a(nng, arch, verbose_graph=False):
    """Run the first set of graph rewrites over every subgraph of ``nng``.

    A first pre-order pass applies fixup_unpack_output to tensors and the
    operator rewrites listed below to operators.  A second pass over all
    subgraphs then removes passthrough tensors.  With ``verbose_graph`` the
    graph is printed before and after.  Returns ``nng``.
    """
    if verbose_graph:
        nng.print_graph()

    operator_passes = [
        # mark block type and check if the operations are supported
        mark_npu_block_type,
        set_tensor_equivalence,
        supported_operator_check,
        # then do any rewrites of supported operators
        convert_depthwise_to_conv,
        convert_conv_to_fc,
        convert_softmax,
        fixup_fully_connected_input,
        fixup_pack_input,
        fixup_conv2d_backprop,
        fixup_act_reorder,
        add_attrs_to_resizebilinear,
        add_padding_fields,
        mark_npu_block_type,
        fixup_elementwise_with_scalars,
        reorder_depthwise_weights,
        fixup_resizebilinear,
        # convert_mul_max_to_abs_or_lrelu # TODO: enable optimisation once quantisation issues are resolved
    ]
    tensor_passes = [fixup_unpack_output]

    # rewrite graph pass
    for sg_idx, subgraph in enumerate(nng.subgraphs):
        nng.subgraphs[sg_idx] = rewrite_graph.rewrite_graph_pre_order(
            subgraph, arch, tensor_passes, operator_passes, rewrite_unsupported=False
        )

    # remove passthrough tensors, only after every subgraph has been rewritten
    for sg_idx, subgraph in enumerate(nng.subgraphs):
        nng.subgraphs[sg_idx] = rewrite_graph.rewrite_graph_pre_order(
            subgraph, arch, [remove_passthrough_tensor], []
        )

    if verbose_graph:
        nng.print_graph()
    return nng

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

683

Tim Hall

2020-04-27 18:20:16 +0100

[diff] [blame]

684

def optimise_graph_b(nng, arch, verbose_graph=False):

if verbose_graph:

nng.print_graph()

for idx, sg in enumerate(nng.subgraphs):

689

# combined rewrite graph pass

Diego Russo

2020-04-14 18:41:58 +0100

[diff] [blame]

690

nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [rewrite_concat, rewrite_split], [])

Tim Hall