Blame - examples/gemm_tuner/GemmTuner.py - ml/ComputeLibrary

2020-06-24 17:29:38 +0100

[diff] [blame]

1

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

2

#

3

# SPDX-License-Identifier: MIT

4

#

5

# Permission is hereby granted, free of charge, to any person obtaining a copy

6

# of this software and associated documentation files (the "Software"), to

7

# deal in the Software without restriction, including without limitation the

8

# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

9

# sell copies of the Software, and to permit persons to whom the Software is

10

# furnished to do so, subject to the following conditions:

11

#

12

# The above copyright notice and this permission notice shall be included in all

13

# copies or substantial portions of the Software.

14

#

15

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

16

# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

17

# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

18

# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

19

# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

20

# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

# SOFTWARE.

#!/usr/bin/python3

import argparse

import csv

import json

import logging

import math

import os

from collections import Counter, defaultdict, deque, namedtuple

32

from enum import Enum

33

from pathlib import Path

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

34

from typing import Deque, Dict, Generator, List, NamedTuple, Set, Tuple, Union

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

35

36

################################################################################

37

# Types

38

################################################################################

39

40

# Gemm strategy

41

Strategy = Enum("Strategy", ["Native", "ReshapedOnlyRHS", "Reshaped"])

42

43

# Gemm parameter

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

44

45

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

46

class GEMMParam(NamedTuple):

47

M: int # Number of lhs matrix rows

48

N: int # Number of rhs matrix columns

49

K: int # Number of lhs matrix columns/rhs matrix rows

50

B: int # Batch size

Eren Kopuz

2020-07-13 12:37:06 +0100

[diff] [blame]

51

data_type: str # Data type

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

52

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

53

@classmethod

54

def parse_from_strs(cls, *M_N_K_B, data_type):

55

return cls(*map(int, M_N_K_B), str(data_type))

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

56

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

57

def __str__(self):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

58

return ",".join(map(str, self))

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

59

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

60

61

# Gemm configuration for strategy Native

62

class NativeGEMMConfig(NamedTuple):

63

m0: int # Number of rows processed by the matrix multiplication

64

n0: int # Number of columns processed by the matrix multiplication

65

k0: int # Number of partial accumulations performed by the matrix multiplication

66

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

67

@classmethod

68

def parse_from_strs(cls, *args):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

69

(*mnk,) = map(int, args)

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

70

return cls(*mnk)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

71

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

72

def __str__(self):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

73

return ",".join(map(str, self))

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

74

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

75

76

# Gemm configuration for strategy Reshaped Only RHS

77

class ReshapedOnlyRHSGEMMConfig(NamedTuple):

78

m0: int # Number of rows processed by the matrix multiplication

79

n0: int # Number of columns processed by the matrix multiplication

80

k0: int # Number of partial accumulations performed by the matrix multiplication

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

81

# Number of horizontal blocks of size (k0xn0) stored on the same output row

82

h0: int

83

# Interleave rhs matrix (1) / Do not interleave rhs matrix (0)

84

interleave_rhs: bool

85

# Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)

86

transpose_rhs: bool

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

87

# Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)

88

export_to_cl_image_rhs: bool

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

89

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

90

@classmethod

91

def parse_from_strs(cls, *args):

92

(*mnkh, interleave_rhs, transpose_rhs, export_to_cl_image_rhs,) = map(int, args)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

93

interleave_rhs = interleave_rhs == 1

94

transpose_rhs = transpose_rhs == 1

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

95

export_to_cl_image_rhs = export_to_cl_image_rhs == 1

96

return cls(*mnkh, interleave_rhs, transpose_rhs, export_to_cl_image_rhs)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

97

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

98

def __str__(self):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

99

return ",".join(map(str, self))

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

100

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

101

102

# Gemm configuration for strategy Reshaped

103

class ReshapedGEMMConfig(NamedTuple):

104

m0: int # Number of rows processed by the matrix multiplication

105

n0: int # Number of columns processed by the matrix multiplication

106

k0: int # Number of partial accumulations performed by the matrix multiplication

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

107

# Number of vertical blocks of size (m0xk0) stored on the same output row

108

v0: int

109

# Number of horizontal blocks of size (k0xn0) stored on the same output row

110

h0: int

111

# Interleave lhs matrix (1) / Do not interleave lhs matrix (0)

112

interleave_lhs: bool

113

# Interleave rhs matrix (1) / Do not interleave rhs matrix (0)

114

interleave_rhs: bool

115

# Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)

116

transpose_rhs: bool

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

117

# Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)

118

export_to_cl_image_rhs: bool

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

119

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

120

@classmethod

121

def parse_from_strs(cls, *args):

122

(*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs, export_to_cl_image_rhs,) = map(int, args)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

123

interleave_lhs = interleave_lhs == 1

124

interleave_rhs = interleave_rhs == 1

125

transpose_rhs = transpose_rhs == 1

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

126

export_to_cl_image_rhs = export_to_cl_image_rhs == 1

127

return cls(*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs, export_to_cl_image_rhs)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

128

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

129

def __str__(self):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

130

return ",".join(map(str, self))

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

131

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

132

133

# Measurement we take from the benchmark result.

134

class Measurement(NamedTuple):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

135

opencl_timer_ms_reshape: float

136

opencl_timer_ms_kernel: float

137

138

def get_total_ms(self):

139

return self.opencl_timer_ms_reshape + self.opencl_timer_ms_kernel

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

140

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

141

def is_close_to(self, other, tol):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

142

return math.fabs(self.get_total_ms() - other.get_total_ms()) < tol

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

143

144

def is_better_than(self, other, tol):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

145

return self.get_total_ms() < other.get_total_ms() and not self.is_close_to(

146

other

147

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

148

149

def __add__(self, other):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

150

return Measurement(

151

self.opencl_timer_ms_reshape + other.opencl_timer_ms_reshape,

152

self.opencl_timer_ms_kernel + other.opencl_timer_ms_kernel,

153

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

154

155

def __sub__(self, other):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

156

return Measurement(

157

self.opencl_timer_ms_reshape - other.opencl_timer_ms_reshape,

158

self.opencl_timer_ms_kernel - other.opencl_timer_ms_kernel,

159

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

160

161

def __mul__(self, other):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

162

return Measurement(

163

self.opencl_timer_ms_reshape * other.opencl_timer_ms_reshape,

164

self.opencl_timer_ms_kernel * other.opencl_timer_ms_kernel,

165

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

166

167

def __floordiv__(self, other):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

168

return Measurement(

169

self.opencl_timer_ms_reshape // other.opencl_timer_ms_reshape,

170

self.opencl_timer_ms_kernel // other.opencl_timer_ms_kernel,

171

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

172

173

def __truediv__(self, other):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

174

return Measurement(

175

self.opencl_timer_ms_reshape / other.opencl_timer_ms_reshape,

176

self.opencl_timer_ms_kernel / other.opencl_timer_ms_kernel,

177

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

178

179

def __pow__(self, power):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

180

return Measurement(

181

self.opencl_timer_ms_reshape ** power, self.opencl_timer_ms_kernel ** power

)

def __str__(self):

return ",".join(map(str, self))

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

186

187

188

# GEMMConfig Type

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

189

GEMMConfigT = Union[NativeGEMMConfig,

190

ReshapedOnlyRHSGEMMConfig, ReshapedGEMMConfig]

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

191

192

193

# Representation of the benchmark result from a single experiment

194

class BenchmarkResult(NamedTuple):

195

gemm_param: GEMMParam

196

strategy: Strategy

197

gemm_config: GEMMConfigT

198

measurement: Measurement

199

200

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

201

class GEMMBenchmarkResultRecorder:

202

""" A recorder that records and organises GEMM Benchmark results, and produces various reports on the record.

203

"""

204

205

SummaryLevel = Enum("SummaryLevel", ["Short", "Detailed"])

206

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

207

def __init__(self, tol=0.01):

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

208

""" Initializer

209

"""

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

210

self._benchmark_result_record: List[BenchmarkResult] = []

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

211

# Strategies recorded

212

self._strategies = set()

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

213

self._tol = tol

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

214

215

def add(self, benchmark_result: BenchmarkResult):

216

""" Add a benchmark result to the record.

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

217

"""

218

gemm_param, strategy, gemm_config, measurement = benchmark_result

219

# Update strategies encoutnered

220

self._strategies.add(strategy)

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

221

222

self._benchmark_result_record.append(benchmark_result)

223

224

def get_record(self) -> Generator[BenchmarkResult, None, None]:

225

""" Return an iterator that iterates over the record.

226

"""

227

yield from self._benchmark_result_record

228

229

def get_best_gemm_configs(self):

230

""" Get the best GEMMConfig set per GEMMParam per Strategy

231

"""

232

best_gc_sets: Dict[

233

Tuple[GEMMParam, Strategy], List[Tuple[GEMMConfig, Measurement]]

234

] = defaultdict(list)

235

for gemm_param, strategy, gemm_config, measurement in self.get_record():

236

best_gc_set = best_gc_sets.setdefault((gemm_param, strategy), [])

237

best_gc_set.append((gemm_config, measurement))

238

# Sort the best config set (list)

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

239

best_gc_set = sorted(

240

best_gc_set, key=lambda gc_and_m: gc_and_m[1].get_total_ms()

241

)

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

242

# Filter out configs that are beyond tolerance to the best GEMMConfig's measurement

243

best_gc, best_m = best_gc_set[0]

244

best_gc_set_new = [

245

(gemm_config, measurement)

246

for gemm_config, measurement in best_gc_set[1:]

247

if measurement.is_close_to(best_m, self._tol)

248

]

249

# Add back the best config

250

best_gc_set_new.insert(0, (best_gc, best_m))

251

best_gc_sets[(gemm_param, strategy)] = best_gc_set_new

return best_gc_sets

def get_best_gemm_configs_as_sequence(self):

256

""" Get the best GEMMConfig set per GEMMParam per Strategy, and flatten the result into a sequence

257

of BenchmarkResults

258

"""

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

259

for (

260

(gemm_param, strategy),

261

best_gc_sets,

262

) in self.get_best_gemm_configs().items():

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

263

for best_gemm_config, best_measurement in best_gc_sets:

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

264

yield BenchmarkResult(

265

gemm_param, strategy, best_gemm_config, best_measurement

266

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

267

268

def get_config_distributions(self):

269

""" Return GEMMConfigDistribution for each strategy

270

"""

271

gemm_config_distributions: Dict[Strategy, GEMMConfigDistribution] = defaultdict(

272

GEMMConfigDistribution

273

)

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

274

for benchmark_result in self.get_best_gemm_configs_as_sequence():

275

_, strategy, _, _ = benchmark_result

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

276

gemm_config_distributions[strategy].add(benchmark_result)

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

277

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

278

return gemm_config_distributions

279

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

280

def get_best_gemm_strategies(self):

281

""" Get the best Stratey per GEMMParam

282

"""

283

all_results: Dict[GEMMParam, List[Tuple[Strategy, Measurement]]] = defaultdict(

list

)

best_strategies: Dict[GEMMParam, Strategy] = {}

288

289

for gemm_param, strategy, gemm_config, measurement in self.get_record():

290

all_results[gemm_param].append((strategy, measurement))

291

292

for gemm_param, results_set in all_results.items():

293

# Sort the best results set (list)

294

results_set = sorted(

295

results_set, key=lambda s_and_m: s_and_m[1].get_total_ms()

296

)

297

# Select best Strategy

298

best_s, best_m = results_set[0]

299

best_strategies[gemm_param] = best_s

300

301

return best_strategies

302

303

def save_to_jsons(self, out_dir, only_best_config=True):

304

""" Save records to an output directory of JSON files.

305

The directory is organized such that each strategy gets its own JSON file.

306

The directory also includes a JSON file to define the best strategy per GEMM Param.

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

307

"""

308

if not os.path.exists(out_dir):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

309

logging.info(

310

"Output directory {} does not exist. Creating...".format(

311

out_dir)

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

312

)

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

313

os.mkdir(out_dir)

314

315

out_json_path = os.path.join(out_dir, "gemm_type_selection.json")

316

if check_out_path(out_json_path):

317

results = self.get_best_gemm_strategies()

318

results = {str(key): value.name for key, value in results.items()}

319

dump_json(out_json_path, results)

320

321

for strategy in self._strategies:

322

out_json_path = os.path.join(

323

out_dir, ("gemm_config_" + strategy.name.lower() + ".json")

324

)

325

if check_out_path(out_json_path):

326

record = (

327

self.get_best_gemm_configs_as_sequence()

328

if only_best_config

329

else self.get_record()

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

330

)

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

331

results = defaultdict(list)

332

for res in record:

333

if res.strategy == strategy:

334

results[str(res.gemm_param)].append(

335

{

336

"GEMMConfig": str(res.gemm_config),

337

"OpenCL_Timer_ms_reshape": str(

338

res.measurement.opencl_timer_ms_reshape

339

),

340

"OpenCL_Timer_ms_kernel": str(

341

res.measurement.opencl_timer_ms_kernel

),

}

)

dump_json(out_json_path, results)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

346

347

def summary(self, sum_level=SummaryLevel.Short):

348

""" Return the summary string of the record

349

"""

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

350

num_raw_records = sum(1 for _ in self.get_record())

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

351

gemm_params_per_strategy = defaultdict(list)

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

352

for gemm_param, strategy in self.get_best_gemm_configs().keys():

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

353

gemm_params_per_strategy[strategy].append(gemm_param)

354

global_summary = f"""

355

=== {self.__class__.__name__} Summary ===

356

[Global]

357

Strategies recorded: {", ".join(map(lambda s: s.name, self._strategies))}

358

Total number of results recorded: {num_raw_records}

[Per strategy]

"""

strategy_summaries = []

363

for strategy in gemm_params_per_strategy:

364

summary = f"""

365

Strategy {strategy.name}:

366

GEMM parameters:

367

Number of: {len(gemm_params_per_strategy[strategy])}

368

"""

369

if sum_level == self.__class__.SummaryLevel.Detailed:

370

summary += f"""

371

Content: {gemm_params_per_strategy[strategy]}

372

"""

373

strategy_summaries.append(summary)

374

return global_summary + "".join(strategy_summaries)

375

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

376

377

class GEMMConfigDistribution:

378

""" A representation of the GEMM Configuration distribution produced by the GEMMBenchmarkResultRecorder.

"""

def __init__(self):

""" Initializer

"""

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

384

self._gemm_config_dist: Dict[

385

GEMMConfig, List[Tuple[GEMMParam, Measurement]]

386

] = defaultdict(list)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

387

self._gemm_config_freq = Counter()

388

389

def add(self, benchmark_result: BenchmarkResult):

390

""" Add a benchmark result to the distribution

391

"""

392

gemm_param, _, gemm_config, measurement = benchmark_result

393

self._gemm_config_dist[gemm_config].append((gemm_param, measurement))

394

self._gemm_config_freq[gemm_config] += 1

395

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

396

def distribution(self):

397

return self._gemm_config_dist

398

399

def frequency(self):

400

""" Get the frequency of each (best) gemm config recorded

401

"""

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

402

return self._gemm_config_freq.most_common()

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

403

404

def best_config(self):

405

""" Get the overall best config, as voted by all benchmark results.

406

"""

407

return self._gemm_config_freq.most_common(1)

408

409

def std(self):

410

""" Get the standard deviation as a measure of dispersion of the distribution. We should aim for higher values

411

as they indicate there is high variation in the distribution. Thus the evidence of the best config is stronger.

412

"""

413

freqs = self._gemm_config_freq.values()

414

if len(freqs) == 0:

415

return 0

416

mean_freq = sum(freqs) / len(freqs)

417

return math.sqrt(sum((freq - mean_freq) ** 2 for freq in freqs) / len(freqs))

418

419

420

################################################################################

421

# Globals

422

################################################################################

423

424

# Gemm config type factory

425

# Produces a GEMMConfig type specific to a Strategy

426

GEMM_CONFIG_FACTORY = {

427

Strategy.Native: NativeGEMMConfig,

428

Strategy.ReshapedOnlyRHS: ReshapedOnlyRHSGEMMConfig,

429

Strategy.Reshaped: ReshapedGEMMConfig,

430

}

431

432

# Mapping from example binary name to Strategy

433

# Assume 1-to-1 mapping

434

EXAMPLE_FILE_2_STRATEGY = {

435

"benchmark_cl_gemm_native": Strategy.Native,

436

"benchmark_cl_gemm_reshaped_rhs_only": Strategy.ReshapedOnlyRHS,

437

"benchmark_cl_gemm_reshaped": Strategy.Reshaped,

438

}

439

440

# Gemm example arguments type factory

441

# Produces a Gemm_Example_Args type specific to a Strategy

442

# Gemm example arguments consist of:

443

# GEMMParam + GEMMConfig

444

# in that order.

445

# For example, the example args of running a reshaped rhs only example could be:

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

446

# 100,100,100,1, 4, 4, 4, 1, 1, 1, 0

447

# M ,N ,K, B,m0,n0,k0,h0,interleave_rhs,transpose_rhs,export_to_cl_image_rhs

448

# <-GEMMParam-><-------------GEMMConfig--------------------------------------->

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

449

# Note that the test strategy_name == strategy.name is in place to avoid unwanted enum aliases

450

GEMM_EXAMPLE_ARGS_FACTORY = {

Eren Kopuz

2020-07-13 12:37:06 +0100

[diff] [blame]

451

# We ignore the data type field from GEMMParam as that is extracted separately

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

452

strategy: namedtuple(

453

"{}_Gemm_Example_Args".format(strategy_name),

Eren Kopuz

2020-07-13 12:37:06 +0100

[diff] [blame]

454

GEMMParam._fields[:-1] + GEMM_CONFIG_FACTORY[strategy]._fields,

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

455

)

456

for strategy_name, strategy in Strategy.__members__.items()

457

if strategy_name == strategy.name

458

}

459

460

# File extension used for benchmark result json files

461

BENCHMARK_RESULT_JSON_EXTENSION = "gemmtuner_benchmark"

462

463

################################################################################

464

# Functions

465

################################################################################

466

467

468

def parse_benchmark_commandline(commandline: str) -> Dict[str, str]:

SiCong Li

2020-07-28 12:24:45 +0100

[diff] [blame]

469

""" Parse the benchmark example command-line string into a dictionary of command-line arguments

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

470

"""

Eren Kopuz

2020-07-13 12:37:06 +0100

[diff] [blame]

471

# Separate the data type option from the example_args portion of the string

472

commandline = commandline.replace(",--type=", " --type=")

473

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

474

args = commandline.split()

475

# Discard program name

476

args = args[1:]

477

# Split into a list of (argument name, argument value)

478

args = map(lambda arg: arg.split("="), args)

479

480

def transform(_name):

481

# Strip '-'/"--" if it exists

482

_name = _name.lstrip("-")

483

return _name

484

485

return {transform(name): val for name, val in args}

486

487

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

488

def extract_benchmark_results(

489

json_results: Dict, measurement_method="avg"

490

) -> Generator[BenchmarkResult, None, None]:

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

491

""" Parse the benchmark result and extract relevant information, namely:

GEMM param,

Strategy,

GEMM config,

Measurements

"""

for json_res in json_results:

498

# Get example test and test data.

499

# There should only be 1 test per run

500

example_tests = list(json_res["tests"].items())

501

assert len(example_tests) == 1

502

example_fn, example_test_data = example_tests[0]

503

504

# Process example file name

505

example_fn = example_fn.split(os.path.sep)[-1]

506

507

# Get strategy

508

strategy = EXAMPLE_FILE_2_STRATEGY[example_fn]

509

510

# Get gemm params + gemm configs from example args

511

benchmark_args = parse_benchmark_commandline(json_res["CommandLine"])

512

Gemm_Example_Args_T = GEMM_EXAMPLE_ARGS_FACTORY[strategy]

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

513

example_args = Gemm_Example_Args_T(

514

*(benchmark_args["example_args"].split(",")))

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

515

# Gemm_Example_Arg consists of GEMMParam first and then GEMMConfig (in that order)

Eren Kopuz

2020-07-13 12:37:06 +0100

[diff] [blame]

516

# However data type option is parsed separately from end of options, hence -1 is applied to fields length

517

gemm_param_fields_len = len(GEMMParam._fields) - 1

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

518

gemm_param = GEMMParam.parse_from_strs(

Eren Kopuz

2020-07-13 12:37:06 +0100

[diff] [blame]

519

*example_args[:gemm_param_fields_len],

520

data_type = benchmark_args["type"])

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

521

GEMMConfig = GEMM_CONFIG_FACTORY[strategy]

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

522

gemm_config = GEMMConfig.parse_from_strs(

523

*example_args[gemm_param_fields_len:])

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

524

525

# Get OpenCL_Time_Ms stats

526

measurements = list(example_test_data["measurements"].items())

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

527

# For reshaped RHS only we have two measurements (one also for the reshape kernel)

528

# Hence we must parse and sum them

529

measurement_ms_reshape = 0

530

measurement_ms_kernel = 0

531

for single_measurement in measurements:

532

measurement_instrument, data = single_measurement

533

# Get instrument name and assert that it is the one we expect

534

measurement_instrument_name = measurement_instrument.split("/")[0]

535

assert measurement_instrument_name == "OpenCLTimer"

536

# Take either the minimum or the average of the raw data as the measurement value

537

if measurement_method == "min":

538

measurement_val = min(data["raw"])

539

elif measurement_method == "avg":

540

measurement_val = sum(data["raw"]) / len(data["raw"])

541

else:

542

raise ValueError(

543

"Invalid measurement method: {}".format(measurement_method)

544

)

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

545

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

546

measurement_type = measurement_instrument.split("/")[1]

547

if "reshape" in measurement_type.split("_"):

548

measurement_ms_reshape = measurement_val

549

else:

550

measurement_ms_kernel = measurement_val

551

552

measurement = Measurement(

553

measurement_ms_reshape, measurement_ms_kernel)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

554

555

yield BenchmarkResult(gemm_param, strategy, gemm_config, measurement)

556

557

558

def parse_json(dir_name):

559

""" Glob all benchmark result json files and parse them into json objects (dicts).

560

"""

561

for res_fn in Path(dir_name).rglob("*.{}".format(BENCHMARK_RESULT_JSON_EXTENSION)):

562

with open(res_fn) as res_fp:

563

yield json.load(res_fp)

564

565

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

566

def check_out_path(out_path):

567

if os.path.exists(out_path):

568

overwrite = (

569

input(

570

"Output JSON {} already exists. Overwrite? [Y/N]: ".format(

out_path)

).lower()

== "y"

)

if not overwrite:

logging.info("Skipping {}".format(out_path))

577

return False

578

logging.info("Saving JSON file to {}".format(out_path))

return True

def dump_json(out_path, dict):

583

with open(out_path, "w") as f:

584

json.dump(dict, f)

585

logging.info("Saved")

586

587

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

588

################################################################################

589

# Main

590

################################################################################

591

592

593

def main(args):

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

594

logging.info(

595

"Searching best gemm configurations from {}".format(

596

args.benchmark_results_dir)

597

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

598

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

599

benchmark_results = extract_benchmark_results(

600

parse_json(args.benchmark_results_dir)

601

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

602

603

# Add all benchmark results to the recorder

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

604

benchmark_result_recorder = GEMMBenchmarkResultRecorder(tol=args.tolerance)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

605

for benchmark_result in benchmark_results:

606

benchmark_result_recorder.add(benchmark_result)

607

608

if args.debug:

609

recorder_sum_level = GEMMBenchmarkResultRecorder.SummaryLevel.Detailed

610

else:

611

recorder_sum_level = GEMMBenchmarkResultRecorder.SummaryLevel.Short

612

613

# Print overall summary of the recorded results

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

614

logging.info(benchmark_result_recorder.summary(

615

sum_level=recorder_sum_level))

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

616

617

# Get GEMM configuration distributions for each strategy

618

all_config_dists = benchmark_result_recorder.get_config_distributions()

619

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

620

logging.info("=== Result ===")

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

621

for strategy, config_dist in all_config_dists.items():

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

622

logging.info("Strategy: {}".format(strategy.name))

623

logging.debug("GEMM Config, Votes")

624

for config, freq in config_dist.frequency():

625

logging.debug("{}, {}".format(config, freq))

626

logging.info(

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

627

"Best GEMM Config: {} with std: {}".format(

628

config_dist.best_config(), config_dist.std()

629

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

630

)

631

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

632

# Save the recorded results to JSON files in output directory

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

633

if args.output_dir is not None:

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

634

benchmark_result_recorder.save_to_jsons(

635

args.output_dir, only_best_config=(not args.debug)

636

)

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

637

638

639

if __name__ == "__main__":

640

parser = argparse.ArgumentParser(description="CL GEMM Tuner")

641

parser.add_argument(

642

"-b",

643

"--benchmark_results",

644

dest="benchmark_results_dir",

metavar="PATH",

action="store",

type=str,

help="Path to benchmark result directory, where benchmark result json files have a file \

649

extension of '{}'".format(

650

BENCHMARK_RESULT_JSON_EXTENSION

),

required=True,

)

parser.add_argument(

"-o",

"--output_dir",

dest="output_dir",

metavar="PATH",

action="store",

type=str,

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

661

help="Path to directory that holds output JSON files. One for strategy selection and one per strategy for GEMM config selection",

SiCong Li

2019-10-01 19:26:00 +0100

[diff] [blame]

662

)

663

parser.add_argument(

SiCong Li

2019-11-05 10:43:06 +0000

[diff] [blame]

"-t",

"--tolerance",

action="store",

type=float,

default=0.01,

help="For testing if two GEMMConfigs are equivalent in terms of performance. The tolerance is OpenCL timer in\

670

milliseconds. Recommended value: <= 0.1 ms",

671

)

672

parser.add_argument(

Eren Kopuz

2020-06-24 17:29:38 +0100

[diff] [blame]

"-D",

"--debug",

dest="debug",

action="store_true",

help="Enable script debugging output",

SiCong Li