Blame - tests/validation/reference/Winograd.cpp - ml/ComputeLibrary

2018-02-22 16:17:20 +0000

[diff] [blame]

45

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

46

// Winograd input transform matrices

47

static const float imatrix2x2_3x3[] =

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

48

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

49

1.0f, 0.0f, -1.0f, 0.0f,

50

0.0f, 1.0f, 1.0f, 0.0f,

51

0.0f, -1.0f, 1.0f, 0.0f,

52

0.0f, 1.0f, 0.0f, -1.0f

53

};

54

55

static const float imatrix4x4_3x3[] =

56

{

57

4.0f, 0.0f, -5.0f, 0.0f, 1.0f, 0.0f,

58

0.0f, -4.0f, -4.0f, 1.0f, 1.0f, 0.0f,

59

0.0f, 4.0f, -4.0f, -1.0f, 1.0f, 0.0f,

60

0.0f, -2.0f, -1.0f, 2.0f, 1.0f, 0.0f,

61

0.0f, 2.0f, -1.0f, -2.0f, 1.0f, 0.0f,

62

0.0f, 4.0f, 0.0f, -5.0f, 0.0f, 1.0f,

63

};

64

Giorgio Arena

fe5ef38

2018-04-17 10:14:10 +0100

[diff] [blame^]

65

static const float imatrix4x4_5x5[] =

66

{

67

1.f, 0.f, -21.f / 4.f, 0.f, 21.f / 4.f, 0.f, -1.f, 0.f,

68

0.f, 1.f, 1.f, -17.f / 4.f, -17.f / 4.f, 1.f, 1.f, 0.f,

69

0.f, -1.f, 1.f, 17.f / 4.f, -17.f / 4.f, -1.f, 1.f, 0.f,

70

0.f, 1.f / 2.f, 1.f / 4.f, -5.f / 2.f, -5.f / 4.f, 2.f, 1.f, 0.f,

71

0.f, -1.f / 2.f, 1.f / 4.f, 5.f / 2.f, -5.f / 4.f, -2.f, 1.f, 0.f,

72

0.f, 2.f, 4.f, -5.f / 2.f, -5.f, 1.f / 2.f, 1.f, 0.f,

73

0.f, -2.f, 4.f, 5.f / 2.f, -5.f, -1.f / 2.f, 1.f, 0.f,

74

0.f, -1.f, 0.f, 21.f / 4.f, 0.f, -21.f / 4.f, 0.f, 1.f

75

};

76

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

77

// ------------------------------------------

78

79

// Winograd filter transform matrices

80

static const float fmatrix2x2_3x3[] =

{

1.0f, 0.0f, 0.0f,

0.5f, 0.5f, 0.5f,

0.5f, -0.5f, 0.5f,

0.0f, 0.0f, 1.0f

};

static const float fmatrix4x4_3x3[] =

89

{

90

0.25f, 0.0f, 0.0f,

91

-1.0f / 6.0f, -1.0f / 6.0f, -1.0f / 6.0f,

92

-1.0f / 6.0f, 1.0f / 6.0f, -1.0f / 6.0f,

93

1.0f / 24.0f, 1.0f / 12.0f, 1.0f / 6.0f,

94

1.0f / 24.0f, -1.0f / 12.0f, 1.0f / 6.0f,

0.0f, 0.0f, 1.0f

};

Giorgio Arena

2018-04-11 19:07:17 +0100

[diff] [blame]

98

static const float fmatrix4x4_5x5[] =

99

{

100

1.0f, 0.0f, 0.0f, 0.0f, 0.0f,

101

-2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f,

102

-2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f,

103

1.0f / 90.0f, 1.0f / 45.0f, 2.0f / 45.0f, 4.0f / 45.0f, 8.0f / 45.0f,

104

1.0f / 90.0f, -1.0f / 45.0f, 2.0f / 45.0f, -4.0f / 45.0f, 8.0f / 45.0f,

105

4.0f / 45.0f, 2.0f / 45.0f, 1.0f / 45.0f, 1.0f / 90.0f, 1.0f / 180.0f,

106

4.0f / 45.0f, -2.0f / 45.0f, 1.0f / 45.0f, -1.0f / 90.0f, 1.0f / 180.0f,

107

0.0f, 0.0f, 0.0f, 0.0f, 1.0f

};

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

111

// ------------------------------------------

112

113

// Winograd output transform matrices

114

static const float omatrix2x2_3x3[] =

115

{

116

1.0f, 1.0f, 1.0f, 0.0f,

117

0.0f, 1.0f, -1.0f, -1.0f

118

};

119

120

static const float omatrix4x4_3x3[] =

121

{

122

1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,

123

0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 0.0f,

124

0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 0.0f,

125

0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f

126

};

127

128

// ------------------------------------------

129

130

using WinogradKey = std::tuple<std::pair<int, int>, std::pair<int, int>, WinogradTransformType>;

131

132

// Key = (Output tile size, Kernel size, Winograd transform type)

133

static std::map<WinogradKey, const float *> matrix_map =

134

{

135

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },

136

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },

Giorgio Arena

fe5ef38

2018-04-17 10:14:10 +0100

[diff] [blame^]

137

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

138

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },

139

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },

Giorgio Arena

9373c8b

2018-04-11 19:07:17 +0100

[diff] [blame]

140

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

141

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },

142

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },

143

};

144

Giorgio Arena

9373c8b

2018-04-11 19:07:17 +0100

[diff] [blame]

145

// Find transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

146

std::map<WinogradKey, const float *>::iterator it;

147

148

it = matrix_map.find(WinogradKey(std::pair<int, int>(output_tile_size.width, output_tile_size.height),

149

std::pair<int, int>(kernel_size.width, kernel_size.height),

150

winograd_transform_type));

151

152

float const *matrix_values = nullptr;

153

if(it != matrix_map.end())

154

{

155

// Get matrix pointer

156

matrix_values = it->second;

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

157

}

158

else

159

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

160

ARM_COMPUTE_ERROR("Winograd configuration not supported");

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

161

}

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

162

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

163

// Copy values

164

std::copy(&matrix_values[0], &matrix_values[0] + src.num_elements(), &src[0]);

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

165

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

166

} // namespace

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

167

168

template <typename T>

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

169

SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

170

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

171

ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NCHW);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

172

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

173

const PadStrideInfo conv_info = winograd_info.convolution_info;

174

const Size2D output_tile_size = winograd_info.output_tile_size;

175

const Size2D kernel_size = winograd_info.kernel_size;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

176

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

177

SimpleTensor<T> out{ output_shape, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

178

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

179

// Calculate dimensions for the tile

180

const unsigned int tile_w = output_tile_size.width + kernel_size.width - 1;

181

const unsigned int tile_h = output_tile_size.height + kernel_size.height - 1;

182

183

TensorShape tile_dims(tile_w, tile_h);

184

185

// Simple tensor for the input tile

186

SimpleTensor<T> src_tile{ tile_dims, in.data_type() };

187

188

// Simple tensor for the temporary tile

189

SimpleTensor<T> tmp_tile{ tile_dims, in.data_type() };

190

191

// Simple tensor for the output tile

192

SimpleTensor<T> dst_tile{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

193

194

// Simple tensor for the transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

195

SimpleTensor<T> matrix{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

196

197

// Simple tensor for the transformation matrix transposed

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

198

SimpleTensor<T> matrix_transposed{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

199

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

200

// Initialize matrix for the input transform

201

initialize_matrix_transform(matrix, output_tile_size, kernel_size, WinogradTransformType::INPUT);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

202

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

203

// Transpose matrix

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

204

transpose_matrix(matrix, matrix_transposed);

205

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

206

const int in_w = in.shape().x();

207

const int in_h = in.shape().y();

208

const int in_d = in.shape().z();

209

const int out_d = out.shape().z();

210

const int num_batches = in.shape().total_size() / (in_w * in_h * in_d);

211

const int num_tiles_x = std::ceil((in_w - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));

212

const int num_tiles_y = std::ceil((in_h - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));

213

const int step_x = output_tile_size.width;

214

const int step_y = output_tile_size.height;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

215

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

216

ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast<int>(out.shape().y()));

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

217

218

for(int b = 0; b < num_batches; ++b)

219

{

220

for(int z = 0; z < in_d; ++z)

221

{

222

for(int y = 0; y < num_tiles_y; ++y)

223

{

224

for(int x = 0; x < num_tiles_x; ++x)

225

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

226

int xi = x * step_x - conv_info.pad_left();

227

int yi = y * step_y - conv_info.pad_top();

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

228

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

229

// Get the tile from the input tensor

230

get_tile(in, src_tile, Coordinates(xi, yi, z, b));

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

231

232

// Compute the transformation

233

matrix_multiply(matrix, src_tile, tmp_tile);

234

matrix_multiply(tmp_tile, matrix_transposed, dst_tile);

235

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

236

// Store the output tile across the channels

237

for(int i = 0; i < out_d; ++i)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

238

{

239

int xo = z;

240

int yo = x + y * num_tiles_x;

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

241

out[coords2index(out.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i];

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

}

}

}

}

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

247

248

return out;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

249

}

250

251

template <typename T>

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

252

SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

253

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

254

ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format");

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

255

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

256

// Create reference

257

SimpleTensor<T> out{ output_shape, in.data_type(), 1 };

258

259

const Size2D output_tile_size = winograd_info.output_tile_size;

260

const Size2D kernel_size = winograd_info.kernel_size;

261

262

TensorShape kernel_tile_dims(kernel_size.width, kernel_size.height);

263

264

// Calculate dimensions for the tile

265

const unsigned int input_tile_w = output_tile_size.width + kernel_size.width - 1;

266

const unsigned int input_tile_h = output_tile_size.height + kernel_size.height - 1;

267

const unsigned int input_tile_area = input_tile_w * input_tile_h;

268

269

// Simple tensor for the input tile

270

SimpleTensor<T> input_tile{ kernel_tile_dims, in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

271

272

// Simple tensor for the transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

273

SimpleTensor<T> trans_matrix{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

274

275

// Simple tensor for the transformation matrix transpose

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

276

SimpleTensor<T> trans_matrix_transposed{ TensorShape(input_tile_w, kernel_tile_dims[0]), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

277

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

278

// Simple tensor for the temporary tile

279

SimpleTensor<T> tmp_tile{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

280

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

281

// Simple tensor for the output tile

282

SimpleTensor<T> transf_tile{ TensorShape(input_tile_w, input_tile_w), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

283

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

284

// Initialize matrix for the filter transform

285

initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::FILTER);

286

287

// Transpose the transformation matrix

288

transpose_matrix(trans_matrix, trans_matrix_transposed);

289

290

const int num_channels = in.shape()[2];

291

const int num_filters = in.shape()[3];

292

const int num_batches = in.shape().total_size() / (kernel_size.area() * num_channels * num_filters);

293

294

for(int n = 0; n < num_batches; ++n)

295

{

296

for(int w = 0; w < num_filters; ++w)

297

{

298

for(int z = 0; z < num_channels; ++z)

299

{

300

// Load the tile from the input tensor

301

get_tile(in, input_tile, Coordinates(0, 0, z, w, n));

302

303

// First transformation

304

matrix_multiply(trans_matrix, input_tile, tmp_tile);

305

306

// Second transformation

307

matrix_multiply(tmp_tile, trans_matrix_transposed, transf_tile);

308

309

// Store the output tile across the channels

310

const int output_offset = w + z * num_filters;

311

312

// Store the values across the channels

313

for(unsigned int i = 0; i < input_tile_area; ++i)

314

{

315

out[output_offset + i * num_filters * num_channels] = transf_tile[i];

}

}

}

}

return out;

}

template <typename T>

325

SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

326

{

327

ARM_COMPUTE_ERROR_ON_MSG(winograd_info.output_data_layout != DataLayout::NCHW, "Only supported NCHW data format");

328

329

const PadStrideInfo conv_info = winograd_info.convolution_info;

330

const Size2D input_dimensions = winograd_info.input_dimensions;

331

const Size2D output_tile_size = winograd_info.output_tile_size;

332

const Size2D kernel_size = winograd_info.kernel_size;

333

334

// Create reference

335

SimpleTensor<T> out{ output_shape, in.data_type(), 1 };

336

337

// Calculate dimensions for the tiles

338

const unsigned int in_tile_w = output_tile_size.width + kernel_size.width - 1;

339

const unsigned int in_tile_h = output_tile_size.height + kernel_size.height - 1;

340

const unsigned int out_tile_w = output_tile_size.width;

341

const unsigned int out_tile_h = output_tile_size.height;

342

343

ARM_COMPUTE_ERROR_ON(in.shape()[2] != (in_tile_w * in_tile_h));

344

ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[2]);

345

346

// Compute tile dimensions

347

// Input tile dimensions

348

TensorShape in_tile_dims(in_tile_w, in_tile_h);

349

350

// Output tile dimensions

351

TensorShape out_tile_dims(output_tile_size.width, output_tile_size.height);

352

353

// Transformation matrix dimensions

354

TensorShape tr_tile_dims(in_tile_w, output_tile_size.width);

355

356

// Create tensors

357

// Simple tensor for the input tile

358

SimpleTensor<T> input_tile{ in_tile_dims, in.data_type(), 1 };

359

360

// Simple tensor for the transformation matrix

361

SimpleTensor<T> trans_matrix{ tr_tile_dims, in.data_type(), 1 };

362

363

// Simple tensor for the transformation matrix transpose

364

SimpleTensor<T> trans_matrix_transposed{ TensorShape(tr_tile_dims[1], tr_tile_dims[0]), in.data_type(), 1 };

365

366

// Simple tensor for the temporary tile

367

SimpleTensor<T> tmp_tile{ tr_tile_dims, in.data_type(), 1 };

368

369

// Simple tensor for the output tile

370

SimpleTensor<T> output_tile{ out_tile_dims, in.data_type(), 1 };

371

372

// Initialize matrix for the output transform

373

initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::OUTPUT);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

374

375

// Transpose the transformation matrix

376

transpose_matrix(trans_matrix, trans_matrix_transposed);

377

378

const int w_in = in.shape()[0];

379

const int h_in = in.shape()[1];

380

const int c_in = in.shape()[2];

381

const int w_out = out.shape()[0];

382

const int h_out = out.shape()[1];

383

const int c_out = out.shape()[2];

384

const int num_batches = in.shape().total_size() / (w_in * h_in * c_in);

385

386

// Input strides

387

const int stridey_in = w_in;

388

const int stridez_in = stridey_in * h_in;

389

const int stridew_in = stridez_in * c_in;

390

391

// Output strides

392

const int stridey_out = w_out;

393

const int stridez_out = stridey_out * h_out;

394

const int stridew_out = stridez_out * c_out;

395

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

396

// Compute number of elements to process in the X and Y direction

397

const int num_elements_x = input_dimensions.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();

398

const int num_elements_y = input_dimensions.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom();

399

const int num_tiles_x = std::ceil(num_elements_x / static_cast<float>(output_tile_size.width));

400

const int num_tiles_y = std::ceil(num_elements_y / static_cast<float>(output_tile_size.height));

401

402

ARM_COMPUTE_UNUSED(num_tiles_y);

403

ARM_COMPUTE_ERROR_ON(in.shape()[1] != static_cast<unsigned int>(num_tiles_x * num_tiles_y));

404

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

405

for(int n = 0; n < num_batches; ++n)

406

{

407

for(int y = 0; y < h_in; ++y)

408

{

409

for(int x = 0; x < w_in; ++x)

410

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

411

// Load the input tile tile across the channels of the input tensor

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

412

for(int z = 0; z < c_in; ++z)

413

{

414

input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)];

415

}

416

417

// First transformation

418

matrix_multiply(trans_matrix, input_tile, tmp_tile);

419

420

// Second transformation

421

matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile);

422

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

423

// Store the output tile

424

const int xo = (y % num_tiles_x) * out_tile_w;

425

const int yo = (y / num_tiles_x) * out_tile_h;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

426

const int zo = x;

427

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

428

const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

429

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

430

for(int yi = 0; yi < static_cast<int>(out_tile_h); ++yi)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

431

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

432

for(int xi = 0; xi < static_cast<int>(out_tile_w); ++xi)

433

{

434

// Check out-of-bound writes

435

if((xo + xi < w_out) && (yo + yi < h_out))

436

{

437

out[output_offset + yi * stridey_out + xi] = output_tile[xi + yi * out_tile_w];

438

}

439

}

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

}

}

}

}

Gian Marco Iodice