Blame - tests/validation/reference/Winograd.cpp - ml/ComputeLibrary

2018-02-22 16:17:20 +0000

[diff] [blame]

45

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

46

// Winograd input transform matrices

47

static const float imatrix2x2_3x3[] =

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

48

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

49

1.0f, 0.0f, -1.0f, 0.0f,

50

0.0f, 1.0f, 1.0f, 0.0f,

51

0.0f, -1.0f, 1.0f, 0.0f,

52

0.0f, 1.0f, 0.0f, -1.0f

53

};

54

55

static const float imatrix4x4_3x3[] =

56

{

57

4.0f, 0.0f, -5.0f, 0.0f, 1.0f, 0.0f,

58

0.0f, -4.0f, -4.0f, 1.0f, 1.0f, 0.0f,

59

0.0f, 4.0f, -4.0f, -1.0f, 1.0f, 0.0f,

60

0.0f, -2.0f, -1.0f, 2.0f, 1.0f, 0.0f,

61

0.0f, 2.0f, -1.0f, -2.0f, 1.0f, 0.0f,

62

0.0f, 4.0f, 0.0f, -5.0f, 0.0f, 1.0f,

63

};

64

65

// ------------------------------------------

66

67

// Winograd filter transform matrices

68

static const float fmatrix2x2_3x3[] =

{

1.0f, 0.0f, 0.0f,

0.5f, 0.5f, 0.5f,

0.5f, -0.5f, 0.5f,

0.0f, 0.0f, 1.0f

};

static const float fmatrix4x4_3x3[] =

77

{

78

0.25f, 0.0f, 0.0f,

79

-1.0f / 6.0f, -1.0f / 6.0f, -1.0f / 6.0f,

80

-1.0f / 6.0f, 1.0f / 6.0f, -1.0f / 6.0f,

81

1.0f / 24.0f, 1.0f / 12.0f, 1.0f / 6.0f,

82

1.0f / 24.0f, -1.0f / 12.0f, 1.0f / 6.0f,

0.0f, 0.0f, 1.0f

};

Giorgio Arena

2018-04-11 19:07:17 +0100

[diff] [blame]

86

static const float fmatrix4x4_5x5[] =

87

{

88

1.0f, 0.0f, 0.0f, 0.0f, 0.0f,

89

-2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f,

90

-2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f,

91

1.0f / 90.0f, 1.0f / 45.0f, 2.0f / 45.0f, 4.0f / 45.0f, 8.0f / 45.0f,

92

1.0f / 90.0f, -1.0f / 45.0f, 2.0f / 45.0f, -4.0f / 45.0f, 8.0f / 45.0f,

93

4.0f / 45.0f, 2.0f / 45.0f, 1.0f / 45.0f, 1.0f / 90.0f, 1.0f / 180.0f,

94

4.0f / 45.0f, -2.0f / 45.0f, 1.0f / 45.0f, -1.0f / 90.0f, 1.0f / 180.0f,

95

0.0f, 0.0f, 0.0f, 0.0f, 1.0f

};

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

99

// ------------------------------------------

100

101

// Winograd output transform matrices

102

static const float omatrix2x2_3x3[] =

103

{

104

1.0f, 1.0f, 1.0f, 0.0f,

105

0.0f, 1.0f, -1.0f, -1.0f

106

};

107

108

static const float omatrix4x4_3x3[] =

109

{

110

1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,

111

0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 0.0f,

112

0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 0.0f,

113

0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f

114

};

115

116

// ------------------------------------------

117

118

using WinogradKey = std::tuple<std::pair<int, int>, std::pair<int, int>, WinogradTransformType>;

119

120

// Key = (Output tile size, Kernel size, Winograd transform type)

121

static std::map<WinogradKey, const float *> matrix_map =

122

{

123

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },

124

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },

125

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },

126

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },

Giorgio Arena

9373c8b

2018-04-11 19:07:17 +0100

[diff] [blame]

127

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

128

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },

129

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },

130

};

131

Giorgio Arena

9373c8b

2018-04-11 19:07:17 +0100

[diff] [blame]

132

// Find transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

133

std::map<WinogradKey, const float *>::iterator it;

134

135

it = matrix_map.find(WinogradKey(std::pair<int, int>(output_tile_size.width, output_tile_size.height),

136

std::pair<int, int>(kernel_size.width, kernel_size.height),

137

winograd_transform_type));

138

139

float const *matrix_values = nullptr;

140

if(it != matrix_map.end())

141

{

142

// Get matrix pointer

143

matrix_values = it->second;

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

144

}

145

else

146

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

147

ARM_COMPUTE_ERROR("Winograd configuration not supported");

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

148

}

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

149

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

150

// Copy values

151

std::copy(&matrix_values[0], &matrix_values[0] + src.num_elements(), &src[0]);

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

152

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

153

} // namespace

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

154

155

template <typename T>

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

156

SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

157

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

158

ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NCHW);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

159

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

160

const PadStrideInfo conv_info = winograd_info.convolution_info;

161

const Size2D output_tile_size = winograd_info.output_tile_size;

162

const Size2D kernel_size = winograd_info.kernel_size;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

163

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

164

SimpleTensor<T> out{ output_shape, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

165

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

166

// Calculate dimensions for the tile

167

const unsigned int tile_w = output_tile_size.width + kernel_size.width - 1;

168

const unsigned int tile_h = output_tile_size.height + kernel_size.height - 1;

169

170

TensorShape tile_dims(tile_w, tile_h);

171

172

// Simple tensor for the input tile

173

SimpleTensor<T> src_tile{ tile_dims, in.data_type() };

174

175

// Simple tensor for the temporary tile

176

SimpleTensor<T> tmp_tile{ tile_dims, in.data_type() };

177

178

// Simple tensor for the output tile

179

SimpleTensor<T> dst_tile{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

180

181

// Simple tensor for the transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

182

SimpleTensor<T> matrix{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

183

184

// Simple tensor for the transformation matrix transposed

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

185

SimpleTensor<T> matrix_transposed{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

186

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

187

// Initialize matrix for the input transform

188

initialize_matrix_transform(matrix, output_tile_size, kernel_size, WinogradTransformType::INPUT);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

189

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

190

// Transpose matrix

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

191

transpose_matrix(matrix, matrix_transposed);

192

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

193

const int in_w = in.shape().x();

194

const int in_h = in.shape().y();

195

const int in_d = in.shape().z();

196

const int out_d = out.shape().z();

197

const int num_batches = in.shape().total_size() / (in_w * in_h * in_d);

198

const int num_tiles_x = std::ceil((in_w - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));

199

const int num_tiles_y = std::ceil((in_h - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));

200

const int step_x = output_tile_size.width;

201

const int step_y = output_tile_size.height;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

202

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

203

ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast<int>(out.shape().y()));

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

204

205

for(int b = 0; b < num_batches; ++b)

206

{

207

for(int z = 0; z < in_d; ++z)

208

{

209

for(int y = 0; y < num_tiles_y; ++y)

210

{

211

for(int x = 0; x < num_tiles_x; ++x)

212

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

213

int xi = x * step_x - conv_info.pad_left();

214

int yi = y * step_y - conv_info.pad_top();

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

215

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

216

// Get the tile from the input tensor

217

get_tile(in, src_tile, Coordinates(xi, yi, z, b));

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

218

219

// Compute the transformation

220

matrix_multiply(matrix, src_tile, tmp_tile);

221

matrix_multiply(tmp_tile, matrix_transposed, dst_tile);

222

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

223

// Store the output tile across the channels

224

for(int i = 0; i < out_d; ++i)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

225

{

226

int xo = z;

227

int yo = x + y * num_tiles_x;

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

228

out[coords2index(out.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i];

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

}

}

}

}

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

234

235

return out;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

236

}

237

238

template <typename T>

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

239

SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

240

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

241

ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format");

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

242

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

243

// Create reference

244

SimpleTensor<T> out{ output_shape, in.data_type(), 1 };

245

246

const Size2D output_tile_size = winograd_info.output_tile_size;

247

const Size2D kernel_size = winograd_info.kernel_size;

248

249

TensorShape kernel_tile_dims(kernel_size.width, kernel_size.height);

250

251

// Calculate dimensions for the tile

252

const unsigned int input_tile_w = output_tile_size.width + kernel_size.width - 1;

253

const unsigned int input_tile_h = output_tile_size.height + kernel_size.height - 1;

254

const unsigned int input_tile_area = input_tile_w * input_tile_h;

255

256

// Simple tensor for the input tile

257

SimpleTensor<T> input_tile{ kernel_tile_dims, in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

258

259

// Simple tensor for the transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

260

SimpleTensor<T> trans_matrix{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

261

262

// Simple tensor for the transformation matrix transpose

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

263

SimpleTensor<T> trans_matrix_transposed{ TensorShape(input_tile_w, kernel_tile_dims[0]), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

264

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

265

// Simple tensor for the temporary tile

266

SimpleTensor<T> tmp_tile{ TensorShape(kernel_tile_dims[0], input_tile_w), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

267

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

268

// Simple tensor for the output tile

269

SimpleTensor<T> transf_tile{ TensorShape(input_tile_w, input_tile_w), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

270

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

271

// Initialize matrix for the filter transform

272

initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::FILTER);

273

274

// Transpose the transformation matrix

275

transpose_matrix(trans_matrix, trans_matrix_transposed);

276

277

const int num_channels = in.shape()[2];

278

const int num_filters = in.shape()[3];

279

const int num_batches = in.shape().total_size() / (kernel_size.area() * num_channels * num_filters);

280

281

for(int n = 0; n < num_batches; ++n)

282

{

283

for(int w = 0; w < num_filters; ++w)

284

{

285

for(int z = 0; z < num_channels; ++z)

286

{

287

// Load the tile from the input tensor

288

get_tile(in, input_tile, Coordinates(0, 0, z, w, n));

289

290

// First transformation

291

matrix_multiply(trans_matrix, input_tile, tmp_tile);

292

293

// Second transformation

294

matrix_multiply(tmp_tile, trans_matrix_transposed, transf_tile);

295

296

// Store the output tile across the channels

297

const int output_offset = w + z * num_filters;

298

299

// Store the values across the channels

300

for(unsigned int i = 0; i < input_tile_area; ++i)

301

{

302

out[output_offset + i * num_filters * num_channels] = transf_tile[i];

}

}

}

}

return out;

}

template <typename T>

312

SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

313

{

314

ARM_COMPUTE_ERROR_ON_MSG(winograd_info.output_data_layout != DataLayout::NCHW, "Only supported NCHW data format");

315

316

const PadStrideInfo conv_info = winograd_info.convolution_info;

317

const Size2D input_dimensions = winograd_info.input_dimensions;

318

const Size2D output_tile_size = winograd_info.output_tile_size;

319

const Size2D kernel_size = winograd_info.kernel_size;

320

321

// Create reference

322

SimpleTensor<T> out{ output_shape, in.data_type(), 1 };

323

324

// Calculate dimensions for the tiles

325

const unsigned int in_tile_w = output_tile_size.width + kernel_size.width - 1;

326

const unsigned int in_tile_h = output_tile_size.height + kernel_size.height - 1;

327

const unsigned int out_tile_w = output_tile_size.width;

328

const unsigned int out_tile_h = output_tile_size.height;

329

330

ARM_COMPUTE_ERROR_ON(in.shape()[2] != (in_tile_w * in_tile_h));

331

ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[2]);

332

333

// Compute tile dimensions

334

// Input tile dimensions

335

TensorShape in_tile_dims(in_tile_w, in_tile_h);

336

337

// Output tile dimensions

338

TensorShape out_tile_dims(output_tile_size.width, output_tile_size.height);

339

340

// Transformation matrix dimensions

341

TensorShape tr_tile_dims(in_tile_w, output_tile_size.width);

342

343

// Create tensors

344

// Simple tensor for the input tile

345

SimpleTensor<T> input_tile{ in_tile_dims, in.data_type(), 1 };

346

347

// Simple tensor for the transformation matrix

348

SimpleTensor<T> trans_matrix{ tr_tile_dims, in.data_type(), 1 };

349

350

// Simple tensor for the transformation matrix transpose

351

SimpleTensor<T> trans_matrix_transposed{ TensorShape(tr_tile_dims[1], tr_tile_dims[0]), in.data_type(), 1 };

352

353

// Simple tensor for the temporary tile

354

SimpleTensor<T> tmp_tile{ tr_tile_dims, in.data_type(), 1 };

355

356

// Simple tensor for the output tile

357

SimpleTensor<T> output_tile{ out_tile_dims, in.data_type(), 1 };

358

359

// Initialize matrix for the output transform

360

initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::OUTPUT);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

361

362

// Transpose the transformation matrix

363

transpose_matrix(trans_matrix, trans_matrix_transposed);

364

365

const int w_in = in.shape()[0];

366

const int h_in = in.shape()[1];

367

const int c_in = in.shape()[2];

368

const int w_out = out.shape()[0];

369

const int h_out = out.shape()[1];

370

const int c_out = out.shape()[2];

371

const int num_batches = in.shape().total_size() / (w_in * h_in * c_in);

372

373

// Input strides

374

const int stridey_in = w_in;

375

const int stridez_in = stridey_in * h_in;

376

const int stridew_in = stridez_in * c_in;

377

378

// Output strides

379

const int stridey_out = w_out;

380

const int stridez_out = stridey_out * h_out;

381

const int stridew_out = stridez_out * c_out;

382

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

383

// Compute number of elements to process in the X and Y direction

384

const int num_elements_x = input_dimensions.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right();

385

const int num_elements_y = input_dimensions.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom();

386

const int num_tiles_x = std::ceil(num_elements_x / static_cast<float>(output_tile_size.width));

387

const int num_tiles_y = std::ceil(num_elements_y / static_cast<float>(output_tile_size.height));

388

389

ARM_COMPUTE_UNUSED(num_tiles_y);

390

ARM_COMPUTE_ERROR_ON(in.shape()[1] != static_cast<unsigned int>(num_tiles_x * num_tiles_y));

391

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

392

for(int n = 0; n < num_batches; ++n)

393

{

394

for(int y = 0; y < h_in; ++y)

395

{

396

for(int x = 0; x < w_in; ++x)

397

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

398

// Load the input tile across the channels of the input tensor

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

399

for(int z = 0; z < c_in; ++z)

400

{

401

input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)];

402

}

403

404

// First transformation

405

matrix_multiply(trans_matrix, input_tile, tmp_tile);

406

407

// Second transformation

408

matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile);

409

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

410

// Store the output tile

411

const int xo = (y % num_tiles_x) * out_tile_w;

412

const int yo = (y / num_tiles_x) * out_tile_h;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

413

const int zo = x;

414

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

415

const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

416

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

417

for(int yi = 0; yi < static_cast<int>(out_tile_h); ++yi)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

418

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

419

for(int xi = 0; xi < static_cast<int>(out_tile_w); ++xi)

420

{

421

// Check out-of-bound writes

422

if((xo + xi < w_out) && (yo + yi < h_out))

423

{

424

out[output_offset + yi * stridey_out + xi] = output_tile[xi + yi * out_tile_w];

425

}

426

}

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

}

}

}

}

Gian Marco Iodice