Blame - tests/validation/reference/Winograd.cpp - ml/ComputeLibrary

2018-02-22 16:17:20 +0000

[diff] [blame]

46

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

47

// Winograd input transform matrices

48

static const float imatrix2x2_3x3[] =

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

49

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

50

1.0f, 0.0f, -1.0f, 0.0f,

51

0.0f, 1.0f, 1.0f, 0.0f,

52

0.0f, -1.0f, 1.0f, 0.0f,

53

0.0f, 1.0f, 0.0f, -1.0f

54

};

55

56

static const float imatrix4x4_3x3[] =

57

{

58

4.0f, 0.0f, -5.0f, 0.0f, 1.0f, 0.0f,

59

0.0f, -4.0f, -4.0f, 1.0f, 1.0f, 0.0f,

60

0.0f, 4.0f, -4.0f, -1.0f, 1.0f, 0.0f,

61

0.0f, -2.0f, -1.0f, 2.0f, 1.0f, 0.0f,

62

0.0f, 2.0f, -1.0f, -2.0f, 1.0f, 0.0f,

63

0.0f, 4.0f, 0.0f, -5.0f, 0.0f, 1.0f,

64

};

65

Giorgio Arena

fe5ef38

2018-04-17 10:14:10 +0100

[diff] [blame]

66

static const float imatrix4x4_5x5[] =

67

{

68

1.f, 0.f, -21.f / 4.f, 0.f, 21.f / 4.f, 0.f, -1.f, 0.f,

69

0.f, 1.f, 1.f, -17.f / 4.f, -17.f / 4.f, 1.f, 1.f, 0.f,

70

0.f, -1.f, 1.f, 17.f / 4.f, -17.f / 4.f, -1.f, 1.f, 0.f,

71

0.f, 1.f / 2.f, 1.f / 4.f, -5.f / 2.f, -5.f / 4.f, 2.f, 1.f, 0.f,

72

0.f, -1.f / 2.f, 1.f / 4.f, 5.f / 2.f, -5.f / 4.f, -2.f, 1.f, 0.f,

73

0.f, 2.f, 4.f, -5.f / 2.f, -5.f, 1.f / 2.f, 1.f, 0.f,

74

0.f, -2.f, 4.f, 5.f / 2.f, -5.f, -1.f / 2.f, 1.f, 0.f,

75

0.f, -1.f, 0.f, 21.f / 4.f, 0.f, -21.f / 4.f, 0.f, 1.f

76

};

77

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

78

// ------------------------------------------

79

80

// Winograd filter transform matrices

81

static const float fmatrix2x2_3x3[] =

{

1.0f, 0.0f, 0.0f,

0.5f, 0.5f, 0.5f,

0.5f, -0.5f, 0.5f,

0.0f, 0.0f, 1.0f

};

static const float fmatrix4x4_3x3[] =

90

{

91

0.25f, 0.0f, 0.0f,

92

-1.0f / 6.0f, -1.0f / 6.0f, -1.0f / 6.0f,

93

-1.0f / 6.0f, 1.0f / 6.0f, -1.0f / 6.0f,

94

1.0f / 24.0f, 1.0f / 12.0f, 1.0f / 6.0f,

95

1.0f / 24.0f, -1.0f / 12.0f, 1.0f / 6.0f,

0.0f, 0.0f, 1.0f

};

Giorgio Arena

2018-04-11 19:07:17 +0100

[diff] [blame]

99

static const float fmatrix4x4_5x5[] =

100

{

101

1.0f, 0.0f, 0.0f, 0.0f, 0.0f,

102

-2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f,

103

-2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f,

104

1.0f / 90.0f, 1.0f / 45.0f, 2.0f / 45.0f, 4.0f / 45.0f, 8.0f / 45.0f,

105

1.0f / 90.0f, -1.0f / 45.0f, 2.0f / 45.0f, -4.0f / 45.0f, 8.0f / 45.0f,

106

4.0f / 45.0f, 2.0f / 45.0f, 1.0f / 45.0f, 1.0f / 90.0f, 1.0f / 180.0f,

107

4.0f / 45.0f, -2.0f / 45.0f, 1.0f / 45.0f, -1.0f / 90.0f, 1.0f / 180.0f,

108

0.0f, 0.0f, 0.0f, 0.0f, 1.0f

};

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

112

// ------------------------------------------

113

114

// Winograd output transform matrices

115

static const float omatrix2x2_3x3[] =

116

{

117

1.0f, 1.0f, 1.0f, 0.0f,

118

0.0f, 1.0f, -1.0f, -1.0f

119

};

120

121

static const float omatrix4x4_3x3[] =

122

{

123

1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,

124

0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 0.0f,

125

0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 0.0f,

126

0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f

127

};

128

Giorgio Arena

dd03870

2018-04-16 11:20:11 +0100

[diff] [blame]

129

static const float omatrix4x4_5x5[] =

130

{

131

1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 8.0f, 0.0f,

132

0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 4.0f, -4.0f, 0.0f,

133

0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 2.0f, 2.0f, 0.0f,

134

0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f, -1.0f, 1.0f

135

};

136

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

137

// ------------------------------------------

138

139

using WinogradKey = std::tuple<std::pair<int, int>, std::pair<int, int>, WinogradTransformType>;

140

141

// Key = (Output tile size, Kernel size, Winograd transform type)

142

static std::map<WinogradKey, const float *> matrix_map =

143

{

144

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },

145

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

146

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix2x2_3x3 },

147

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix4x4_3x3 },

148

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },

149

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },

Giorgio Arena

fe5ef38

2018-04-17 10:14:10 +0100

[diff] [blame]

150

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },

Gian Marco Iodice

876be2a

2018-07-03 12:22:09 +0100

[diff] [blame]

151

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::INPUT), imatrix4x4_5x5 },

152

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

153

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },

154

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

155

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix2x2_3x3 },

156

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix4x4_3x3 },

157

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },

158

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },

Giorgio Arena

9373c8b

2018-04-11 19:07:17 +0100

[diff] [blame]

159

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },

Gian Marco Iodice

876be2a

2018-07-03 12:22:09 +0100

[diff] [blame]

160

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::FILTER), fmatrix4x4_5x5 },

161

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

162

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },

163

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

164

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },

165

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },

166

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },

167

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },

Giorgio Arena

dd03870

2018-04-16 11:20:11 +0100

[diff] [blame]

168

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },

Gian Marco Iodice

876be2a

2018-07-03 12:22:09 +0100

[diff] [blame]

169

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },

170

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

171

};

172

Giorgio Arena

9373c8b

2018-04-11 19:07:17 +0100

[diff] [blame]

173

// Find transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

174

std::map<WinogradKey, const float *>::iterator it;

175

176

it = matrix_map.find(WinogradKey(std::pair<int, int>(output_tile_size.width, output_tile_size.height),

177

std::pair<int, int>(kernel_size.width, kernel_size.height),

178

winograd_transform_type));

179

180

float const *matrix_values = nullptr;

181

if(it != matrix_map.end())

182

{

183

// Get matrix pointer

184

matrix_values = it->second;

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

185

}

186

else

187

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

188

ARM_COMPUTE_ERROR("Winograd configuration not supported");

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

189

}

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

190

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

191

// Copy values

192

std::copy(&matrix_values[0], &matrix_values[0] + src.num_elements(), &src[0]);

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

193

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

194

} // namespace

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

195

196

template <typename T>

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

197

SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

198

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

199

ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NCHW);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

200

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

201

const PadStrideInfo conv_info = winograd_info.convolution_info;

202

const Size2D output_tile_size = winograd_info.output_tile_size;

203

const Size2D kernel_size = winograd_info.kernel_size;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

204

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

205

SimpleTensor<T> out{ output_shape, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

206

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

207

// Calculate dimensions for the tile

208

const unsigned int tile_w = output_tile_size.width + kernel_size.width - 1;

209

const unsigned int tile_h = output_tile_size.height + kernel_size.height - 1;

210

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

211

// Get the maximum dimension from the tile size

212

const unsigned int tile_max_dim = std::max(tile_w, tile_h);

213

214

TensorShape tile_dims(tile_max_dim, tile_max_dim);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

215

216

// Simple tensor for the input tile

217

SimpleTensor<T> src_tile{ tile_dims, in.data_type() };

218

219

// Simple tensor for the temporary tile

220

SimpleTensor<T> tmp_tile{ tile_dims, in.data_type() };

221

222

// Simple tensor for the output tile

223

SimpleTensor<T> dst_tile{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

224

225

// Simple tensor for the transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

226

SimpleTensor<T> matrix{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

227

228

// Simple tensor for the transformation matrix transposed

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

229

SimpleTensor<T> matrix_transposed{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

230

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

231

// Initialize matrix for the input transform

232

initialize_matrix_transform(matrix, output_tile_size, kernel_size, WinogradTransformType::INPUT);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

233

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

234

// Transpose matrix

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

235

transpose_matrix<T>(matrix, matrix_transposed);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

236

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

237

const int in_w = in.shape().x();

238

const int in_h = in.shape().y();

239

const int in_d = in.shape().z();

240

const int out_d = out.shape().z();

241

const int num_batches = in.shape().total_size() / (in_w * in_h * in_d);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

242

const int step_x = output_tile_size.width;

243

const int step_y = output_tile_size.height;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

244

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

245

// Compute the number of output tiles along the x and y direction of size "output_tile_size"

246

const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(in_w, in_h),

kernel_size,

output_tile_size,

conv_info);

const int num_tiles_x = num_tiles.width;

252

const int num_tiles_y = num_tiles.height;

253

254

// In case of 1D convolution, the input tile has to be partially filled with zeros

255

int start_x_zero = 0;

256

int start_y_zero = 0;

int end_x_zero = 0;

int end_y_zero = 0;

if(output_tile_size.width == 1)

{

start_x_zero = 1;

start_y_zero = 0;

end_x_zero = tile_max_dim - 1;

265

end_y_zero = tile_max_dim;

266

}

267

else if(output_tile_size.height == 1)

{

start_x_zero = 0;

start_y_zero = 1;

end_x_zero = tile_max_dim;

272

end_y_zero = tile_max_dim - 1;

273

}

274

275

// Set the anchor and shape of the zeros area

276

const Coordinates anchor_zeros(start_x_zero, start_y_zero);

277

const TensorShape shape_zeros(end_x_zero, end_y_zero);

278

279

// If we have a vertical filter (i.e. 1x3, 1x5,..), we need to take the elements along the y direction (step = width of the output tile)

280

const int step_y_transf_tile = kernel_size.width == 1 ? tile_max_dim : 1;

281

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

282

ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast<int>(out.shape().y()));

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

283

284

for(int b = 0; b < num_batches; ++b)

285

{

286

for(int z = 0; z < in_d; ++z)

287

{

288

for(int y = 0; y < num_tiles_y; ++y)

289

{

290

for(int x = 0; x < num_tiles_x; ++x)

291

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

292

int xi = x * step_x - conv_info.pad_left();

293

int yi = y * step_y - conv_info.pad_top();

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

294

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

295

// Get the tile from the input tensor

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

296

get_tile<T>(in, src_tile, Coordinates(xi, yi, z, b));

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

297

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

298

// Fill partially with zeros in case of 1D convolution

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

299

zeros<T>(src_tile, anchor_zeros, shape_zeros);

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

300

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

301

// Compute the transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

302

matrix_multiply<T>(matrix, src_tile, tmp_tile);

303

matrix_multiply<T>(tmp_tile, matrix_transposed, dst_tile);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

304

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

305

// Store the output tile across the channels

306

for(int i = 0; i < out_d; ++i)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

307

{

308

int xo = z;

309

int yo = x + y * num_tiles_x;

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

310

out[coords2index(out.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i * step_y_transf_tile];

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

}

}

}

}

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

316

317

return out;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

318

}

319

320

template <typename T>

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

321

SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

322

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

323

ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format");

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

324

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

325

// Create reference

326

SimpleTensor<T> out{ output_shape, in.data_type(), 1 };

327

328

const Size2D output_tile_size = winograd_info.output_tile_size;

329

const Size2D kernel_size = winograd_info.kernel_size;

330

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

331

// Calculate dimensions for the tile

332

const unsigned int input_tile_w = output_tile_size.width + kernel_size.width - 1;

333

const unsigned int input_tile_h = output_tile_size.height + kernel_size.height - 1;

334

const unsigned int input_tile_area = input_tile_w * input_tile_h;

335

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

336

// Get the maximum dimension from the filter size

337

const unsigned int kernel_max_dim = std::max(kernel_size.width, kernel_size.height);

338

339

// Get the maximum dimension from the input tile

340

const unsigned int input_tile_max_dim = std::max(input_tile_w, input_tile_h);

341

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

342

// Simple tensor for the input tile

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

343

SimpleTensor<T> input_tile{ TensorShape(kernel_max_dim, kernel_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

344

345

// Simple tensor for the transformation matrix

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

346

SimpleTensor<T> trans_matrix{ TensorShape(kernel_max_dim, input_tile_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

347

348

// Simple tensor for the transformation matrix transpose

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

349

SimpleTensor<T> trans_matrix_transposed{ TensorShape(input_tile_max_dim, kernel_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

350

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

351

// Simple tensor for the temporary tile

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

352

SimpleTensor<T> tmp_tile{ TensorShape(kernel_max_dim, input_tile_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

353

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

354

// Simple tensor for the output tile

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

355

SimpleTensor<T> transf_tile{ TensorShape(input_tile_max_dim, input_tile_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

356

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

357

// Initialize matrix for the filter transform

358

initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::FILTER);

359

360

// Transpose the transformation matrix

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

361

transpose_matrix<T>(trans_matrix, trans_matrix_transposed);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

362

363

const int num_channels = in.shape()[2];

364

const int num_filters = in.shape()[3];

365

const int num_batches = in.shape().total_size() / (kernel_size.area() * num_channels * num_filters);

366

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

367

// If we have a vertical filter (i.e. 1x3, 1x5,..), we need to take the elements along the y direction (step_y_transf_tile = width of the output tile)

368

const int step_y_transf_tile = kernel_size.width == 1 ? input_tile_max_dim : 1;

369

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

370

for(int n = 0; n < num_batches; ++n)

371

{

372

for(int w = 0; w < num_filters; ++w)

373

{

374

for(int z = 0; z < num_channels; ++z)

375

{

376

// Load the tile from the input tensor

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

377

get_tile<T>(in, input_tile, Coordinates(0, 0, z, w, n));

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

378

379

// First transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

380

matrix_multiply<T>(trans_matrix, input_tile, tmp_tile);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

381

382

// Second transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

383

matrix_multiply<T>(tmp_tile, trans_matrix_transposed, transf_tile);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

384

385

// Store the output tile across the channels

386

const int output_offset = w + z * num_filters;

387

388

// Store the values across the channels

389

for(unsigned int i = 0; i < input_tile_area; ++i)

390

{

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

391

out[output_offset + i * num_filters * num_channels] = transf_tile[i * step_y_transf_tile];

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

}

}

}

}

return out;

}

template <typename T>

Gian Marco Iodice

2213d4b

2018-04-27 10:39:06 +0100

[diff] [blame]

401

SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const SimpleTensor<T> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

402

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

403

const PadStrideInfo conv_info = winograd_info.convolution_info;

404

const Size2D input_dimensions = winograd_info.input_dimensions;

405

const Size2D output_tile_size = winograd_info.output_tile_size;

406

const Size2D kernel_size = winograd_info.kernel_size;

407

408

// Create reference

409

SimpleTensor<T> out{ output_shape, in.data_type(), 1 };

410

411

// Calculate dimensions for the tiles

412

const unsigned int in_tile_w = output_tile_size.width + kernel_size.width - 1;

413

const unsigned int in_tile_h = output_tile_size.height + kernel_size.height - 1;

414

const unsigned int out_tile_w = output_tile_size.width;

415

const unsigned int out_tile_h = output_tile_size.height;

416

417

ARM_COMPUTE_ERROR_ON(in.shape()[2] != (in_tile_w * in_tile_h));

Giorgio Arena

3695f9a

2018-04-23 17:41:22 +0100

[diff] [blame]

418

ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[get_data_layout_dimension_index(winograd_info.output_data_layout, DataLayoutDimension::CHANNEL)]);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

419

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

420

// Get the maximum dimension from the tile size

421

const unsigned int in_tile_max_dim = std::max(in_tile_w, in_tile_h);

422

const unsigned int out_tile_max_dim = std::max(output_tile_size.width, output_tile_size.height);

423

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

424

// Compute tile dimensions

425

// Input tile dimensions

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

426

TensorShape in_tile_dims(in_tile_max_dim, in_tile_max_dim);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

427

428

// Output tile dimensions

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

429

TensorShape out_tile_dims(out_tile_max_dim, out_tile_max_dim);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

430

431

// Transformation matrix dimensions

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

432

TensorShape tr_tile_dims(in_tile_max_dim, out_tile_max_dim);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

433

434

// Create tensors

435

// Simple tensor for the input tile

436

SimpleTensor<T> input_tile{ in_tile_dims, in.data_type(), 1 };

437

438

// Simple tensor for the transformation matrix

439

SimpleTensor<T> trans_matrix{ tr_tile_dims, in.data_type(), 1 };

440

441

// Simple tensor for the transformation matrix transpose

442

SimpleTensor<T> trans_matrix_transposed{ TensorShape(tr_tile_dims[1], tr_tile_dims[0]), in.data_type(), 1 };

443

444

// Simple tensor for the temporary tile

445

SimpleTensor<T> tmp_tile{ tr_tile_dims, in.data_type(), 1 };

446

447

// Simple tensor for the output tile

448

SimpleTensor<T> output_tile{ out_tile_dims, in.data_type(), 1 };

449

450

// Initialize matrix for the output transform

451

initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::OUTPUT);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

452

453

// Transpose the transformation matrix

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

454

transpose_matrix<T>(trans_matrix, trans_matrix_transposed);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

455

456

const int w_in = in.shape()[0];

457

const int h_in = in.shape()[1];

458

const int c_in = in.shape()[2];

459

const int w_out = out.shape()[0];

460

const int h_out = out.shape()[1];

461

const int c_out = out.shape()[2];

462

const int num_batches = in.shape().total_size() / (w_in * h_in * c_in);

463

464

// Input strides

465

const int stridey_in = w_in;

466

const int stridez_in = stridey_in * h_in;

467

const int stridew_in = stridez_in * c_in;

468

469

// Output strides

470

const int stridey_out = w_out;

471

const int stridez_out = stridey_out * h_out;

472

const int stridew_out = stridez_out * c_out;

473

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

474

// Compute the number of output tiles along the x and y direction of size "output_tile_size"

475

const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input_dimensions.width, input_dimensions.height),

kernel_size,

output_tile_size,

conv_info);

const int num_tiles_x = num_tiles.width;

481

const int num_tiles_y = num_tiles.height;

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

482

483

ARM_COMPUTE_UNUSED(num_tiles_y);

484

ARM_COMPUTE_ERROR_ON(in.shape()[1] != static_cast<unsigned int>(num_tiles_x * num_tiles_y));

485

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

486

// If we have a vertical filter (i.e. 1x3, 1x5,..), we still need to take the elements along the x direction (step_y_transf_tile = 1)

487

const int step_y_transf_tile = kernel_size.width == 1 ? 1 : output_tile.shape()[0];

488

489

// Initialize with zeros the input tile

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

490

zeros<T>(input_tile, Coordinates(0, 0), input_tile.shape());

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

491

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

492

for(int n = 0; n < num_batches; ++n)

493

{

494

for(int y = 0; y < h_in; ++y)

495

{

496

for(int x = 0; x < w_in; ++x)

497

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

498

// Load the input tile tile across the channels of the input tensor

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

499

for(int z = 0; z < c_in; ++z)

500

{

501

input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)];

502

}

503

504

// First transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

505

matrix_multiply<T>(trans_matrix, input_tile, tmp_tile);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

506

507

// Second transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

508

matrix_multiply<T>(tmp_tile, trans_matrix_transposed, output_tile);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

509

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

510

// Store the output tile

511

const int xo = (y % num_tiles_x) * out_tile_w;

512

const int yo = (y / num_tiles_x) * out_tile_h;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

513

const int zo = x;

514

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

515

const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

516

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

517

for(int yi = 0; yi < static_cast<int>(out_tile_h); ++yi)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

518

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

519

for(int xi = 0; xi < static_cast<int>(out_tile_w); ++xi)

520

{

521

// Check out-of-bound writes

522

if((xo + xi < w_out) && (yo + yi < h_out))

523

{

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

524

out[output_offset + yi * stridey_out + xi] = output_tile[xi + yi * step_y_transf_tile];

Gian Marco Iodice

2213d4b

2018-04-27 10:39:06 +0100

[diff] [blame]

525

526

// Add bias

527

out[output_offset + yi * stridey_out + xi] += b[zo];

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

528

}

529

}

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

}

}

}

}

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

return out;

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

538

// Explicit instantiations of the reference Winograd transforms for float
template SimpleTensor<float> winograd_input_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
template SimpleTensor<float> winograd_filter_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
template SimpleTensor<float> winograd_output_transform(const SimpleTensor<float> &in, const SimpleTensor<float> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info);

Vidhya Sudhan Loganathan