Blame - tests/validation/reference/Winograd.cpp - ml/ComputeLibrary

2018-02-22 16:17:20 +0000

[diff] [blame]

46

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

47

// Winograd input transform matrices

48

static const float imatrix2x2_3x3[] =

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

49

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

50

1.0f, 0.0f, -1.0f, 0.0f,

51

0.0f, 1.0f, 1.0f, 0.0f,

52

0.0f, -1.0f, 1.0f, 0.0f,

53

0.0f, 1.0f, 0.0f, -1.0f

54

};

55

56

static const float imatrix4x4_3x3[] =

57

{

58

4.0f, 0.0f, -5.0f, 0.0f, 1.0f, 0.0f,

59

0.0f, -4.0f, -4.0f, 1.0f, 1.0f, 0.0f,

60

0.0f, 4.0f, -4.0f, -1.0f, 1.0f, 0.0f,

61

0.0f, -2.0f, -1.0f, 2.0f, 1.0f, 0.0f,

62

0.0f, 2.0f, -1.0f, -2.0f, 1.0f, 0.0f,

63

0.0f, 4.0f, 0.0f, -5.0f, 0.0f, 1.0f,

64

};

65

Giorgio Arena

fe5ef38

2018-04-17 10:14:10 +0100

[diff] [blame]

66

static const float imatrix4x4_5x5[] =

67

{

68

1.f, 0.f, -21.f / 4.f, 0.f, 21.f / 4.f, 0.f, -1.f, 0.f,

69

0.f, 1.f, 1.f, -17.f / 4.f, -17.f / 4.f, 1.f, 1.f, 0.f,

70

0.f, -1.f, 1.f, 17.f / 4.f, -17.f / 4.f, -1.f, 1.f, 0.f,

71

0.f, 1.f / 2.f, 1.f / 4.f, -5.f / 2.f, -5.f / 4.f, 2.f, 1.f, 0.f,

72

0.f, -1.f / 2.f, 1.f / 4.f, 5.f / 2.f, -5.f / 4.f, -2.f, 1.f, 0.f,

73

0.f, 2.f, 4.f, -5.f / 2.f, -5.f, 1.f / 2.f, 1.f, 0.f,

74

0.f, -2.f, 4.f, 5.f / 2.f, -5.f, -1.f / 2.f, 1.f, 0.f,

75

0.f, -1.f, 0.f, 21.f / 4.f, 0.f, -21.f / 4.f, 0.f, 1.f

76

};

77

Pablo Tello

2018-09-26 11:25:15 +0100

[diff] [blame]

78

static const float imatrix2x1_7x7[] =

79

{

80

-36.0f, 0.0f, 49.0f, 0.0f, -14.0f, 0.0f, 1.0f, 0.0f,

81

0.0f, -36.0f, 36.0f, 13.0f, -13.0f, -1.0f, 1.0f, 0.0f,

82

0.0f, 36.0f, 36.0f, -13.0f, -13.0f, 1.0f, 1.0f, 0.0f,

83

0.0f, -18.0f, 9.0f, 20.0f, -10.0f, -2.0f, 1.0f, 0.0f,

84

0.0f, 18.0f, 9.0f, -20.0f, -10.0f, 2.0f, 1.0f, 0.0f,

85

0.0f, -12.0f, 4.0f, 15.0f, -5.0f, -3.0f, 1.0f, 0.0f,

86

0.0f, 12.0f, 4.0f, -15.0f, -5.0f, 3.0f, 1.0f, 0.0f,

87

0.0f, -36.0f, 0.0f, 49.0f, 0.0f, -14.0f, 0.0f, 1.0f

88

};

89

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

90

// ------------------------------------------

91

92

// Winograd filter transform matrices

93

static const float fmatrix2x2_3x3[] =

{

1.0f, 0.0f, 0.0f,

0.5f, 0.5f, 0.5f,

0.5f, -0.5f, 0.5f,

0.0f, 0.0f, 1.0f

};

static const float fmatrix4x4_3x3[] =

102

{

103

0.25f, 0.0f, 0.0f,

104

-1.0f / 6.0f, -1.0f / 6.0f, -1.0f / 6.0f,

105

-1.0f / 6.0f, 1.0f / 6.0f, -1.0f / 6.0f,

106

1.0f / 24.0f, 1.0f / 12.0f, 1.0f / 6.0f,

107

1.0f / 24.0f, -1.0f / 12.0f, 1.0f / 6.0f,

0.0f, 0.0f, 1.0f

};

Giorgio Arena

2018-04-11 19:07:17 +0100

[diff] [blame]

111

static const float fmatrix4x4_5x5[] =

112

{

113

1.0f, 0.0f, 0.0f, 0.0f, 0.0f,

114

-2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f,

115

-2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f, 2.0f / 9.0f, -2.0f / 9.0f,

116

1.0f / 90.0f, 1.0f / 45.0f, 2.0f / 45.0f, 4.0f / 45.0f, 8.0f / 45.0f,

117

1.0f / 90.0f, -1.0f / 45.0f, 2.0f / 45.0f, -4.0f / 45.0f, 8.0f / 45.0f,

118

4.0f / 45.0f, 2.0f / 45.0f, 1.0f / 45.0f, 1.0f / 90.0f, 1.0f / 180.0f,

119

4.0f / 45.0f, -2.0f / 45.0f, 1.0f / 45.0f, -1.0f / 90.0f, 1.0f / 180.0f,

120

0.0f, 0.0f, 0.0f, 0.0f, 1.0f

};

Pablo Tello

2018-09-26 11:25:15 +0100

[diff] [blame]

124

static const float fmatrix2x1_7x7[] =

125

{

126

-1.0f / 36.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,

127

1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f,

128

1.0f / 48.0f, 1.0f / 48.0f, 1.0f / 48.0f, 1.0f / 48.0f, 1.0f / 48.0f, 1.0f / 48.0f, 1.0f / 48.0f,

129

-1.0f / 120.0f, 1.0f / 60.0f, -1.0f / 30.0f, 1.0f / 15.0f, -2.0f / 15.0f, 4.0f / 15.0f, -8.0f / 15.0f,

130

-1.0f / 120.0f, -1.0f / 60.0f, -1.0f / 30.0f, -1.0f / 15.0f, -2.0f / 15.0f, -4.0f / 15.0f, -8.0f / 15.0f,

131

1.0f / 720.0f, -1.0f / 240.0f, 1.0f / 80.0f, -3.0f / 80.0f, 9.0f / 80.0f, -27.0f / 80.0f, 81.0f / 80.0f,

132

1.0f / 720.0f, 1.0f / 240.0f, 1.0f / 80.0f, 3.0f / 80.0f, 9.0f / 80.0f, 27.0f / 80.0f, 81.0f / 80.0f,

133

0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f

134

};

135

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

136

// ------------------------------------------

137

138

// Winograd output transform matrices

139

static const float omatrix2x2_3x3[] =

140

{

141

1.0f, 1.0f, 1.0f, 0.0f,

142

0.0f, 1.0f, -1.0f, -1.0f

143

};

144

145

static const float omatrix4x4_3x3[] =

146

{

147

1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,

148

0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 0.0f,

149

0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 0.0f,

150

0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f

151

};

152

Giorgio Arena

dd03870

2018-04-16 11:20:11 +0100

[diff] [blame]

153

static const float omatrix4x4_5x5[] =

154

{

155

1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 8.0f, 0.0f,

156

0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 4.0f, -4.0f, 0.0f,

157

0.0f, 1.0f, 1.0f, 4.0f, 4.0f, 2.0f, 2.0f, 0.0f,

158

0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f, -1.0f, 1.0f

159

};

160

Pablo Tello

2018-09-26 11:25:15 +0100

[diff] [blame]

161

static const float omatrix2x1_7x7[] =

162

{

163

1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,

164

0.0f, -1.0f, 1.0f, -2.0f, 2.0f, -3.0f, 3.0f, 1.0f

165

};

166

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

167

// ------------------------------------------

168

169

using WinogradKey = std::tuple<std::pair<int, int>, std::pair<int, int>, WinogradTransformType>;

170

171

// Key = (Output tile size, Kernel size, Winograd transform type)

172

static std::map<WinogradKey, const float *> matrix_map =

173

{

174

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },

175

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

176

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix2x2_3x3 },

177

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix4x4_3x3 },

178

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },

179

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },

Giorgio Arena

fe5ef38

2018-04-17 10:14:10 +0100

[diff] [blame]

180

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },

Gian Marco Iodice

2018-07-03 12:22:09 +0100

[diff] [blame]

181

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::INPUT), imatrix4x4_5x5 },

Pablo Tello

2018-09-26 11:25:15 +0100

[diff] [blame]

182

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::INPUT), imatrix2x1_7x7 },

183

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::INPUT), imatrix2x1_7x7 },

Michele Di Giorgio

f955d51

2019-02-27 14:26:51 +0000

[diff] [blame]

184

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7), WinogradTransformType::INPUT), imatrix2x1_7x7 },

Gian Marco Iodice

2018-07-03 12:22:09 +0100

[diff] [blame]

185

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

186

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },

187

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

188

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix2x2_3x3 },

189

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix4x4_3x3 },

190

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },

191

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },

Giorgio Arena

9373c8b

2018-04-11 19:07:17 +0100

[diff] [blame]

192

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },

Gian Marco Iodice

2018-07-03 12:22:09 +0100

[diff] [blame]

193

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::FILTER), fmatrix4x4_5x5 },

Pablo Tello

2018-09-26 11:25:15 +0100

[diff] [blame]

194

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::FILTER), fmatrix2x1_7x7 },

195

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::FILTER), fmatrix2x1_7x7 },

Gian Marco Iodice

2018-07-03 12:22:09 +0100

[diff] [blame]

196

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

197

{ WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },

198

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

199

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },

200

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },

201

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },

202

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },

Giorgio Arena

dd03870

2018-04-16 11:20:11 +0100

[diff] [blame]

203

{ WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },

Gian Marco Iodice

2018-07-03 12:22:09 +0100

[diff] [blame]

204

{ WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },

Pablo Tello

2018-09-26 11:25:15 +0100

[diff] [blame]

205

{ WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::OUTPUT), omatrix2x1_7x7 },

206

{ WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::OUTPUT), omatrix2x1_7x7 },

Gian Marco Iodice

2018-07-03 12:22:09 +0100

[diff] [blame]

207

{ WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

208

};

209

Giorgio Arena

9373c8b

2018-04-11 19:07:17 +0100

[diff] [blame]

210

// Find transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

211

std::map<WinogradKey, const float *>::iterator it;

212

213

it = matrix_map.find(WinogradKey(std::pair<int, int>(output_tile_size.width, output_tile_size.height),

214

std::pair<int, int>(kernel_size.width, kernel_size.height),

215

winograd_transform_type));

216

217

float const *matrix_values = nullptr;

218

if(it != matrix_map.end())

219

{

220

// Get matrix pointer

221

matrix_values = it->second;

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

222

}

223

else

224

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

225

ARM_COMPUTE_ERROR("Winograd configuration not supported");

Giorgio Arena

2d9de0a

2018-03-15 17:58:20 +0000

[diff] [blame]

226

}

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

227

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

228

// Copy values

229

std::copy(&matrix_values[0], &matrix_values[0] + src.num_elements(), &src[0]);

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

230

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

231

} // namespace

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

232

233

template <typename T>

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

234

SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

235

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

236

ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NCHW);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

237

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

238

const PadStrideInfo conv_info = winograd_info.convolution_info;

239

const Size2D output_tile_size = winograd_info.output_tile_size;

240

const Size2D kernel_size = winograd_info.kernel_size;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

241

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

242

SimpleTensor<T> out{ output_shape, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

243

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

244

// Calculate dimensions for the tile

245

const unsigned int tile_w = output_tile_size.width + kernel_size.width - 1;

246

const unsigned int tile_h = output_tile_size.height + kernel_size.height - 1;

247

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

248

// Get the maximum dimension from the tile size

249

const unsigned int tile_max_dim = std::max(tile_w, tile_h);

250

251

TensorShape tile_dims(tile_max_dim, tile_max_dim);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

252

253

// Simple tensor for the input tile

254

SimpleTensor<T> src_tile{ tile_dims, in.data_type() };

255

256

// Simple tensor for the temporary tile

257

SimpleTensor<T> tmp_tile{ tile_dims, in.data_type() };

258

259

// Simple tensor for the output tile

260

SimpleTensor<T> dst_tile{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

261

262

// Simple tensor for the transformation matrix

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

263

SimpleTensor<T> matrix{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

264

265

// Simple tensor for the transformation matrix transposed

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

266

SimpleTensor<T> matrix_transposed{ tile_dims, in.data_type() };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

267

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

268

// Initialize matrix for the input transform

269

initialize_matrix_transform(matrix, output_tile_size, kernel_size, WinogradTransformType::INPUT);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

270

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

271

// Transpose matrix

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

272

transpose_matrix<T>(matrix, matrix_transposed);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

273

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

274

const int in_w = in.shape().x();

275

const int in_h = in.shape().y();

276

const int in_d = in.shape().z();

277

const int out_d = out.shape().z();

278

const int num_batches = in.shape().total_size() / (in_w * in_h * in_d);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

279

const int step_x = output_tile_size.width;

280

const int step_y = output_tile_size.height;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

281

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

282

// Compute the number of output tiles along the x and y direction of size "output_tile_size"

283

const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(in_w, in_h),

kernel_size,

output_tile_size,

conv_info);

const int num_tiles_x = num_tiles.width;

289

const int num_tiles_y = num_tiles.height;

290

291

// In case of 1D convolution, the input tile has to be partially filled with zeros

292

int start_x_zero = 0;

293

int start_y_zero = 0;

int end_x_zero = 0;

int end_y_zero = 0;

if(output_tile_size.width == 1)

{

start_x_zero = 1;

start_y_zero = 0;

end_x_zero = tile_max_dim - 1;

302

end_y_zero = tile_max_dim;

303

}

304

else if(output_tile_size.height == 1)

{

start_x_zero = 0;

start_y_zero = 1;

end_x_zero = tile_max_dim;

309

end_y_zero = tile_max_dim - 1;

310

}

311

312

// Set the anchor and shape of the zeros area

313

const Coordinates anchor_zeros(start_x_zero, start_y_zero);

314

const TensorShape shape_zeros(end_x_zero, end_y_zero);

315

316

// If we have a vertical filter (i.e. 1x3, 1x5,..), we need to take the elements along the y direction (step = width of the output tile)

317

const int step_y_transf_tile = kernel_size.width == 1 ? tile_max_dim : 1;

318

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

319

ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast<int>(out.shape().y()));

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

320

321

for(int b = 0; b < num_batches; ++b)

322

{

323

for(int z = 0; z < in_d; ++z)

324

{

325

for(int y = 0; y < num_tiles_y; ++y)

326

{

327

for(int x = 0; x < num_tiles_x; ++x)

328

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

329

int xi = x * step_x - conv_info.pad_left();

330

int yi = y * step_y - conv_info.pad_top();

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

331

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

332

// Get the tile from the input tensor

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

333

get_tile<T>(in, src_tile, Coordinates(xi, yi, z, b));

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

334

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

335

// Fill partially with zeros in case of 1D convolution

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

336

zeros<T>(src_tile, anchor_zeros, shape_zeros);

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

337

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

338

// Compute the transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

339

matrix_multiply<T>(matrix, src_tile, tmp_tile);

340

matrix_multiply<T>(tmp_tile, matrix_transposed, dst_tile);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

341

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

342

// Store the output tile across the channels

343

for(int i = 0; i < out_d; ++i)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

344

{

345

int xo = z;

346

int yo = x + y * num_tiles_x;

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

347

out[coords2index(out.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i * step_y_transf_tile];

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

}

}

}

}

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

353

354

return out;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

355

}

356

357

template <typename T>

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

358

SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

359

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

360

ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format");

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

361

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

362

// Create reference

363

SimpleTensor<T> out{ output_shape, in.data_type(), 1 };

364

365

const Size2D output_tile_size = winograd_info.output_tile_size;

366

const Size2D kernel_size = winograd_info.kernel_size;

367

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

368

// Calculate dimensions for the tile

369

const unsigned int input_tile_w = output_tile_size.width + kernel_size.width - 1;

370

const unsigned int input_tile_h = output_tile_size.height + kernel_size.height - 1;

371

const unsigned int input_tile_area = input_tile_w * input_tile_h;

372

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

373

// Get the maximum dimension from the filter size

374

const unsigned int kernel_max_dim = std::max(kernel_size.width, kernel_size.height);

375

376

// Get the maximum dimension from the input tile

377

const unsigned int input_tile_max_dim = std::max(input_tile_w, input_tile_h);

378

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

379

// Simple tensor for the input tile

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

380

SimpleTensor<T> input_tile{ TensorShape(kernel_max_dim, kernel_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

381

382

// Simple tensor for the transformation matrix

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

383

SimpleTensor<T> trans_matrix{ TensorShape(kernel_max_dim, input_tile_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

384

385

// Simple tensor for the transformation matrix transpose

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

386

SimpleTensor<T> trans_matrix_transposed{ TensorShape(input_tile_max_dim, kernel_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

387

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

388

// Simple tensor for the temporary tile

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

389

SimpleTensor<T> tmp_tile{ TensorShape(kernel_max_dim, input_tile_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

390

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

391

// Simple tensor for the output tile

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

392

SimpleTensor<T> transf_tile{ TensorShape(input_tile_max_dim, input_tile_max_dim), in.data_type(), 1 };

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

393

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

394

// Initialize matrix for the filter transform

395

initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::FILTER);

396

397

// Transpose the transformation matrix

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

398

transpose_matrix<T>(trans_matrix, trans_matrix_transposed);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

399

400

const int num_channels = in.shape()[2];

401

const int num_filters = in.shape()[3];

402

const int num_batches = in.shape().total_size() / (kernel_size.area() * num_channels * num_filters);

403

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

404

// If we have a vertical filter (i.e. 1x3, 1x5,..), we need to take the elements along the y direction (step_y_transf_tile = width of the output tile)

405

const int step_y_transf_tile = kernel_size.width == 1 ? input_tile_max_dim : 1;

406

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

407

for(int n = 0; n < num_batches; ++n)

408

{

409

for(int w = 0; w < num_filters; ++w)

410

{

411

for(int z = 0; z < num_channels; ++z)

412

{

413

// Load the tile from the input tensor

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

414

get_tile<T>(in, input_tile, Coordinates(0, 0, z, w, n));

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

415

416

// First transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

417

matrix_multiply<T>(trans_matrix, input_tile, tmp_tile);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

418

419

// Second transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

420

matrix_multiply<T>(tmp_tile, trans_matrix_transposed, transf_tile);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

421

422

// Store the output tile across the channels

423

const int output_offset = w + z * num_filters;

424

425

// Store the values across the channels

426

for(unsigned int i = 0; i < input_tile_area; ++i)

427

{

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

428

out[output_offset + i * num_filters * num_channels] = transf_tile[i * step_y_transf_tile];

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

}

}

}

}

return out;

}

template <typename T>

Gian Marco Iodice

2213d4b

2018-04-27 10:39:06 +0100

[diff] [blame]

438

SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const SimpleTensor<T> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info)

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

439

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

440

const PadStrideInfo conv_info = winograd_info.convolution_info;

441

const Size2D input_dimensions = winograd_info.input_dimensions;

442

const Size2D output_tile_size = winograd_info.output_tile_size;

443

const Size2D kernel_size = winograd_info.kernel_size;

444

445

// Create reference

446

SimpleTensor<T> out{ output_shape, in.data_type(), 1 };

447

448

// Calculate dimensions for the tiles

449

const unsigned int in_tile_w = output_tile_size.width + kernel_size.width - 1;

450

const unsigned int in_tile_h = output_tile_size.height + kernel_size.height - 1;

451

const unsigned int out_tile_w = output_tile_size.width;

452

const unsigned int out_tile_h = output_tile_size.height;

453

454

ARM_COMPUTE_ERROR_ON(in.shape()[2] != (in_tile_w * in_tile_h));

Giorgio Arena

3695f9a

2018-04-23 17:41:22 +0100

[diff] [blame]

455

ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[get_data_layout_dimension_index(winograd_info.output_data_layout, DataLayoutDimension::CHANNEL)]);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

456

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

457

// Get the maximum dimension from the tile size

458

const unsigned int in_tile_max_dim = std::max(in_tile_w, in_tile_h);

459

const unsigned int out_tile_max_dim = std::max(output_tile_size.width, output_tile_size.height);

460

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

461

// Compute tile dimensions

462

// Input tile dimensions

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

463

TensorShape in_tile_dims(in_tile_max_dim, in_tile_max_dim);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

464

465

// Output tile dimensions

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

466

TensorShape out_tile_dims(out_tile_max_dim, out_tile_max_dim);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

467

468

// Transformation matrix dimensions

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

469

TensorShape tr_tile_dims(in_tile_max_dim, out_tile_max_dim);

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

470

471

// Create tensors

472

// Simple tensor for the input tile

473

SimpleTensor<T> input_tile{ in_tile_dims, in.data_type(), 1 };

474

475

// Simple tensor for the transformation matrix

476

SimpleTensor<T> trans_matrix{ tr_tile_dims, in.data_type(), 1 };

477

478

// Simple tensor for the transformation matrix transpose

479

SimpleTensor<T> trans_matrix_transposed{ TensorShape(tr_tile_dims[1], tr_tile_dims[0]), in.data_type(), 1 };

480

481

// Simple tensor for the temporary tile

482

SimpleTensor<T> tmp_tile{ tr_tile_dims, in.data_type(), 1 };

483

484

// Simple tensor for the output tile

485

SimpleTensor<T> output_tile{ out_tile_dims, in.data_type(), 1 };

486

487

// Initialize matrix for the output transform

488

initialize_matrix_transform(trans_matrix, output_tile_size, kernel_size, WinogradTransformType::OUTPUT);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

489

490

// Transpose the transformation matrix

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

491

transpose_matrix<T>(trans_matrix, trans_matrix_transposed);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

492

493

const int w_in = in.shape()[0];

494

const int h_in = in.shape()[1];

495

const int c_in = in.shape()[2];

496

const int w_out = out.shape()[0];

497

const int h_out = out.shape()[1];

498

const int c_out = out.shape()[2];

499

const int num_batches = in.shape().total_size() / (w_in * h_in * c_in);

500

501

// Input strides

502

const int stridey_in = w_in;

503

const int stridez_in = stridey_in * h_in;

504

const int stridew_in = stridez_in * c_in;

505

506

// Output strides

507

const int stridey_out = w_out;

508

const int stridez_out = stridey_out * h_out;

509

const int stridew_out = stridez_out * c_out;

510

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

511

// Compute the number of output tiles along the x and y direction of size "output_tile_size"

512

const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input_dimensions.width, input_dimensions.height),

kernel_size,

output_tile_size,

conv_info);

const int num_tiles_x = num_tiles.width;

518

const int num_tiles_y = num_tiles.height;

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

519

520

ARM_COMPUTE_UNUSED(num_tiles_y);

521

ARM_COMPUTE_ERROR_ON(in.shape()[1] != static_cast<unsigned int>(num_tiles_x * num_tiles_y));

522

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

523

// If we have a vertical filter (i.e. 1x3, 1x5,..), we still need to take the elements along the x direction (step_y_transf_tile = 1)

524

const int step_y_transf_tile = kernel_size.width == 1 ? 1 : output_tile.shape()[0];

525

526

// Initialize with zeros the input tile

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

527

zeros<T>(input_tile, Coordinates(0, 0), input_tile.shape());

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

528

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

529

for(int n = 0; n < num_batches; ++n)

530

{

531

for(int y = 0; y < h_in; ++y)

532

{

533

for(int x = 0; x < w_in; ++x)

534

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

535

// Load the input tile tile across the channels of the input tensor

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

536

for(int z = 0; z < c_in; ++z)

537

{

538

input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)];

539

}

540

541

// First transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

542

matrix_multiply<T>(trans_matrix, input_tile, tmp_tile);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

543

544

// Second transformation

Vidhya Sudhan Loganathan

2018-08-31 16:10:16 +0100

[diff] [blame]

545

matrix_multiply<T>(tmp_tile, trans_matrix_transposed, output_tile);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

546

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

547

// Store the output tile

548

const int xo = (y % num_tiles_x) * out_tile_w;

549

const int yo = (y / num_tiles_x) * out_tile_h;

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

550

const int zo = x;

551

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

552

const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out);

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

553

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

554

for(int yi = 0; yi < static_cast<int>(out_tile_h); ++yi)

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

555

{

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

556

for(int xi = 0; xi < static_cast<int>(out_tile_w); ++xi)

557

{

558

// Check out-of-bound writes

559

if((xo + xi < w_out) && (yo + yi < h_out))

560

{

Gian Marco Iodice

2018-06-13 14:05:54 +0100

[diff] [blame]

561

out[output_offset + yi * stridey_out + xi] = output_tile[xi + yi * step_y_transf_tile];

Gian Marco Iodice

2213d4b

2018-04-27 10:39:06 +0100

[diff] [blame]

562

563

// Add bias

564

out[output_offset + yi * stridey_out + xi] += b[zo];

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

565

}

566

}

Gian Marco Iodice

2018-03-02 11:18:12 +0000

[diff] [blame]

}

}

}

}

Gian Marco Iodice

2018-02-22 16:17:20 +0000

[diff] [blame]

return out;

}

Gian Marco Iodice

2018-03-22 11:24:56 +0000

[diff] [blame]

575

// Explicit instantiations of the three reference transforms for float
template SimpleTensor<float> winograd_input_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
template SimpleTensor<float> winograd_filter_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const WinogradInfo &winograd_info);
template SimpleTensor<float> winograd_output_transform(const SimpleTensor<float> &in, const SimpleTensor<float> &b, const TensorShape &output_shape, const WinogradInfo &winograd_info);

Vidhya Sudhan Loganathan