Blame - src/core/CL/cl_kernels/pooling_layer.cl - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

1

/*

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

2

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

* SOFTWARE.

*/

#include "helpers.h"

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

26

#if defined(POOL_AVG) || defined(POOL_L2)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

27

#define POOL_OP(x, y) ((x) + (y))

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

28

#else /* defined(POOL_AVG) || defined(POOL_L2) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

29

#define POOL_OP(x, y) (fmax((x), (y)))

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

30

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

31

32

#if defined(POOL_L2)

33

#define POW2_OP(x, vec_size) ((x) * (x))

34

#else /* defined(POOL_L2) */

35

#define POW2_OP(x, vec_size) (x)

36

#endif /* defined(POOL_L2) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

37

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame]

38

#define DIV_OP(x, y) (x * (1.f / y))

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

39

#define SQRT_OP(x) sqrt((x))

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame]

40

Michalis Spyrou

e74b201

2018-04-18 09:49:16 +0100

[diff] [blame]

41

#define DIV_OP_NHWC(x, y) (x * (VEC_DATA_TYPE(DATA_TYPE, 8))(1.f / y))

42

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

43

#if STRIDE_X == 1

44

#define POOLING3x3(res, input, output) POOLING3x3_STRIDE1(res, input, output)

45

#elif STRIDE_X == 2 /* STRIDE_X == 1 */

46

#define POOLING3x3(res, input, output) POOLING3x3_STRIDE2(res, input, output)

47

#elif STRIDE_X == 3 /* STRIDE_X not equals 1 or 2 */

48

#define POOLING3x3(res, input, output) POOLING3x3_STRIDE3(res, input, output)

49

#endif /* STRIDE_X == 3 */

50

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

51

#define POOLING3x3_STRIDE1(res, input, output) \

52

({ \

53

VEC_DATA_TYPE(DATA_TYPE, 4) \

54

data00 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)); \

55

VEC_DATA_TYPE(DATA_TYPE, 2) \

56

data01 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 4); \

57

VEC_DATA_TYPE(DATA_TYPE, 4) \

58

data10 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0)); \

59

VEC_DATA_TYPE(DATA_TYPE, 2) \

60

data11 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 4); \

61

VEC_DATA_TYPE(DATA_TYPE, 4) \

62

data20 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0)); \

63

VEC_DATA_TYPE(DATA_TYPE, 2) \

64

data21 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 4); \

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

65

data00 = POW2_OP(data00, 4); \

66

data01 = POW2_OP(data01, 2); \

67

data10 = POW2_OP(data10, 4); \

68

data11 = POW2_OP(data11, 2); \

69

data20 = POW2_OP(data20, 4); \

70

data21 = POW2_OP(data21, 2); \

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

71

\

72

VEC_DATA_TYPE(DATA_TYPE, 8) \

73

values00 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data00.s01212323); \

74

VEC_DATA_TYPE(DATA_TYPE, 4) \

75

values01 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data01.s0, data00.s3, data01.s01); \

76

VEC_DATA_TYPE(DATA_TYPE, 8) \

77

values10 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data10.s01212323); \

78

VEC_DATA_TYPE(DATA_TYPE, 4) \

79

values11 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data11.s0, data10.s3, data11.s01); \

80

VEC_DATA_TYPE(DATA_TYPE, 8) \

81

values20 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data20.s01212323); \

82

VEC_DATA_TYPE(DATA_TYPE, 4) \

83

values21 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data21.s0, data20.s3, data21.s01); \

84

\

85

values00 = POOL_OP(values00, values10); \

86

values01 = POOL_OP(values01, values11); \

87

values00 = POOL_OP(values00, values20); \

88

values01 = POOL_OP(values01, values21); \

89

\

90

res = POOL_OP((VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s036, values01.s1), (VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s147, values01.s2)); \

91

res = POOL_OP(res, (VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s25, values01.s03)); \

92

})

93

94

#define POOLING3x3_STRIDE2(res, input, output) \

95

({ \

96

VEC_DATA_TYPE(DATA_TYPE, 8) \

97

data00 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)); \

98

DATA_TYPE data01 = *((__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 8); \

99

VEC_DATA_TYPE(DATA_TYPE, 8) \

100

data10 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0)); \

101

DATA_TYPE data11 = *((__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 8); \

102

VEC_DATA_TYPE(DATA_TYPE, 8) \

103

data20 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0)); \

104

DATA_TYPE data21 = *((__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 8); \

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

105

data00 = POW2_OP(data00, 8); \

106

data01 = POW2_OP(data01, 1); \

107

data10 = POW2_OP(data10, 8); \

108

data11 = POW2_OP(data11, 1); \

109

data20 = POW2_OP(data20, 8); \

110

data21 = POW2_OP(data21, 1); \

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

111

\

112

VEC_DATA_TYPE(DATA_TYPE, 8) \

113

values00 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data00.s01223445); \

114

VEC_DATA_TYPE(DATA_TYPE, 4) \

115

values01 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data00.s667, data01); \

116

VEC_DATA_TYPE(DATA_TYPE, 8) \

117

values10 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data10.s01223445); \

118

VEC_DATA_TYPE(DATA_TYPE, 4) \

119

values11 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data10.s667, data11); \

120

VEC_DATA_TYPE(DATA_TYPE, 8) \

121

values20 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data20.s01223445); \

122

VEC_DATA_TYPE(DATA_TYPE, 4) \

123

values21 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data20.s667, data21); \

124

\

125

values00 = POOL_OP(values00, values10); \

126

values01 = POOL_OP(values01, values11); \

127

values00 = POOL_OP(values00, values20); \

128

values01 = POOL_OP(values01, values21); \

129

\

130

res = POOL_OP((VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s036, values01.s1), (VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s147, values01.s2)); \

131

res = POOL_OP(res, (VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s25, values01.s03)); \

132

})

133

134

#define POOLING3x3_STRIDE3(res, input, output) \

135

({ \

136

VEC_DATA_TYPE(DATA_TYPE, 8) \

137

data00 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)); \

138

VEC_DATA_TYPE(DATA_TYPE, 4) \

139

data01 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 8); \

140

VEC_DATA_TYPE(DATA_TYPE, 8) \

141

data10 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0)); \

142

VEC_DATA_TYPE(DATA_TYPE, 4) \

143

data11 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 8); \

144

VEC_DATA_TYPE(DATA_TYPE, 8) \

145

data20 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0)); \

146

VEC_DATA_TYPE(DATA_TYPE, 4) \

147

data21 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 8); \

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

148

data00 = POW2_OP(data00, 8); \

149

data01 = POW2_OP(data01, 4); \

150

data10 = POW2_OP(data10, 8); \

151

data11 = POW2_OP(data11, 4); \

152

data20 = POW2_OP(data20, 8); \

153

data21 = POW2_OP(data21, 4); \

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

154

\

155

data00 = POOL_OP(data00, data10); \

156

data01 = POOL_OP(data01, data11); \

157

data00 = POOL_OP(data00, data20); \

158

data01 = POOL_OP(data01, data21); \

159

\

160

res = POOL_OP((VEC_DATA_TYPE(DATA_TYPE, 4))(data00.s036, data01.s1), (VEC_DATA_TYPE(DATA_TYPE, 4))(data00.s147, data01.s2)); \

161

res = POOL_OP(res, (VEC_DATA_TYPE(DATA_TYPE, 4))(data00.s25, data01.s03)); \

162

})

163

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

164

DATA_TYPE calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

165

const int pad_x, const int pad_y, const int stride_x, const int stride_y)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

166

{

Georgios Pinitas

2017-10-30 15:56:32 +0000

[diff] [blame]

167

int start_x = get_global_id(0) * stride_x - pad_x;

168

int start_y = get_global_id(1) * stride_y - pad_y;

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

169

const int end_x = min(start_x + pool_size_x, upper_bound_w);

170

const int end_y = min(start_y + pool_size_y, upper_bound_h);

Georgios Pinitas

2017-10-30 15:56:32 +0000

[diff] [blame]

171

#if defined(EXCLUDE_PADDING)

172

start_x = max(0, start_x);

173

start_y = max(0, start_y);

174

#endif /* defined(EXCLUDE_PADDING) */

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame]

175

return ((end_y - start_y) * (end_x - start_x));

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

176

}

177

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

178

/** Performs a pooling function of pool size equal to 2.

179

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

180

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

181

* @note In case of average pooling the following information must be passed at compile time:

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

182

* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

183

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)

184

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

185

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

186

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

187

* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

188

* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)

189

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

190

* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)

191

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

192

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

193

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

194

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

195

* @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

196

* @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)

197

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

198

* @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)

199

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

200

* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)

201

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

202

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

203

*/

204

__kernel void pooling_layer_2(

205

TENSOR3D_DECLARATION(input),

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

206

TENSOR3D_DECLARATION(output))

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

207

{

208

// Get pixels pointer

209

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

210

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

211

212

// Load data

213

VEC_DATA_TYPE(DATA_TYPE, 2)

214

data0 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));

215

VEC_DATA_TYPE(DATA_TYPE, 2)

216

data1 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));

217

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

218

#if defined(POOL_L2)

219

// Raise to power of 2 for L2 Pooling

220

data0 = POW2_OP(data0, 2);

221

data1 = POW2_OP(data1, 2);

222

#endif /* defined(POOL_L2) */

223

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

224

// Perform calculations

225

data0 = POOL_OP(data0, data1);

226

DATA_TYPE res = POOL_OP(data0.s0, data0.s1);

227

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

228

#if defined(POOL_AVG) || defined(POOL_L2)

229

// Divide by pool region in case of average or l2 pooling

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

230

res = DIV_OP(res, calculate_avg_scale(2, 2, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

231

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

232

233

#if defined(POOL_L2)

234

// Take square root of the result in L2 pooling

235

res = SQRT_OP(res);

236

#endif /* defined(POOL_L2) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

237

238

// Store result

239

*(__global DATA_TYPE *)output.ptr = res;

240

}

241

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

242

/** Performs a pooling function of pool size equal to 3

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

243

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

244

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

245

* @note In case of average pooling the following information must be passed at compile time:

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

246

* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

247

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)

248

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

249

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

250

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

251

* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

252

* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)

253

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

254

* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)

255

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

256

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

257

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

258

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

259

* @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

260

* @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)

261

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

262

* @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)

263

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

264

* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)

265

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

266

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

267

*/

268

__kernel void pooling_layer_3(

269

TENSOR3D_DECLARATION(input),

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

270

TENSOR3D_DECLARATION(output))

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

271

{

272

// Get pixels pointer

273

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

274

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

275

276

// Load data

277

VEC_DATA_TYPE(DATA_TYPE, 3)

278

data0 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));

279

VEC_DATA_TYPE(DATA_TYPE, 3)

280

data1 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));

281

VEC_DATA_TYPE(DATA_TYPE, 3)

282

data2 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));

283

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

284

#if defined(POOL_L2)

285

// Raise to power of 2 for L2 Pooling

286

data0 = POW2_OP(data0, 3);

287

data1 = POW2_OP(data1, 3);

288

data2 = POW2_OP(data2, 3);

289

#endif /* defined(POOL_L2) */

290

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

291

// Perform calculations

292

data0 = POOL_OP(data0, data1);

293

data0 = POOL_OP(data0, data2);

294

DATA_TYPE res = POOL_OP(POOL_OP(data0.s0, data0.s1), data0.s2);

295

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

296

#if defined(POOL_AVG) || defined(POOL_L2)

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

297

// Divide by pool region in case of average pooling

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

298

res = DIV_OP(res, calculate_avg_scale(3, 3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

299

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

300

301

#if defined(POOL_L2)

302

// Take square root of the result in L2 pooling

303

res = SQRT_OP(res);

304

#endif /* defined(POOL_L2) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

305

306

// Store result

307

*(__global DATA_TYPE *)output.ptr = res;

308

}

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

309

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

310

#if defined(POOLING3x3)

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame]

311

312

#define CONVERT_OP(data_type) convert_##data_type##4

313

#define CONVERT_VECTOR4(data_type) CONVERT_OP(data_type)

314

315

VEC_DATA_TYPE(DATA_TYPE, 4)

316

calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upper_bound_h,

317

const int pad_x, const int pad_y, const int stride_x, const int stride_y)

318

{

Georgios Pinitas

2017-10-30 15:56:32 +0000

[diff] [blame]

319

int4 start_x = ((int4)get_global_id(0) * 4 + (int4)(0, 1, 2, 3)) * (int4)stride_x - (int4)pad_x;

320

int start_y = get_global_id(1) * stride_y - pad_y;

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame]

321

const int4 end_x = min(start_x + (int4)pool_size, (int4)upper_bound_w);

322

const int end_y = min(start_y + pool_size, upper_bound_h);

Georgios Pinitas

2017-10-30 15:56:32 +0000

[diff] [blame]

323

#if defined(EXCLUDE_PADDING)

324

start_x = max((int4)0, start_x);

325

start_y = max(0, start_y);

326

#endif /* defined(EXCLUDE_PADDING) */

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame]

327

return (VEC_DATA_TYPE(DATA_TYPE, 4))(1.f) / CONVERT_VECTOR4(DATA_TYPE)(((int4)(end_y - start_y)) * (end_x - start_x));

328

}

329

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

330

/** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

331

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

332

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

333

* @note In case of average pooling the following information must be passed at compile time:

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

334

* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

335

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)

336

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

337

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

338

*

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

339

* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

340

* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)

341

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

342

* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)

343

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

344

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

345

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

346

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

347

* @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

348

* @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)

349

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

350

* @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)

351

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

352

* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)

353

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

354

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

355

*/

Anton Lokhmotov

af6204c

2017-11-08 09:34:19 +0000

[diff] [blame]

356

__kernel void pooling_layer_optimized_3(

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

357

TENSOR3D_DECLARATION(input),

358

TENSOR3D_DECLARATION(output))

359

{

360

// Get pixels pointer

361

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

362

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

363

364

VEC_DATA_TYPE(DATA_TYPE, 4)

365

res;

366

367

// Perform pooling 3x3 for 4 output elements

368

POOLING3x3(res, input, output);

369

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

370

#if defined(POOL_AVG) || defined(POOL_L2)

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

371

// Divide by pool region in case of average pooling

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

372

res *= calculate_avg_scale4(3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y);

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

373

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

374

375

#if defined(POOL_L2)

376

// Take square root of the result in L2 pooling

377

res = SQRT_OP(res);

378

#endif /* defined(POOL_L2) */

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

379

380

vstore4(res, 0, (__global DATA_TYPE *)output.ptr);

381

}

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

382

#endif // defined(POOLING3x3)

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

383

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

384

#if defined(POOL_SIZE_X) && defined(POOL_SIZE_Y)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

385

386

// Set the initial value for the pooling operation accordingly with the data type

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

387

#if defined(POOL_AVG) || defined(POOL_L2)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

388

#define INITIAL_VALUE 0

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

389

#else /* defined(POOL_AVG) || defined(POOL_L2) */

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

390

#if FP16

391

#define INITIAL_VALUE -HALF_MAX

392

#else // FP16

393

#define INITIAL_VALUE -FLT_MAX

394

#endif // FP16

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

#endif // POOL_AVG

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

398

/** Performs a pooling function of pool size equal to N (NCHW)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

399

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

400

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

401

* @note -DFP16 must be passed at compile time if half float data type is used

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

402

* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

403

* @note In case of average pooling the following information must be passed at compile time:

404

* -DPOOL_AVG must be provided otherwise max pooling will be performed.

405

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)

406

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

407

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

408

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame^]

409

* @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

410

* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)

411

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

412

* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)

413

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

414

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

415

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

416

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image

417

* @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr

418

* @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)

419

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

420

* @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)

421

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

422

* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)

423

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

424

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image

425

*/

Michalis Spyrou

e74b201

2018-04-18 09:49:16 +0100

[diff] [blame]

426

__kernel void pooling_layer_MxN_nchw(

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

427

TENSOR3D_DECLARATION(input),

428

TENSOR3D_DECLARATION(output))

429

{

430

// Get pixels pointer

431

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

432

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

433

434

VEC_DATA_TYPE(DATA_TYPE, 8)

435

vdata = INITIAL_VALUE;

436

DATA_TYPE sdata = INITIAL_VALUE;

437

438

// Load data

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

439

for(int y = 0; y < POOL_SIZE_Y; y++)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

440

{

441

int x = 0;

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

442

for(; x <= ((int)POOL_SIZE_X - 8); x += 8)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

443

{

444

VEC_DATA_TYPE(DATA_TYPE, 8)

445

data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

446

#if defined(POOL_L2)

447

// Raise to power of 2 for L2 Pooling

448

data0 *= data0;

449

#endif /* defined(POOL_L2) */

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

450

vdata = POOL_OP(vdata, data0);

451

}

452

453

// Leftover

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

454

for(; x < (int)POOL_SIZE_X; ++x)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

455

{

456

DATA_TYPE data0 = *((__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

457

#if defined(POOL_L2)

458

// Raise to power of 2 for L2 Pooling

459

data0 *= data0;

460

#endif /* defined(POOL_L2) */

461

sdata = POOL_OP(sdata, data0);

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

}

}

// Reduce result

VEC_DATA_TYPE(DATA_TYPE, 4)

467

reduce4 = POOL_OP(vdata.s0123, vdata.s4567);

468

VEC_DATA_TYPE(DATA_TYPE, 2)

469

reduce2 = POOL_OP(reduce4.s01, reduce4.s23);

470

DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1);

471

res = POOL_OP(res, sdata);

472

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

473

#if defined(POOL_AVG) || defined(POOL_L2)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

474

// Divide by pool region in case of average pooling

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

475

res = DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

476

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

477

478

#if defined(POOL_L2)

479

// Take square root of the result in L2 pooling

480

res = SQRT_OP(res);

481

#endif /* defined(POOL_L2) */

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

482

483

// Store result

484

*(__global DATA_TYPE *)output.ptr = res;

485

}

Isabella Gottardi