Blame - src/core/CL/cl_kernels/pooling_layer.cl - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

1

/*

Michele Di Giorgio

d9eaf61

2020-07-08 11:12:57 +0100

[diff] [blame]

2

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

* SOFTWARE.

*/

#include "helpers.h"

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

26

/* Pooling reduction operator.
 * Average and L2 pooling accumulate with an element-wise sum; any other configuration
 * (max pooling) reduces with an element-wise maximum. */
#if defined(POOL_AVG) || defined(POOL_L2)
#define POOL_OP(x, y) ((x) + (y))
#else /* defined(POOL_AVG) || defined(POOL_L2) */
#define POOL_OP(x, y) (fmax((x), (y)))
#endif /* defined(POOL_AVG) || defined(POOL_L2) */

31

32

/* Pre-processing applied to every loaded value before reduction.
 * L2 pooling squares the value; every other pooling type leaves it untouched.
 * The vec_size argument is accepted by both variants but is not used by either. */
#if defined(POOL_L2)
#define POW2_OP(x, vec_size) ((x) * (x))
#else /* defined(POOL_L2) */
#define POW2_OP(x, vec_size) (x)
#endif /* defined(POOL_L2) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

37

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame]

38

/* Divides x by y by multiplying x with the reciprocal of y.
 * Both arguments are parenthesized so that compound expressions passed as x or y
 * expand with the intended precedence. */
#define DIV_OP(x, y) ((x) * (1.f / (y)))
/* Square root of x. */
#define SQRT_OP(x) sqrt((x))

/* Same as DIV_OP, but the reciprocal of y is first broadcast to an 8-element
 * vector of ACC_DATA_TYPE before the multiplication. */
#define DIV_OP_NHWC(x, y) ((x) * (VEC_DATA_TYPE(ACC_DATA_TYPE, 8))(1.f / (y)))

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

42

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

43

/* Selects the 3x3 pooling macro that matches the compile-time horizontal stride.
 * POOLING3x3 is intentionally left undefined for any STRIDE_X other than 1, 2 or 3;
 * code that depends on it is guarded by defined(POOLING3x3). */
#if STRIDE_X == 1
#define POOLING3x3(res, input, output) POOLING3x3_STRIDE1(res, input, output)
#elif STRIDE_X == 2 /* STRIDE_X == 1 */
#define POOLING3x3(res, input, output) POOLING3x3_STRIDE2(res, input, output)
#elif STRIDE_X == 3 /* STRIDE_X == 2 */
#define POOLING3x3(res, input, output) POOLING3x3_STRIDE3(res, input, output)
#endif /* STRIDE_X == 3 */

50

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

51

/* VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(n, offset, ptr) expands to vload##n(offset, ptr).
 * When FP_MIXED_PRECISION is defined, the loaded vector is additionally converted to an
 * n-element vector of ACC_DATA_TYPE through CONVERT_TO_ACC_DATA_TYPE. */
#if defined(FP_MIXED_PRECISION)
#define CONVERT_TO_ACC_DATA_TYPE(x, n) CONVERT(x, VEC_DATA_TYPE(ACC_DATA_TYPE, n))
#define VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(n, offset, ptr) \
    CONVERT_TO_ACC_DATA_TYPE(vload##n(offset, ptr), n)
#else /* defined(FP_MIXED_PRECISION) */
#define VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(n, offset, ptr) vload##n(offset, ptr)
#endif /* defined(FP_MIXED_PRECISION) */

58

59

/* Computes four horizontally adjacent 3x3 pooling results for STRIDE_X == 1.
 *
 * For each of the rows addressed by tensor3D_offset(&input, 0, y, 0) with y = 0, 1, 2 the macro
 * loads six consecutive elements (a 4-vector plus a 2-vector at element offset 4), applies POW2_OP,
 * and rearranges them so that the three columns of every output window sit in matching lanes.
 * The three rows are reduced with POOL_OP first, then the three columns of each window, and the
 * four reduced values are assigned to `res`. Output lane i covers input columns i .. i + 2.
 *
 * `res` must be an lvalue of type VEC_DATA_TYPE(ACC_DATA_TYPE, 4). `output` is not used.
 */
#define POOLING3x3_STRIDE1(res, input, output)                                                                                                     \
    ({                                                                                                                                             \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        data00 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(4, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));                                 \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 2)                                                                                                            \
        data01 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(2, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 4);                             \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        data10 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(4, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));                                 \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 2)                                                                                                            \
        data11 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(2, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 4);                             \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        data20 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(4, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));                                 \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 2)                                                                                                            \
        data21 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(2, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 4);                             \
        data00 = POW2_OP(data00, 4);                                                                                                               \
        data01 = POW2_OP(data01, 2);                                                                                                               \
        data10 = POW2_OP(data10, 4);                                                                                                               \
        data11 = POW2_OP(data11, 2);                                                                                                               \
        data20 = POW2_OP(data20, 4);                                                                                                               \
        data21 = POW2_OP(data21, 2);                                                                                                               \
                                                                                                                                                   \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        values00 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 8))(data00.s01212323);                                                                            \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        values01 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data01.s0, data00.s3, data01.s01);                                                            \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        values10 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 8))(data10.s01212323);                                                                            \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        values11 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data11.s0, data10.s3, data11.s01);                                                            \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        values20 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 8))(data20.s01212323);                                                                            \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        values21 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data21.s0, data20.s3, data21.s01);                                                            \
                                                                                                                                                   \
        values00 = POOL_OP(values00, values10);                                                                                                    \
        values01 = POOL_OP(values01, values11);                                                                                                    \
        values00 = POOL_OP(values00, values20);                                                                                                    \
        values01 = POOL_OP(values01, values21);                                                                                                    \
                                                                                                                                                   \
        res = POOL_OP((VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(values00.s036, values01.s1), (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(values00.s147, values01.s2)); \
        res = POOL_OP(res, (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(values00.s25, values01.s03));                                                         \
    })

101

102

/* Computes four horizontally adjacent 3x3 pooling results for STRIDE_X == 2.
 *
 * For each of the rows addressed by tensor3D_offset(&input, 0, y, 0) with y = 0, 1, 2 the macro
 * loads nine consecutive elements (an 8-vector plus the scalar at element offset 8), applies
 * POW2_OP, and rearranges them so that the three columns of every output window sit in matching
 * lanes. Rows are reduced with POOL_OP first, then the columns of each window, and the four
 * reduced values are assigned to `res`. Output lane i covers input columns 2 * i .. 2 * i + 2.
 *
 * `res` must be an lvalue of type VEC_DATA_TYPE(ACC_DATA_TYPE, 4). `output` is not used.
 */
#define POOLING3x3_STRIDE2(res, input, output)                                                                                                     \
    ({                                                                                                                                             \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        data00               = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));                   \
        ACC_DATA_TYPE data01 = (ACC_DATA_TYPE)(*((__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 8));                                     \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        data10               = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));                   \
        ACC_DATA_TYPE data11 = (ACC_DATA_TYPE)(*((__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 8));                                     \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        data20               = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));                   \
        ACC_DATA_TYPE data21 = (ACC_DATA_TYPE)(*((__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 8));                                     \
        data00               = POW2_OP(data00, 8);                                                                                                 \
        data01               = POW2_OP(data01, 1);                                                                                                 \
        data10               = POW2_OP(data10, 8);                                                                                                 \
        data11               = POW2_OP(data11, 1);                                                                                                 \
        data20               = POW2_OP(data20, 8);                                                                                                 \
        data21               = POW2_OP(data21, 1);                                                                                                 \
                                                                                                                                                   \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        values00 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 8))(data00.s01223445);                                                                            \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        values01 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data00.s667, data01);                                                                         \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        values10 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 8))(data10.s01223445);                                                                            \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        values11 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data10.s667, data11);                                                                         \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                            \
        values20 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 8))(data20.s01223445);                                                                            \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                            \
        values21 = (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data20.s667, data21);                                                                         \
                                                                                                                                                   \
        values00 = POOL_OP(values00, values10);                                                                                                    \
        values01 = POOL_OP(values01, values11);                                                                                                    \
        values00 = POOL_OP(values00, values20);                                                                                                    \
        values01 = POOL_OP(values01, values21);                                                                                                    \
                                                                                                                                                   \
        res = POOL_OP((VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(values00.s036, values01.s1), (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(values00.s147, values01.s2)); \
        res = POOL_OP(res, (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(values00.s25, values01.s03));                                                         \
    })

141

142

/* Computes four horizontally adjacent 3x3 pooling results for STRIDE_X == 3.
 *
 * For each of the rows addressed by tensor3D_offset(&input, 0, y, 0) with y = 0, 1, 2 the macro
 * loads twelve consecutive elements (an 8-vector plus a 4-vector at element offset 8) and applies
 * POW2_OP. Because the windows do not overlap, no rearrangement is needed: rows are reduced with
 * POOL_OP first, then the three columns of each window are gathered by swizzle and reduced, and
 * the four reduced values are assigned to `res`. Output lane i covers input columns
 * 3 * i .. 3 * i + 2.
 *
 * `res` must be an lvalue of type VEC_DATA_TYPE(ACC_DATA_TYPE, 4). `output` is not used.
 */
#define POOLING3x3_STRIDE3(res, input, output)                                                                                               \
    ({                                                                                                                                       \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                      \
        data00 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));                           \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                      \
        data01 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(4, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 8);                       \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                      \
        data10 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));                           \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                      \
        data11 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(4, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 8);                       \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 8)                                                                                                      \
        data20 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));                           \
        VEC_DATA_TYPE(ACC_DATA_TYPE, 4)                                                                                                      \
        data21 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(4, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 8);                       \
        data00 = POW2_OP(data00, 8);                                                                                                         \
        data01 = POW2_OP(data01, 4);                                                                                                         \
        data10 = POW2_OP(data10, 8);                                                                                                         \
        data11 = POW2_OP(data11, 4);                                                                                                         \
        data20 = POW2_OP(data20, 8);                                                                                                         \
        data21 = POW2_OP(data21, 4);                                                                                                         \
                                                                                                                                             \
        data00 = POOL_OP(data00, data10);                                                                                                    \
        data01 = POOL_OP(data01, data11);                                                                                                    \
        data00 = POOL_OP(data00, data20);                                                                                                    \
        data01 = POOL_OP(data01, data21);                                                                                                    \
                                                                                                                                             \
        res = POOL_OP((VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data00.s036, data01.s1), (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data00.s147, data01.s2)); \
        res = POOL_OP(res, (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(data00.s25, data01.s03));                                                       \
    })

171

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

172

/** Computes the number of elements covered by the pooling window of the current work-item.
 *
 * The window start is get_global_id(0) * stride_x - pad_x horizontally and
 * get_global_id(1) * stride_y - pad_y vertically. The window end is the start plus the pool size,
 * clamped to the given upper bounds. When EXCLUDE_PADDING is defined, the start is additionally
 * clamped to 0 so that leading padding is not counted.
 *
 * @note Despite its name, this function returns the window area (the divisor), not its reciprocal;
 *       callers divide the accumulated value by the result.
 *
 * @param[in] pool_size_x   Pooling window width
 * @param[in] pool_size_y   Pooling window height
 * @param[in] upper_bound_w Exclusive upper limit for the window end in X
 * @param[in] upper_bound_h Exclusive upper limit for the window end in Y
 * @param[in] pad_x         Padding subtracted from the window start in X
 * @param[in] pad_y         Padding subtracted from the window start in Y
 * @param[in] stride_x      Window step in X
 * @param[in] stride_y      Window step in Y
 *
 * @return (end_y - start_y) * (end_x - start_x), converted to ACC_DATA_TYPE
 */
ACC_DATA_TYPE calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
                                  const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    int       start_x = get_global_id(0) * stride_x - pad_x;
    int       start_y = get_global_id(1) * stride_y - pad_y;
    // The end is computed from the unclamped start, so the clamping below only shrinks the window.
    const int end_x = min(start_x + pool_size_x, upper_bound_w);
    const int end_y = min(start_y + pool_size_y, upper_bound_h);
#if defined(EXCLUDE_PADDING)
    start_x = max(0, start_x);
    start_y = max(0, start_y);
#endif /* defined(EXCLUDE_PADDING) */
    return ((end_y - start_y) * (end_x - start_x));
}

185

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

186

/** Performs a pooling function of pool size equal to 2.

187

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame]

188

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

189

* @note In case of average pooling the following information must be passed at compile time:

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

190

* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

191

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)

192

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

193

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

194

*

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

195

* @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32

196

* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

197

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

198

* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

199

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

200

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

201

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

202

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor

203

* @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr

204

* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

205

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

206

* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

207

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

208

* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)

209

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

210

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

211

*/

212

__kernel void pooling_layer_2(

213

TENSOR3D_DECLARATION(input),

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

214

TENSOR3D_DECLARATION(output))

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

215

{

216

// Get pixels pointer

217

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

218

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

219

220

// Load data

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

221

VEC_DATA_TYPE(ACC_DATA_TYPE, 2)

222

data0 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(2, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));

223

VEC_DATA_TYPE(ACC_DATA_TYPE, 2)

224

data1 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(2, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

225

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

226

#if defined(POOL_L2)

227

// Raise to power of 2 for L2 Pooling

228

data0 = POW2_OP(data0, 2);

229

data1 = POW2_OP(data1, 2);

230

#endif /* defined(POOL_L2) */

231

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

232

// Perform calculations

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

233

data0 = POOL_OP(data0, data1);

234

ACC_DATA_TYPE res = POOL_OP(data0.s0, data0.s1);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

235

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

236

#if defined(POOL_AVG) || defined(POOL_L2)

237

// Divide by pool region in case of average or l2 pooling

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

238

res = DIV_OP(res, calculate_avg_scale(2, 2, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

239

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

240

241

#if defined(POOL_L2)

242

// Take square root of the result in L2 pooling

243

res = SQRT_OP(res);

244

#endif /* defined(POOL_L2) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

245

246

// Store result

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

247

*(__global DATA_TYPE *)output.ptr = (DATA_TYPE)res;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

248

}

249

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

250

/** Performs a pooling function of pool size equal to 3

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

251

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame]

252

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

253

* @note In case of average pooling the following information must be passed at compile time:

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

254

* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

255

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)

256

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

257

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

258

*

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

259

* @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32

260

* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

261

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

262

* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

263

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

264

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

265

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

266

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor

267

* @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr

268

* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

269

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

270

* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

271

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

272

* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)

273

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

274

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

275

*/

276

__kernel void pooling_layer_3(

277

TENSOR3D_DECLARATION(input),

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

278

TENSOR3D_DECLARATION(output))

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

279

{

280

// Get pixels pointer

281

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

282

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

283

284

// Load data

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

285

VEC_DATA_TYPE(ACC_DATA_TYPE, 3)

286

data0 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(3, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));

287

VEC_DATA_TYPE(ACC_DATA_TYPE, 3)

288

data1 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(3, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));

289

VEC_DATA_TYPE(ACC_DATA_TYPE, 3)

290

data2 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(3, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

291

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

292

#if defined(POOL_L2)

293

// Raise to power of 2 for L2 Pooling

294

data0 = POW2_OP(data0, 3);

295

data1 = POW2_OP(data1, 3);

296

data2 = POW2_OP(data2, 3);

297

#endif /* defined(POOL_L2) */

298

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

299

// Perform calculations

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

300

data0 = POOL_OP(data0, data1);

301

data0 = POOL_OP(data0, data2);

302

ACC_DATA_TYPE res = POOL_OP(POOL_OP(data0.s0, data0.s1), data0.s2);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

303

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

304

#if defined(POOL_AVG) || defined(POOL_L2)

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

305

// Divide by pool region in case of average pooling

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

306

res = DIV_OP(res, calculate_avg_scale(3, 3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

307

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

308

309

#if defined(POOL_L2)

310

// Take square root of the result in L2 pooling

311

res = SQRT_OP(res);

312

#endif /* defined(POOL_L2) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

313

314

// Store result

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

315

*(__global DATA_TYPE *)output.ptr = (DATA_TYPE)res;

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

316

}

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

317

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame]

318

#if defined(POOLING3x3)

steniu01

2017-06-23 17:00:26 +0100

[diff] [blame]

319

320

/* CONVERT_VECTOR4(T) expands to the name convert_<T>4. The extra level of indirection through
 * CONVERT_OP lets a macro argument such as ACC_DATA_TYPE be expanded before token pasting. */
#define CONVERT_OP(data_type) convert_##data_type##4
#define CONVERT_VECTOR4(data_type) CONVERT_OP(data_type)

322

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

323

/** Computes the reciprocal of the pooling window area for four horizontally adjacent outputs.
 *
 * Lane i corresponds to output column get_global_id(0) * 4 + i; all lanes share the row given by
 * get_global_id(1). For each lane the window start is the output coordinate times the stride minus
 * the padding, and the window end is the start plus @p pool_size clamped to the given upper bound.
 * When EXCLUDE_PADDING is defined, the start is additionally clamped to 0.
 *
 * @note Unlike calculate_avg_scale, this function returns 1 / area, so the caller multiplies by
 *       the result.
 *
 * @param[in] pool_size     Pooling window size, used for both dimensions
 * @param[in] upper_bound_w Exclusive upper limit for the window end in X
 * @param[in] upper_bound_h Exclusive upper limit for the window end in Y
 * @param[in] pad_x         Padding subtracted from the window start in X
 * @param[in] pad_y         Padding subtracted from the window start in Y
 * @param[in] stride_x      Window step in X
 * @param[in] stride_y      Window step in Y
 *
 * @return Four-element vector of ACC_DATA_TYPE holding 1 / ((end_y - start_y) * (end_x - start_x)) per lane
 */
VEC_DATA_TYPE(ACC_DATA_TYPE, 4)
calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upper_bound_h,
                     const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    int4       start_x = ((int4)get_global_id(0) * 4 + (int4)(0, 1, 2, 3)) * (int4)stride_x - (int4)pad_x;
    int        start_y = get_global_id(1) * stride_y - pad_y;
    // The end is computed from the unclamped start, so the clamping below only shrinks the window.
    const int4 end_x = min(start_x + (int4)pool_size, (int4)upper_bound_w);
    const int  end_y = min(start_y + pool_size, upper_bound_h);
#if defined(EXCLUDE_PADDING)
    start_x = max((int4)0, start_x);
    start_y = max(0, start_y);
#endif /* defined(EXCLUDE_PADDING) */
    return (VEC_DATA_TYPE(ACC_DATA_TYPE, 4))(1.f) / CONVERT_VECTOR4(ACC_DATA_TYPE)(((int4)(end_y - start_y)) * (end_x - start_x));
}

337

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

338

/** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

339

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame]

340

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

341

* @note In case of average pooling the following information must be passed at compile time:

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

342

* -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

343

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)

344

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

345

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

346

*

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

347

* @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32

348

* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

349

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

350

* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

351

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

352

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

353

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

354

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor

355

* @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr

356

* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

357

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

358

* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

359

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

360

* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)

361

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

362

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

363

*/

Anton Lokhmotov

af6204c

2017-11-08 09:34:19 +0000

[diff] [blame]

364

__kernel void pooling_layer_optimized_3(

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

365

TENSOR3D_DECLARATION(input),

366

TENSOR3D_DECLARATION(output))

367

{

368

// Get pixels pointer

369

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

370

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

371

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

372

VEC_DATA_TYPE(ACC_DATA_TYPE, 4)

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

373

res;

374

375

// Perform pooling 3x3 for 4 output elements

376

POOLING3x3(res, input, output);

377

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

378

#if defined(POOL_AVG) || defined(POOL_L2)

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

379

// Divide by pool region in case of average pooling

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

380

res *= calculate_avg_scale4(3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y);

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

381

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

382

383

#if defined(POOL_L2)

384

// Take square root of the result in L2 pooling

385

res = SQRT_OP(res);

386

#endif /* defined(POOL_L2) */

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

387

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

388

vstore4(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 4)), 0, (__global DATA_TYPE *)output.ptr);

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

389

}

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame]

390

#endif // defined(POOLING3x3)

Gian Marco Iodice

2017-08-02 13:19:48 +0100

[diff] [blame]

391

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

392

#if defined(POOL_SIZE_X) && defined(POOL_SIZE_Y)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

393

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

394

/** Performs a pooling function of pool size equal to N (NCHW)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

395

*

Vidhya Sudhan Loganathan

2018-07-04 09:34:00 +0100

[diff] [blame]

396

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

397

* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

398

* @note In case of average pooling the following information must be passed at compile time:

399

* -DPOOL_AVG or -DPOOL_L2 must be provided, otherwise max pooling will be performed.

400

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad)

401

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

402

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

Michele Di Giorgio

cbbed28

2019-12-20 13:26:08 +0000

[diff] [blame]

403

* @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

404

*

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

405

* @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32

406

* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

407

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

408

* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

409

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

410

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

411

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

412

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor

413

* @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr

414

* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

415

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

416

* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

417

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

418

* @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)

419

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

420

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

421

*/

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

422

__kernel void pooling_layer_MxN_nchw(

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

423

TENSOR3D_DECLARATION(input),

424

TENSOR3D_DECLARATION(output))

425

{

426

// Get pixels pointer

427

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

428

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

429

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

430

VEC_DATA_TYPE(ACC_DATA_TYPE, 8)

431

vdata = INITIAL_VALUE;

432

ACC_DATA_TYPE sdata = INITIAL_VALUE;

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

433

434

// Load data

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

435

for(int y = 0; y < POOL_SIZE_Y; y++)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

436

{

437

int x = 0;

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

438

for(; x <= ((int)POOL_SIZE_X - 8); x += 8)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

439

{

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

440

VEC_DATA_TYPE(ACC_DATA_TYPE, 8)

441

data0 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

442

#if defined(POOL_L2)

443

// Raise to power of 2 for L2 Pooling

444

data0 *= data0;

445

#endif /* defined(POOL_L2) */

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

446

vdata = POOL_OP(vdata, data0);

447

}

448

449

// Leftover

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

450

for(; x < (int)POOL_SIZE_X; ++x)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

451

{

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

452

ACC_DATA_TYPE data0 = (ACC_DATA_TYPE)(*((__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0)));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

453

#if defined(POOL_L2)

454

// Raise to power of 2 for L2 Pooling

455

data0 *= data0;

456

#endif /* defined(POOL_L2) */

457

sdata = POOL_OP(sdata, data0);

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

}

}

// Reduce result

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

462

VEC_DATA_TYPE(ACC_DATA_TYPE, 4)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

463

reduce4 = POOL_OP(vdata.s0123, vdata.s4567);

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

464

VEC_DATA_TYPE(ACC_DATA_TYPE, 2)

465

reduce2 = POOL_OP(reduce4.s01, reduce4.s23);

466

ACC_DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1);

467

res = POOL_OP(res, sdata);

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

468

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

469

#if defined(POOL_AVG) || defined(POOL_L2)

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

470

// Divide by pool region in case of average pooling

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

471

res = DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));

Georgios Pinitas

2017-08-31 14:21:36 +0100

[diff] [blame]

472

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

473

474

#if defined(POOL_L2)

475

// Take square root of the result in L2 pooling

476

res = SQRT_OP(res);

477

#endif /* defined(POOL_L2) */

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

478

479

// Store result

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

480

*(__global DATA_TYPE *)output.ptr = (DATA_TYPE)res;

Gian Marco Iodice

2017-09-05 13:51:21 +0100

[diff] [blame]

481

}

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

482

#endif // defined(POOL_SIZE_X) && defined(POOL_SIZE_Y)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

483

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

484

/** Computes the number of elements spanned by the pooling window of the current work-item (NHWC).
 *
 * The window origin is derived from get_global_id(1) (x) and get_global_id(2) (y, taken modulo
 * DST_DEPTH when -DDST_DEPTH is defined). The window end is clamped to the upper bounds.
 * When -DEXCLUDE_PADDING is defined the window start is additionally clamped to 0; otherwise the
 * upper bounds are extended by the padding, so padded positions are counted as well.
 *
 * @param[in] pool_size_x   Pooling window size along x
 * @param[in] pool_size_y   Pooling window size along y
 * @param[in] upper_bound_w Upper bound of the window end along x (before any padding extension)
 * @param[in] upper_bound_h Upper bound of the window end along y (before any padding extension)
 * @param[in] pad_x         Padding along x
 * @param[in] pad_y         Padding along y
 * @param[in] stride_x      Window step along x
 * @param[in] stride_y      Window step along y
 *
 * @return The window area (extent along y times extent along x), converted to ACC_DATA_TYPE
 */
ACC_DATA_TYPE calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
                                       const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    // Unclamped window origin; it can be negative when the window starts inside the padding
    const int first_x = get_global_id(1) * stride_x - pad_x;
#if defined(DST_DEPTH)
    const int first_y = (get_global_id(2) % DST_DEPTH) * stride_y - pad_y;
#else  /* defined(DST_DEPTH) */
    const int first_y = get_global_id(2) * stride_y - pad_y;
#endif /* defined(DST_DEPTH) */

#if defined(EXCLUDE_PADDING)
    // Padding is not counted: keep the bounds as given and clamp the start to 0
    const int limit_x = upper_bound_w;
    const int limit_y = upper_bound_h;
    const int begin_x = max(0, first_x);
    const int begin_y = max(0, first_y);
#else  /* defined(EXCLUDE_PADDING) */
    // Padding is counted: extend the bounds by the padding and keep the unclamped start
    const int limit_x = upper_bound_w + pad_x;
    const int limit_y = upper_bound_h + pad_y;
    const int begin_x = first_x;
    const int begin_y = first_y;
#endif /* defined(EXCLUDE_PADDING) */

    // The window end is always measured from the unclamped origin
    const int span_x = min(first_x + pool_size_x, limit_x) - begin_x;
    const int span_y = min(first_y + pool_size_y, limit_y) - begin_y;

    return (span_y * span_x);
}

506

507

/** Performs a pooling function of pool size equal to N (NHWC)

508

*

509

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

510

* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;

511

* @note Tensors width and height must be passed at compile time using -DMAX_WIDTH and -DMAX_HEIGHT

512

* @note Strides must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

513

* @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

514

* @note In case of average pooling the following information must be passed at compile time:

515

* -DPOOL_AVG must be provided otherwise max pooling will be performed.

Michele Di Giorgio

cbbed28

2019-12-20 13:26:08 +0000

[diff] [blame]

516

* @note The initial value for the pooling operation must be passed at compile time using -DINITIAL_VALUE e.g. -DINITIAL_VALUE=0

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

517

*

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

518

* @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32

519

* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

520

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

521

* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

522

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

523

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

524

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

525

* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)

526

* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

527

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor

528

* @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

529

* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

530

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

531

* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

532

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

533

* @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

534

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

535

* @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes)

536

* @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

537

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

538

*/

539

__kernel void pooling_layer_MxN_nhwc(

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

540

TENSOR4D_DECLARATION(input),

541

TENSOR4D_DECLARATION(output))

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

542

{

543

// Get pixels pointer

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

544

#if defined(DST_DEPTH)

545

Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DST_DEPTH);

546

Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);

547

#else /* defined(DST_DEPTH) */

548

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

549

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

550

#endif /* defined(DST_DEPTH) */

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

551

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

552

VEC_DATA_TYPE(ACC_DATA_TYPE, 8)

Michele Di Giorgio

f512580

2019-08-15 15:00:37 +0100

[diff] [blame]

553

vdata = INITIAL_VALUE;

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

554

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

555

const int idx_width = get_global_id(1) * STRIDE_X;

556

#if defined(DST_DEPTH)

557

const int idx_height = (get_global_id(2) % DST_DEPTH) * STRIDE_Y;

558

#else /* defined(DST_DEPTH) */

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

559

const int idx_height = get_global_id(2) * STRIDE_Y;

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

560

#endif /* defined(DST_DEPTH) */

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

561

562

for(int y = 0; y < POOL_SIZE_Y; ++y)

563

{

Georgios Pinitas

e222055

2018-07-20 13:23:44 +0100

[diff] [blame]

564

int y1 = select(y, PAD_Y - idx_height, y + idx_height - PAD_Y < 0 || y + idx_height - PAD_Y >= MAX_HEIGHT);

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

565

for(int x = 0; x < POOL_SIZE_X; ++x)

566

{

Georgios Pinitas

e222055

2018-07-20 13:23:44 +0100

[diff] [blame]

567

int x1 = select(x, PAD_X - idx_width - 1, x + idx_width - PAD_X < 0 || x + idx_width - PAD_X >= MAX_WIDTH);

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

568

x1 = select(x1, PAD_X - idx_width - 1, y != y1);

569

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

570

#if defined(DST_DEPTH)

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

571

VEC_DATA_TYPE(ACC_DATA_TYPE, 8)

572

data0 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0));

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

573

#else /* defined(DST_DEPTH) */

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

574

VEC_DATA_TYPE(ACC_DATA_TYPE, 8)

Sheri Zhang

2020-08-03 20:11:56 +0100

[diff] [blame^]

575

data0 = VLOAD_AND_CONVERT_TO_ACC_DATA_TYPE(8, 0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y));

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

576

#endif /* defined(DST_DEPTH) */

577

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

578

#if defined(POOL_L2)

579

// Raise to power of 2 for L2 Pooling

580

data0 *= data0;

581

#endif /* defined(POOL_L2) */

Sang-Hoon Park

2019-09-18 13:39:00 +0100

[diff] [blame]

582

vdata = POOL_OP(vdata, CONVERT(data0, VEC_DATA_TYPE(ACC_DATA_TYPE, 8)));

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

}

}

#if defined(POOL_AVG) || defined(POOL_L2)

587

// Divide by pool region in case of average pooling

588

vdata = DIV_OP_NHWC(vdata, calculate_avg_scale_nhwc(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));

589

#endif /* defined(POOL_AVG) || defined(POOL_L2) */

590

591

#if defined(POOL_L2)

592

// Take square root of the result in L2 pooling

593

vdata = SQRT_OP(vdata);

594

#endif /* defined(POOL_L2) */

595

596

// Store result

Michele Di Giorgio

f512580

2019-08-15 15:00:37 +0100

[diff] [blame]

597

vstore8(CONVERT(vdata, VEC_DATA_TYPE(DATA_TYPE, 8)), 0, (__global DATA_TYPE *)output.ptr);

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

598

}

Sheri Zhang