Blame - src/core/CL/cl_kernels/pooling_layer.cl - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

/*

*

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

* SOFTWARE.

*/

#include "helpers.h"

Anthony Barbier

2017-07-03 17:39:37 +0100

[diff] [blame^]

26

/* POOL_OP(x, y) combines two values (scalars or vectors) of the pooling window.
 * With -DPOOL_AVG it accumulates them by addition; otherwise it keeps the
 * larger of the two through fmax().
 */
#if defined(POOL_AVG)
#define POOL_OP(x, y) ((x) + (y))
#else /* !defined(POOL_AVG) */
#define POOL_OP(x, y) (fmax((x), (y)))
#endif /* defined(POOL_AVG) */

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

31

32

/** Computes the scale factor used by average pooling for the current work-item.
 *
 * The window origin is derived from the work-item global ids in dimensions 0 and 1,
 * multiplied by the strides and shifted back by the paddings. Only the far edges of
 * the window are clamped (against the upper bounds); the origin is used as-is.
 *
 * @param[in] pool_size     Side length of the pooling window
 * @param[in] upper_bound_w Upper limit applied to the window end along x
 * @param[in] upper_bound_h Upper limit applied to the window end along y
 * @param[in] pad_x         Padding subtracted from the window origin along x
 * @param[in] pad_y         Padding subtracted from the window origin along y
 * @param[in] stride_x      Stride multiplied by the global id along x
 * @param[in] stride_y      Stride multiplied by the global id along y
 *
 * @return 1 / (window height * window width) after clamping the window ends
 */
float calculate_avg_scale(const int pool_size, const int upper_bound_w, const int upper_bound_h,
                          const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    // Origin of the pooling window for this work-item
    const int window_left = get_global_id(0) * stride_x - pad_x;
    const int window_top  = get_global_id(1) * stride_y - pad_y;

    // Window extent once its far edges are limited to the upper bounds
    const int window_width  = min(window_left + pool_size, upper_bound_w) - window_left;
    const int window_height = min(window_top + pool_size, upper_bound_h) - window_top;

    return 1.f / (window_height * window_width);
}

41

42

/** Pooling kernel for a pool size of 2: each work-item reduces two rows of two elements to one output value.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE, e.g. -DDATA_TYPE=float. Supported data types: F16, F32
 * @note Pass -DPOOL_AVG to perform average pooling; without it max pooling is performed.
 * @note The arguments @p max_dims, @p strides and @p paddings are only part of the signature when -DPOOL_AVG is defined.
 *
 * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16, F32
 * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem (in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source image
 * @param[out] output_ptr                           Pointer to the destination image. Supported data types: F16, F32
 * @param[in]  output_stride_x                      Stride of the destination image in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in]  output_stride_y                      Stride of the destination image in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem (in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination image
 * @param[in]  max_dims                             The maximum index that can be accessed in x and y dimension (width + pad)
 * @param[in]  strides                              The pooling operation strides in each dimension
 * @param[in]  paddings                             The pooling operation paddings in each dimension
 */
__kernel void pooling_layer_2(
    TENSOR3D_DECLARATION(input),
    TENSOR3D_DECLARATION(output)
#ifdef POOL_AVG
    ,
    int2 max_dims, int2 strides, int2 paddings
#endif /* POOL_AVG */
)
{
    // Build the tensor accessors for this work-item
    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(input);
    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(output);

    // Fetch two elements from the row at y offset 0 and two from the row at y offset 1
    const VEC_DATA_TYPE(DATA_TYPE, 2) row0 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&src, 0, 0, 0));
    const VEC_DATA_TYPE(DATA_TYPE, 2) row1 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&src, 0, 1, 0));

    // Combine the rows element-wise first, then fold the two lanes into a scalar
    const VEC_DATA_TYPE(DATA_TYPE, 2) column_acc = POOL_OP(row0, row1);
    DATA_TYPE pooled = POOL_OP(column_acc.s0, column_acc.s1);

#ifdef POOL_AVG
    // Average pooling: turn the accumulated sum into a mean
    pooled *= calculate_avg_scale(2, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y);
#endif /* POOL_AVG */

    // Write the pooled value to the destination
    *(__global DATA_TYPE *)dst.ptr = pooled;
}

98

99

/** Pooling kernel for a pool size of 3: each work-item reduces three rows of three elements to one output value.
 *
 * @note The data type must be passed at compile time using -DDATA_TYPE, e.g. -DDATA_TYPE=float. Supported data types: F16, F32
 * @note Pass -DPOOL_AVG to perform average pooling; without it max pooling is performed.
 * @note The arguments @p max_dims, @p strides and @p paddings are only part of the signature when -DPOOL_AVG is defined.
 *
 * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16, F32
 * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem (in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source image
 * @param[out] output_ptr                           Pointer to the destination image. Supported data types: F16, F32
 * @param[in]  output_stride_x                      Stride of the destination image in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem (in bytes)
 * @param[in]  output_stride_y                      Stride of the destination image in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem (in bytes)
 * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem (in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination image
 * @param[in]  max_dims                             The maximum index that can be accessed in x and y dimension (width + pad)
 * @param[in]  strides                              The pooling operation strides in each dimension
 * @param[in]  paddings                             The pooling operation paddings in each dimension
 */
__kernel void pooling_layer_3(
    TENSOR3D_DECLARATION(input),
    TENSOR3D_DECLARATION(output)
#ifdef POOL_AVG
    ,
    int2 max_dims, int2 strides, int2 paddings
#endif /* POOL_AVG */
)
{
    // Build the tensor accessors for this work-item
    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(input);
    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(output);

    // Fetch three elements from each of the rows at y offsets 0, 1 and 2
    const VEC_DATA_TYPE(DATA_TYPE, 3) row0 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&src, 0, 0, 0));
    const VEC_DATA_TYPE(DATA_TYPE, 3) row1 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&src, 0, 1, 0));
    const VEC_DATA_TYPE(DATA_TYPE, 3) row2 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&src, 0, 2, 0));

    // Combine the rows element-wise (row0 with row1, then with row2) ...
    VEC_DATA_TYPE(DATA_TYPE, 3) column_acc = POOL_OP(row0, row1);
    column_acc = POOL_OP(column_acc, row2);

    // ... then fold the three lanes, left to right, into a scalar
    DATA_TYPE pooled = POOL_OP(POOL_OP(column_acc.s0, column_acc.s1), column_acc.s2);

#ifdef POOL_AVG
    // Average pooling: turn the accumulated sum into a mean
    pooled *= calculate_avg_scale(3, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y);
#endif /* POOL_AVG */

    // Write the pooled value to the destination
    *(__global DATA_TYPE *)dst.ptr = pooled;
}

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

158

159

/** Performs a pooling function of pool size equal to 7.

160

*

161

* @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16, F32;

162

* @note In case of average pooling -DPOOL_AVG must be provided otherwise max pooling will be performed.

163

*

164

* @param[in] input_ptr Pointer to the source image. Supported data types: F16, F32

165

* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)

166

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

167

* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)

168

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

169

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

170

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

171

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image

172

* @param[out] output_ptr Pointer to the destination image. Supported data types: F16, F32

173

* @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)

174

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

175

* @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)

176

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

177

* @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)

178

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

179

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image

180

* @param[in] max_dims The maximum index that can be accessed in x and y dimension (width + pad)

181

* @param[in] strides The pooling operation strides in each dimension

182

* @param[in] paddings The pooling operation paddings in each dimension

183

*/

184

__kernel void pooling_layer_7(

185

TENSOR3D_DECLARATION(input),

186

TENSOR3D_DECLARATION(output)

187

#ifdef POOL_AVG

188

,

189

int2 max_dims, int2 strides, int2 paddings

Anthony Barbier

2017-07-03 17:39:37 +0100

[diff] [blame^]

190

#endif /* POOL_AVG */

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

191

)

192

{

193

// Get pixels pointer

194

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

195

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

196

197

// Load data

198

VEC_DATA_TYPE(DATA_TYPE, 8)

199

data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));

200

VEC_DATA_TYPE(DATA_TYPE, 8)

201

data1 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));

202

VEC_DATA_TYPE(DATA_TYPE, 8)

203

data2 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));

204

VEC_DATA_TYPE(DATA_TYPE, 8)

205

data3 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 3, 0));

206

VEC_DATA_TYPE(DATA_TYPE, 8)

207

data4 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 4, 0));

208

VEC_DATA_TYPE(DATA_TYPE, 8)

209

data5 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 5, 0));

210

VEC_DATA_TYPE(DATA_TYPE, 8)

211

data6 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 6, 0));

212

213

// Pool operation of all rows

214

data0 = POOL_OP(data0, data1);

215

data2 = POOL_OP(data2, data3);

216

data4 = POOL_OP(data4, data5);

217

data0 = POOL_OP(data0, data2);

218

data4 = POOL_OP(data4, data6);

219

data0 = POOL_OP(data0, data4);

// Set last element

#ifdef POOL_AVG

data0.s7 = 0;

Anthony Barbier

2017-07-03 17:39:37 +0100

[diff] [blame^]

224

#else /* POOL_AVG */

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

225

data0.s7 = data0.s6;

Anthony Barbier

2017-07-03 17:39:37 +0100

[diff] [blame^]

226

#endif /* POOL_AVG */

Georgios Pinitas

2017-06-19 16:11:53 +0100

[diff] [blame]

227

228

// Reduce result

229

VEC_DATA_TYPE(DATA_TYPE, 4)

230

reduce4 = POOL_OP(data0.s0123, data0.s4567);

231

VEC_DATA_TYPE(DATA_TYPE, 2)

232

reduce2 = POOL_OP(reduce4.s01, reduce4.s23);

233

DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1);

234

235

// Divide by pool region in case of average pooling

236

#ifdef POOL_AVG

237

res *= calculate_avg_scale(7, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y);

Anthony Barbier

2017-07-03 17:39:37 +0100

[diff] [blame^]

238

#endif /* POOL_AVG */

Georgios Pinitas