Blame - src/core/CL/cl_kernels/pooling_layer_quantized.cl - ml/ComputeLibrary

2017-11-08 09:34:19 +0000

[diff] [blame]

41

/* Element-wise reduction applied while walking the pooling window:
 * a running sum when -DPOOL_AVG is passed at compile time, a running maximum otherwise.
 */
#if defined(POOL_AVG)
#define POOL_OP(x, y) ((x) + (y))
#else /* defined(POOL_AVG) */
#define POOL_OP(x, y) (max((x), (y)))
#endif /* defined(POOL_AVG) */

/* Divides x by y by multiplying with the single-precision reciprocal of y.
 * Both arguments are parenthesized so that compound expressions such as
 * DIV_OP(a + b, c + d) expand with the intended precedence.
 */
#define DIV_OP(x, y) ((x) * (1.f / (y)))

/* Eight-lane variant: x is converted to float8 and multiplied by 1/y replicated across all lanes. */
#define DIV_OP_NHWC(x, y) (convert_float8(x) * (float8)(1.f / (y)))

/* L2 pooling has no quantized implementation in this file: fail the build early. */
#if defined(POOL_L2)
#error "L2 pooling is not supported"
#endif /* defined(POOL_L2) */

54

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

55

/** Returns the number of elements covered by the pooling window of the current work-item.
 *
 * The window origin is derived from the work-item ids in dimensions 0 (x) and 1 (y),
 * scaled by the strides and shifted back by the padding. The far edge of the window is
 * limited to the given upper bounds. When EXCLUDE_PADDING is defined the origin is also
 * limited to 0, so coordinates below zero are not counted.
 *
 * @param[in] pool_size_x   Window extent along x
 * @param[in] pool_size_y   Window extent along y
 * @param[in] upper_bound_w Largest x coordinate (exclusive) the window may reach
 * @param[in] upper_bound_h Largest y coordinate (exclusive) the window may reach
 * @param[in] pad_x         Padding along x
 * @param[in] pad_y         Padding along y
 * @param[in] stride_x      Window step along x
 * @param[in] stride_y      Window step along y
 *
 * @return Width times height of the (clamped) window
 */
int calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
                        const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    // Unclamped top-left corner of the window; may be negative when it starts inside the padding
    const int origin_x = get_global_id(0) * stride_x - pad_x;
    const int origin_y = get_global_id(1) * stride_y - pad_y;

    // Far edge, computed from the unclamped origin and limited to the upper bounds
    const int last_x = min(origin_x + pool_size_x, upper_bound_w);
    const int last_y = min(origin_y + pool_size_y, upper_bound_h);

#if defined(EXCLUDE_PADDING)
    // Drop the part of the window that lies before coordinate 0
    const int first_x = max(0, origin_x);
    const int first_y = max(0, origin_y);
#else  /* defined(EXCLUDE_PADDING) */
    const int first_x = origin_x;
    const int first_y = origin_y;
#endif /* defined(EXCLUDE_PADDING) */

    const int rows = last_y - first_y;
    const int cols = last_x - first_x;

    return (rows * cols);
}

68

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

69

/** Performs a pooling function of pool size equal to N (NCHW)

Anton Lokhmotov

2017-11-08 09:34:19 +0000

[diff] [blame]

70

*

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

71

* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;

Anton Lokhmotov

2017-11-08 09:34:19 +0000

[diff] [blame]

72

* @note In case of average pooling the following information must be passed at compile time:

73

* -DPOOL_AVG must be provided otherwise max pooling will be performed.

74

* -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad)

75

* -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

76

* -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

77

*

78

* @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8

79

* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)

80

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

81

* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)

82

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

83

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

84

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

85

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image

86

* @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr

87

* @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)

88

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

89

* @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)

90

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

91

* @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)

92

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

93

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image

94

*/

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

95

__kernel void pooling_layer_MxN_quantized_nchw(

Anton Lokhmotov

2017-11-08 09:34:19 +0000

[diff] [blame]

96

TENSOR3D_DECLARATION(input),

97

TENSOR3D_DECLARATION(output))

98

{

99

// Get pixels pointer

100

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

101

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

int8 vdata = 0;

int sdata = 0;

// Load data

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

107

for(int y = 0; y < POOL_SIZE_Y; y++)

Anton Lokhmotov

2017-11-08 09:34:19 +0000

[diff] [blame]

108

{

109

int x = 0;

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

110

for(; x <= ((int)POOL_SIZE_X - 8); x += 8)

Anton Lokhmotov

2017-11-08 09:34:19 +0000

[diff] [blame]

111

{

112

uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, x, y, 0));

113

int8 data0 = convert_int8(data);

114

vdata = POOL_OP(vdata, data0);

115

}

116

117

// Leftover

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

118

for(; x < (int)POOL_SIZE_X; ++x)

Anton Lokhmotov

2017-11-08 09:34:19 +0000

[diff] [blame]

119

{

120

uchar data = *((__global uchar *)tensor3D_offset(&input, x, y, 0));

121

int data0 = convert_int(data);

122

sdata = POOL_OP(sdata, data0);

}

}

// Reduce result

int4 reduce4 = POOL_OP(vdata.s0123, vdata.s4567);

128

int2 reduce2 = POOL_OP(reduce4.s01, reduce4.s23);

129

int res = POOL_OP(reduce2.s0, reduce2.s1);

130

res = POOL_OP(res, sdata);

131

132

#if defined(POOL_AVG)

Isabella Gottardi

2018-01-31 17:49:25 +0000

[diff] [blame]

133

res = round(DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)));

Anton Lokhmotov

2017-11-08 09:34:19 +0000

[diff] [blame]

134

#endif /* defined(POOL_AVG) */

135

Pablo Tello

a52e4cf

2019-04-01 14:55:18 +0100

[diff] [blame]

136

uchar result_u8 = convert_uchar(res);

137

138

#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)

139

140

const float result_f32 = convert_float(result_u8);

141

const float input_offset = (float)OFFSET_IN1;

142

const float input_scale = (float)SCALE_IN1;

143

const float scale_out = (float)SCALE_OUT;

144

const float offset_out = (float)OFFSET_OUT;

145

const float in_f32 = (result_f32 - input_offset) * input_scale;

146

const float out_f32 = in_f32 / scale_out + offset_out;

147

result_u8 = convert_uchar_sat(convert_int_rte(out_f32));

148

149

#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */

150

151

*(__global uchar *)output.ptr = result_u8;

Anton Lokhmotov

2017-11-08 09:34:19 +0000

[diff] [blame]

152

}

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

153

154

/** Returns the number of elements covered by the pooling window of the current work-item (NHWC indexing).
 *
 * The x origin comes from work-item id 1 and the y origin from work-item id 2
 * (taken modulo DST_DEPTH when DST_DEPTH is defined). The far edge of the window is limited
 * to the given upper bounds and the origin is limited to 0.
 *
 * NOTE(review): unlike calculate_avg_scale, the clamp of the origin to 0 is not guarded by
 * EXCLUDE_PADDING here - confirm this is intended.
 *
 * @param[in] pool_size_x   Window extent along x
 * @param[in] pool_size_y   Window extent along y
 * @param[in] upper_bound_w Largest x coordinate (exclusive) the window may reach
 * @param[in] upper_bound_h Largest y coordinate (exclusive) the window may reach
 * @param[in] pad_x         Padding along x
 * @param[in] pad_y         Padding along y
 * @param[in] stride_x      Window step along x
 * @param[in] stride_y      Window step along y
 *
 * @return Width times height of the clamped window
 */
int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
                             const int pad_x, const int pad_y, const int stride_x, const int stride_y)
{
    // Unclamped top-left corner of the window; may be negative when it starts inside the padding
    const int origin_x = get_global_id(1) * stride_x - pad_x;
#if defined(DST_DEPTH)
    const int origin_y = (get_global_id(2) % DST_DEPTH) * stride_y - pad_y;
#else  /* defined(DST_DEPTH) */
    const int origin_y = get_global_id(2) * stride_y - pad_y;
#endif /* defined(DST_DEPTH) */

    // Far edge, computed from the unclamped origin and limited to the upper bounds
    const int last_x = min(origin_x + pool_size_x, upper_bound_w);
    const int last_y = min(origin_y + pool_size_y, upper_bound_h);

    // Near edge, never below coordinate 0
    const int first_x = max(0, origin_x);
    const int first_y = max(0, origin_y);

    const int rows = last_y - first_y;
    const int cols = last_x - first_x;

    return (rows * cols);
}

172

173

/** Performs a pooling function of pool size equal to N (NHWC)

174

*

175

* @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;

176

* @note Tensors width and height must be passed at compile time using -DMAX_WIDTH and -DMAX_HEIGHT

177

* @note Strides must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions

178

* @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension

179

* @note In case of average pooling the following information must be passed at compile time:

180

* -DPOOL_AVG must be provided otherwise max pooling will be performed.

181

*

182

* @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8

183

* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)

184

* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)

185

* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)

186

* @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)

187

* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)

188

* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

189

* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)

190

* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

191

* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image

192

* @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

193

* @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

194

* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

195

* @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

196

* @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

197

* @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

198

* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

199

* @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes)

200

* @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

201

* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image

202

*/

203

__kernel void pooling_layer_MxN_quantized_nhwc(

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

204

TENSOR4D_DECLARATION(input),

205

TENSOR4D_DECLARATION(output))

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

206

{

207

// Get pixels pointer

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

208

#if defined(DST_DEPTH)

209

Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DST_DEPTH);

210

Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);

211

#else /* defined(DST_DEPTH) */

Michalis Spyrou

2018-10-30 16:41:21 +0000

[diff] [blame]

212

Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);

213

Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

214

#endif /* defined(DST_DEPTH) */

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

int8 vdata = 0;

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

218

const int idx_width = get_global_id(1) * STRIDE_X;

219

#if defined(DST_DEPTH)

220

const int idx_height = (get_global_id(2) % DST_DEPTH) * STRIDE_Y;

221

#else /* defined(DST_DEPTH) */

Michalis Spyrou

2018-10-30 16:41:21 +0000

[diff] [blame]

222

const int idx_height = get_global_id(2) * STRIDE_Y;

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

223

#endif /* defined(DST_DEPTH) */

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

224

225

for(int y = 0; y < POOL_SIZE_Y; ++y)

226

{

Michalis Spyrou

2018-10-30 16:41:21 +0000

[diff] [blame]

227

int y1 = select(y, PAD_Y - idx_height, y + idx_height - PAD_Y < 0 || y + idx_height - PAD_Y >= MAX_HEIGHT);

Michalis Spyrou

2018-04-18 09:49:16 +0100

[diff] [blame]

228

for(int x = 0; x < POOL_SIZE_X; ++x)

229

{

Michalis Spyrou

2018-10-30 16:41:21 +0000

[diff] [blame]

230

int x1 = select(x, PAD_X - idx_width - 1, x + idx_width - PAD_X < 0 || x + idx_width - PAD_X >= MAX_WIDTH);

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

231

x1 = select(x1, PAD_X - idx_width - 1, y != y1);

232

233

#if defined(DST_DEPTH)

234

uchar8 data = vload8(0, (__global uchar *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0));

235

#else /* defined(DST_DEPTH) */

Michalis Spyrou

2018-10-30 16:41:21 +0000

[diff] [blame]

236

uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y));

Georgios Pinitas

2018-10-29 20:07:15 +0000

[diff] [blame]

237

#endif /* defined(DST_DEPTH) */

238

239

int8 data0 = convert_int8(data);

240

vdata = POOL_OP(vdata, data0);

Michalis Spyrou