Blame - src/core/CL/cl_kernels/direct_convolution1x1.cl - ml/ComputeLibrary

2017-08-14 11:26:37 +0100

[diff] [blame]

37

38

#define ADD_OP(a, b) ((a) + (b))

39

#define MUL_OP(a, b) ((a) * (b))

40

#define CONVERT_SAT(a, b) ((a))

41

42

#endif /* FIXED_POINT_POSITION */

43

Gian Marco Iodice

1c8409d

2017-09-06 17:24:25 +0100

[diff] [blame]

44

#if defined(DATA_TYPE) && defined(DATA_SIZE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)

45

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

46

/* Select the input-extraction helper that matches the compile-time STRIDE_X.
 * INPUT_PIXEL(data_size) expands to the name of the helper to call. Only the
 * stride-3 helpers are specialised by element size (data_size); the stride-1
 * and stride-2 variants ignore the argument.
 */
#if STRIDE_X == 3

47

/* Pastes data_size onto the helper name, e.g. extract_input_stride3_32. */
#define INPUT_PIXEL_STR(data_size) extract_input_stride3_##data_size

48

/* Extra level of indirection so that a macro argument (such as DATA_SIZE) is
 * expanded to its value before INPUT_PIXEL_STR applies the ## operator. */
#define INPUT_PIXEL(data_size) INPUT_PIXEL_STR(data_size)

49

#elif STRIDE_X == 2

50

#define INPUT_PIXEL(data_size) extract_input_stride2

51

#elif STRIDE_X == 1

52

#define INPUT_PIXEL(data_size) extract_input_stride1

53

#else /* STRIDE_X is not 1, 2 or 3 */

54

#error "Only support strides 1, 2 and 3"

55

#endif /* STRIDE_X == 3 */

56

57

/** Extracts a 1D horizontal vector from the input tensor with stride as 1.

58

*

59

* @param[in] input_pixel Pointer to the first pixel.

60

*

61

* @return extracted input pixels.

62

*/

63

inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride1(__global const DATA_TYPE *input_pixel)

64

{

65

return vload8(0, input_pixel);

66

}

67

68

/** Extracts a 1D horizontal vector from the input tensor with stride as 2.

69

*

70

* @param[in] input_pixel Pointer to the first pixel.

71

*

72

* @return extracted input pixels.

73

*/

74

inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride2(__global const DATA_TYPE *input_pixel)

75

{

76

VEC_DATA_TYPE(DATA_TYPE, 16)

77

temp = vload16(0, input_pixel);

78

return temp.s02468ace;

79

}

80

81

/** Extracts a 1D horizontal vector from the input tensor with stride as 3 and 32-bit data size.

82

*

83

* @param[in] input_pixel Pointer to the first pixel.

84

*

85

* @return extracted input pixels.

86

*/

87

inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3_32(__global const DATA_TYPE *input_pixel)

88

{

89

VEC_DATA_TYPE(DATA_TYPE, 4)

90

temp1 = vload4(0, input_pixel);

91

VEC_DATA_TYPE(DATA_TYPE, 4)

92

temp2 = vload4(0, input_pixel + 6);

93

VEC_DATA_TYPE(DATA_TYPE, 4)

94

temp3 = vload4(0, input_pixel + 12);

95

VEC_DATA_TYPE(DATA_TYPE, 4)

96

temp4 = vload4(0, input_pixel + 18);

97

return (VEC_DATA_TYPE(DATA_TYPE, 8))(temp1.s03, temp2.s03, temp3.s03, temp4.s03);

98

}

99

100

/** Extracts a 1D horizontal vector from the input tensor with stride as 3 and 16-bit data size.

101

*

102

* @param[in] input_pixel Pointer to the first pixel.

103

*

104

* @return extracted input pixels.

105

*/

106

inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3_16(__global const DATA_TYPE *input_pixel)

107

{

108

VEC_DATA_TYPE(DATA_TYPE, 8)

109

temp1 = vload8(0, input_pixel);

110

VEC_DATA_TYPE(DATA_TYPE, 8)

111

temp2 = vload8(0, input_pixel + 8);

112

VEC_DATA_TYPE(DATA_TYPE, 8)

113

temp3 = vload8(0, input_pixel + 16);

114

return (VEC_DATA_TYPE(DATA_TYPE, 8))(temp1.s036, temp2.s147, temp3.s25);

115

}

116

117

/** Extracts a 1D horizontal vector from the input tensor with stride as 3 and 8-bit data size.

118

*

119

* @param[in] input_pixel Pointer to the first pixel.

120

*

121

* @return extracted input pixels.

122

*/

123

inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3_8(__global const DATA_TYPE *input_pixel)

124

{

125

VEC_DATA_TYPE(DATA_TYPE, 16)

126

temp1 = vload16(0, input_pixel);

127

VEC_DATA_TYPE(DATA_TYPE, 16)

128

temp2 = vload16(0, input_pixel + 12);

129

return (VEC_DATA_TYPE(DATA_TYPE, 8))(temp1.s0369, temp2.s0369);

130

}

131

132

/** This kernel performs a direct convolution to convolve the low three dimensions.

133

*

134

* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float

135

* @note The data size must be passed at compile time using -DDATA_SIZE e.g. -DDATA_SIZE=32

Gian Marco Iodice

2017-08-08 10:53:00 +0100

[diff] [blame]

136

* @note The convolution stride x must be passed at compile time using -DSTRIDE_X e.g. -DSTRIDE_X=1

137

* @note The third dimensions of the weights tensors must be passed at compile time using -DWEIGHTS_DEPTH

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

138

* @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row.

139

*

Gian Marco Iodice

2017-08-08 10:53:00 +0100

[diff] [blame]

140

* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

141

* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)

142

* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

143

* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)

144

* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

145

* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)

146

* @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

147

* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor

148

* @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr

149

* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)

150

* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)

151

* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)

152

* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)

153

* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)

154

* @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)

155

* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor

156

* @param[in] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr

157

* @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)

158

* @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)

159

* @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)

160

* @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes)

161

* @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes)

162

* @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes)

163

* @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor

164

* @param[in] biases_ptr Pointer to the biases tensor. Supported data types: same as @p src_ptr

165

* @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes)

166

* @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes)

167

* @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor

Gian Marco Iodice

2017-08-08 10:53:00 +0100

[diff] [blame]

168

* @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

169

*/

170

__kernel void direct_convolution1x1(

171

TENSOR3D_DECLARATION(src),

172

TENSOR3D_DECLARATION(dst),

173

TENSOR3D_DECLARATION(weights),

174

#ifdef HAS_BIAS

175

VECTOR_DECLARATION(biases),

176

#endif /* defined(HAS_BIAS) */

Gian Marco Iodice

2017-08-08 10:53:00 +0100

[diff] [blame]

177

unsigned int weights_stride_w)

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

178

{

179

Image src = CONVERT_TO_IMAGE_STRUCT(src);

180

Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights);

181

Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);

182

183

#ifdef HAS_BIAS

184

Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);

185

#endif /* defined(HAS_BIAS) */

186

Michalis Spyrou

2017-08-14 11:26:37 +0100

[diff] [blame]

187

VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

188

pixels = 0;

189

190

const uint z_index = get_global_id(2);

191

192

weights.ptr += z_index * weights_stride_w;

193

Gian Marco Iodice

744b5ed

2017-10-06 15:44:27 +0100

[diff] [blame]

194

for(volatile int d = 0; d < WEIGHTS_DEPTH; ++d)

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

195

{

196

DATA_TYPE weight = *(__global DATA_TYPE *)weights.ptr;

197

VEC_DATA_TYPE(DATA_TYPE, 8)

198

input_pixel = INPUT_PIXEL(DATA_SIZE)((__global DATA_TYPE *)src.ptr);

Michalis Spyrou

2017-08-14 11:26:37 +0100

[diff] [blame]

199

pixels = ADD_OP(pixels, MUL_OP((VEC_DATA_TYPE(DATA_TYPE, 8))weight, input_pixel));

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

200

src.ptr += src_stride_z;

201

weights.ptr += weights_stride_z;

202

}

203

204

#ifdef HAS_BIAS

Michalis Spyrou

2017-08-14 11:26:37 +0100

[diff] [blame]

205

pixels = ADD_OP(pixels, (VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 8)) * ((__global DATA_TYPE *)(vector_offset(&biases, z_index))));

SiCong Li

2017-07-28 14:46:20 +0100

[diff] [blame]

206

#endif /* defined(HAS_BIAS) */

207

Michalis Spyrou

2017-08-14 11:26:37 +0100

[diff] [blame]

208

vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE, 8)), 0, (__global DATA_TYPE *)dst.ptr);

SiCong Li