Blame - src/core/CL/cl_kernels/depthwise_convolution_quantized.cl - ml/ComputeLibrary

return (left + input_offset) * (int2)(left_coeff + weight_offset) + (middle + input_offset) * (int2)(middle_coeff + weight_offset) + (right + input_offset) * (int2)(right_coeff + weight_offset);

64

}

65

66

/** Compute a 1D horizontal convolution of size 3 and stride 2 for uchar type.

67

*

68

* @param[in] left_pixel Pointer to the left pixel.

69

* @param[in] left_coeff Weight of the left pixel

70

* @param[in] middle_coeff Weight of the middle pixel

71

* @param[in] right_coeff Weight of the right pixel

72

* @param[in] input_offset Quantized offset of zero point of the input tensor data range

73

* @param[in] weight_offset Quantized offset of zero point of the weights tensor data range

74

*

75

* @return a int2 containing 2 convoluted values.

76

*/

77

inline int2 convolution1x3_stride_2(__global const uchar *left_pixel,

78

const int left_coeff,

79

const int middle_coeff,

80

const int right_coeff,

81

const int input_offset,

82

const int weight_offset)

83

{

84

int4 temp0 = CONVERT(vload4(0, left_pixel), int4);

85

int temp1 = CONVERT(*(left_pixel + 4 * sizeof(uchar)), int);

86

87

int2 left = CONVERT(temp0.s02, int2);

88

int2 middle = CONVERT(temp0.s13, int2);

89

int2 right = CONVERT((int2)(temp0.s2, temp1), int2);

90

91

return (left + input_offset) * (int2)(left_coeff + weight_offset) + (middle + input_offset) * (int2)(middle_coeff + weight_offset) + (right + input_offset) * (int2)(right_coeff + weight_offset);

92

}

93

94

/** Compute a 1D horizontal convolution of size 3 and stride 3 for uchar type.

95

*

96

* @param[in] left_pixel Pointer to the left pixel.

97

* @param[in] left_coeff Weight of the left pixel

98

* @param[in] middle_coeff Weight of the middle pixel

99

* @param[in] right_coeff Weight of the right pixel

100

* @param[in] input_offset Quantized offset of zero point of the input tensor data range

101

* @param[in] weight_offset Quantized offset of zero point of the weights tensor data range

102

*

103

* @return a int2 containing 2 convoluted values.

104

*/

105

inline int2 convolution1x3_stride_3(__global const uchar *left_pixel,

106

const int left_coeff,

107

const int middle_coeff,

108

const int right_coeff,

109

const int input_offset,

110

const int weight_offset)

111

{

112

int4 temp0 = CONVERT(vload4(0, left_pixel), int4);

113

int2 temp1 = CONVERT(vload2(0, (left_pixel + 4 * sizeof(uchar))), int2);

114

115

int2 left = CONVERT(temp0.s03, int2);

116

int2 middle = CONVERT((int2)(temp0.s1, temp1.s0), int2);

117

int2 right = CONVERT((int2)(temp0.s2, temp1.s1), int2);

118

119

return (left + input_offset) * (int2)(left_coeff + weight_offset) + (middle + input_offset) * (int2)(middle_coeff + weight_offset) + (right + input_offset) * (int2)(right_coeff + weight_offset);

120

}

121

122

/** Apply a 3x3 convolution matrix to a single channel QASYMM8 input image and return the result.

123

*

124

* Convolution matrix layout:

125

*

126

* [ mat0, mat1, mat2 ]\n

127

* [ mat3, mat4, mat5 ]\n

128

* [ mat6, mat7, mat8 ]\n

129

*

130

* @param[in] src A pointer to source Image structure

131

* @param[in] mat0 Coefficient from the convolution matrix

132

* @param[in] mat1 Coefficient from the convolution matrix

133

* @param[in] mat2 Coefficient from the convolution matrix

134

* @param[in] mat3 Coefficient from the convolution matrix

135

* @param[in] mat4 Coefficient from the convolution matrix

136

* @param[in] mat5 Coefficient from the convolution matrix

137

* @param[in] mat6 Coefficient from the convolution matrix

138

* @param[in] mat7 Coefficient from the convolution matrix

139

* @param[in] mat8 Coefficient from the convolution matrix

140

* @param[in] input_offset Quantized offset of zero point of the input tensor data range

141

* @param[in] weight_offset Quantized offset of zero point of the weights tensor data range

142

* @param[in] output_offset Quantized offset of zero point of the output tensor data range

143

* @param[in] output_multiplier Output scale multiplier

144

* @param[in] output_shift Output scale divisor exponent

145

* @param[in] bias (Optional) Bias value

146

*

147

* @return a uchar2 containing 2 convoluted values.

148

*/

149

inline uchar2 convolution3x3(

150

Image *src,

151

const uchar mat0, const uchar mat1, const uchar mat2,

152

const uchar mat3, const uchar mat4, const uchar mat5,

153

const uchar mat6, const uchar mat7, const uchar mat8,

154

const int input_offset, const int weight_offset, const int output_offset,

155

const int output_multiplier, const int output_shift

156

#if defined(HAS_BIAS)

157

,

158

const int bias

159

#endif //defined(HAS_BIAS)

)

{

int2 pixels;

pixels = convolution1x3(offset(src, 0, 0), mat0, mat1, mat2, input_offset, weight_offset);

165

pixels += convolution1x3(offset(src, 0, 1), mat3, mat4, mat5, input_offset, weight_offset);

166

pixels += convolution1x3(offset(src, 0, 2), mat6, mat7, mat8, input_offset, weight_offset);

167

#if defined(HAS_BIAS)

168

pixels += (int2)(bias);

169

#endif //defined(HAS_BIAS)

170

171

pixels = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(pixels, output_multiplier, output_shift, 2);

172

pixels = pixels + output_offset;

173

pixels = clamp(pixels, 0, 255);

174

175

return CONVERT(pixels, uchar2);

176

}

177

178

/** This function computes the horizontal integral of the image.

179

*

180

* @param[in] src_ptr Pointer to the source image. Supported data types: QASYMM8

181

* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)

182

* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

183

* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)

184

* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

185

* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image

186

* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)

187

* @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)

188

* @param[in] dst_ptr Pointer to the destination tensor. Supported data types: QASYMM8

189

* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)

190

* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)

191

* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)

192

* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)

193

* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)

194

* @param[in] dst_step_z dst_stride_z * number of elements along Y processed per workitem(in bytes)

195

* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor

196

* @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8

197

* @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)

198

* @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)

199

* @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)

200

* @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes)

201

* @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes)

202

* @param[in] weights_step_z weights_stride_z * number of elements along Y processed per workitem(in bytes)

203

* @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor

204

* @param[in] biases_ptr (Optional) Pointer to the biases vector. Supported data types: QASYMM8

205

* @param[in] biases_stride_x (Optional) Stride of the biases vector in X dimension (in bytes)

206

* @param[in] biases_step_x (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes)

207

* @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector

208

* @param[in] input_offset Quantized offset of zero point of the input tensor data range

209

* @param[in] weight_offset Quantized offset of zero point of the weights tensor data range

210

* @param[in] output_offset Quantized offset of zero point of the output tensor data range

211

* @param[in] output_multiplier Output scale multiplier

212

* @param[in] output_shift Output scale divisor exponent

213

*/

214

215

__kernel void depthwise_convolution_3x3_quantized(

216

TENSOR3D_DECLARATION(src),

217

TENSOR3D_DECLARATION(dst),

218

TENSOR3D_DECLARATION(weights),

219

#if defined(HAS_BIAS)

220

VECTOR_DECLARATION(biases),

221

#endif //defined(HAS_BIAS)

int input_offset,

int weight_offset,

int output_offset,

int output_multiplier,

226

int output_shift)

227

{

228

Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);

229

Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);

230

Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT(weights);

231

#if defined(HAS_BIAS)

232

Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);

233

#endif //defined(HAS_BIAS)

234

235

uchar3 offset = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y;

236

uchar3 weights_values0 = vload3(0, weights.ptr + offset.s0);

237

uchar3 weights_values1 = vload3(0, weights.ptr + offset.s1);

238

uchar3 weights_values2 = vload3(0, weights.ptr + offset.s2);

239

240

#if defined(HAS_BIAS)

241

int bias_value = *((__global int *)(vector_offset(&biases, get_global_id(2))));

242

#endif //defined(HAS_BIAS)

243

244

uchar2 pixels = convolution3x3(&src, weights_values0.s0, weights_values0.s1, weights_values0.s2,

245

weights_values1.s0, weights_values1.s1, weights_values1.s2,

246

weights_values2.s0, weights_values2.s1, weights_values2.s2,

247

input_offset, weight_offset, output_offset,

248

output_multiplier, output_shift

249

#if defined(HAS_BIAS)

250

,

251

bias_value

252

#endif //defined(HAS_BIAS)

253

);

254

255

vstore2(pixels, 0, dst.ptr);

256

}

257

258

#endif //defined(CONV_STRIDE_X)