Blame - arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h - ml/ComputeLibrary

2020-02-25 15:57:21 +0000

[diff] [blame]

77

* @param[out] output Output tensor. Data type supported: QASYMM8

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

78

* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added

79

* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication

80

* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

81

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

82

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8.

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

83

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

84

*/

Georgios Pinitas

932491f

2018-09-21 16:33:15 +0100

[diff] [blame]

85

void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

86

int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

87

/** Initialise the kernel's inputs, output

88

*

89

* @param[in] compile_context The compile context to be used.

90

* @param[in] input Input tensor. Data type supported: S32

91

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.

92

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

93

* @param[out] output Output tensor. Data type supported: QASYMM8

94

* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added

95

* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication

96

* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8

97

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.

98

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8.

99

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

100

*/

101

void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,

102

int result_offset_after_shift,

103

int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

104

/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint

105

*

Gian Marco Iodice

4b90865

2018-10-18 10:21:02 +0100

[diff] [blame]

106

* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32

107

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.

108

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

Sheri Zhang

2020-02-25 15:57:21 +0000

[diff] [blame]

109

* @param[in] output Output tensor. Data type supported: QASYMM8

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

110

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.

Gian Marco Iodice

4b90865

2018-10-18 10:21:02 +0100

[diff] [blame]

111

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8.

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

112

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

113

*

Georgios Pinitas

631c41a

2017-12-06 11:53:03 +0000

[diff] [blame]

114

* @return a status

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

115

*/

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

116

static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());

Gian Marco

05288a2

2017-11-21 10:57:50 +0000

[diff] [blame]

117

};

Georgios Pinitas

51e53a3

2018-10-22 13:49:08 +0100

[diff] [blame]

118

Manuel Bottini

2019-11-29 17:25:25 +0000

[diff] [blame]

119

/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.

120

*

121

* CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:

122

*

123

* result_fixedpoint_multiplier, result_shift, result_offset_after_shift

124

*

125

* The final result is:

126

*

127

* (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift

128

*

129

* where FixedPointMul(x, y) is the nearest integer to the following

130

* mathematical expression, evaluated without overflow or intermediate rounding:

*

* (x * y) / 2^31

*

* For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68

135

*

136

* In case the bias tensor is provided, the final result is:

137

*

138

* ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift

139

*

140

* This function calls the following OpenCL kernels:

141

*

142

* -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel

143

*

144

* @note The function also accepts 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions

145

* after the result is shifted right by result_shift

146

*/

147

class CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public ICLSimpleFunction

148

{

149

public:

150

/** Initialise the kernel's inputs, output

151

*

152

* @param[in] input Input tensor. Data type supported: S32

153

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.

154

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

Sheri Zhang

2020-02-25 15:57:21 +0000

[diff] [blame]

155

* @param[out] output Output tensor. Data type supported: QASYMM8_SIGNED

Manuel Bottini

2019-11-29 17:25:25 +0000

[diff] [blame]

156

* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added

157

* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication

158

* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

159

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.

Manuel Bottini

2019-11-29 17:25:25 +0000

[diff] [blame]

160

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

161

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

Manuel Bottini

2019-11-29 17:25:25 +0000

[diff] [blame]

162

*/

163

void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

164

int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

165

/** Initialise the kernel's inputs, output

166

*

167

* @param[in] compile_context The compile context to be used.

168

* @param[in] input Input tensor. Data type supported: S32

169

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.

170

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

171

* @param[out] output Output tensor. Data type supported: QASYMM8_SIGNED

172

* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added

173

* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication

174

* @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED

175

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.

176

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.

177

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

178

*/

179

void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,

180

int result_offset_after_shift,

181

int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());

Manuel Bottini

2019-11-29 17:25:25 +0000

[diff] [blame]

182

/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint

183

*

184

* @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32

185

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.

186

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

Sheri Zhang

2020-02-25 15:57:21 +0000

[diff] [blame]

187

* @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

188

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.

Manuel Bottini

2019-11-29 17:25:25 +0000

[diff] [blame]

189

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

190

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

Manuel Bottini

2019-11-29 17:25:25 +0000

[diff] [blame]

191

*

192

* @return a status

193

*/

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

194

static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());

Manuel Bottini

2019-11-29 17:25:25 +0000

[diff] [blame]

195

};

196

Manuel Bottini

2019-07-01 17:35:56 +0100

[diff] [blame]

197

/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.

198

*

199

* CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:

200

*

201

* result_fixedpoint_multiplier, result_shift

202

*

203

* The final result is:

204

*

205

* (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift)

206

*

207

* where FixedPointMul(x, y) is the nearest integer to the following

208

* mathematical expression, evaluated without overflow or intermediate rounding:

*

* (x * y) / 2^31

*

* For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68

213

*

214

* In case the bias tensor is provided, the final result is:

215

*

216

* ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift)

217

*

218

* This function calls the following OpenCL kernels:

219

*

220

* -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel

221

*

222

* @note The function also accepts 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions

223

* after the result is shifted right by result_shift

224

*/

225

class CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public ICLSimpleFunction

226

{

227

public:

228

/** Initialise the kernel's inputs, output

229

*

230

* @param[in] input Input tensor. Data type supported: S32

231

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.

232

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

Sheri Zhang

2020-02-25 15:57:21 +0000

[diff] [blame]

233

* @param[out] output Output tensor. Data type supported: QSYMM16

Manuel Bottini

2019-07-01 17:35:56 +0100

[diff] [blame]

234

* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added

235

* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

236

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.

Manuel Bottini

2019-07-01 17:35:56 +0100

[diff] [blame]

237

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

238

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

Manuel Bottini

2019-07-01 17:35:56 +0100

[diff] [blame]

239

*/

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

240

void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),

241

int max = std::numeric_limits<int32_t>::max());

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

242

/** Initialise the kernel's inputs, output

243

*

244

* @param[in] compile_context The compile context to be used.

245

* @param[in] input Input tensor. Data type supported: S32

246

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.

247

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

248

* @param[out] output Output tensor. Data type supported: QSYMM16

249

* @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added

250

* @param[in] result_shift Number of bits to shift right the result after the fixed point multiplication

251

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.

252

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.

253

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

254

*/

255

void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,

256

int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());

Manuel Bottini

2019-07-01 17:35:56 +0100

[diff] [blame]

257

/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint

258

*

259

* @param[in] input Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32

260

* @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.

261

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

Sheri Zhang

2020-02-25 15:57:21 +0000

[diff] [blame]

262

* @param[in] output Output tensor info. Data type supported: QSYMM16

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

263

* @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.

Manuel Bottini

2019-07-01 17:35:56 +0100

[diff] [blame]

264

* @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

265

* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.

Manuel Bottini

2019-07-01 17:35:56 +0100

[diff] [blame]

266

*

267

* @return a status

268

*/

Giorgio Arena

2020-02-07 13:46:45 +0000

[diff] [blame]

269

static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());

Manuel Bottini

2019-07-01 17:35:56 +0100

[diff] [blame]

270

};

Sheri Zhang

2020-02-25 15:57:21 +0000

[diff] [blame]

271

/** Basic function to execute GEMMLowpQuantizeDown kernels on CL.

272

*

273

* This function calls the following CL kernels:

274

*

Luca Foschiani

689c968

2020-02-26 14:30:14 +0000

[diff] [blame]

275

* -# @ref CLGEMMLowpQuantizeDownInt32ScaleKernel

Sheri Zhang

1b14c75

2020-03-09 14:29:52 +0000

[diff] [blame]

276

* -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel

Sheri Zhang

2020-02-25 15:57:21 +0000

[diff] [blame]

277

* -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel

278

* -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel

Michele Di Giorgio

1c1b3aa

2020-04-02 17:35:42 +0100

[diff] [blame]

279

* -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel

Sheri Zhang

2020-02-25 15:57:21 +0000

[diff] [blame]

280

*/

281

class CLGEMMLowpOutputStage : public ICLSimpleFunction

282

{

283

public:

284

/** Initialise the kernel's inputs, output

285

*

286

* @param[in] input Input tensor. Data type supported: S32

287

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.

288

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

289

* @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED

290

* @param[in] info GEMMLowp output stage metadata.

291

*/

292

void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

293

/** Initialise the kernel's inputs, output

294

*

295

* @param[in] compile_context The compile context to be used.

296

* @param[in] input Input tensor. Data type supported: S32

297

* @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.

298

* Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.

299

* @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED

300

* @param[in] info GEMMLowp output stage metadata.

301

*/

302

void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);

Sheri Zhang