Blame - arm_compute/core/NEON/NEAsymm.h - ml/ComputeLibrary

2017-11-28 09:10:03 +0000

[diff] [blame]

1

/*

Michele Di Giorgio

d9eaf61

2020-07-08 11:12:57 +0100

[diff] [blame^]

2

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

22

* SOFTWARE.

23

*/

Michalis Spyrou

f464337

2019-11-29 16:17:13 +0000

[diff] [blame]

24

#ifndef ARM_COMPUTE_NEASYMM_H

25

#define ARM_COMPUTE_NEASYMM_H

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

26

Manuel Bottini

7bb56c6

2019-06-26 15:17:09 +0100

[diff] [blame]

27

#include "arm_compute/core/NEON/NEMath.h"

Gian Marco

2017-11-28 09:10:03 +0000

[diff] [blame]

28

#include <arm_neon.h>

29

30

namespace arm_compute

31

{

Michel Iwaniec

5dfeae6

2017-11-29 10:48:23 +0000

[diff] [blame]

32

using qasymm8x8_t = uint8x8_t; /**< 8 bit quantized asymmetric vector with 8 elements */

33

using qasymm8x8x2_t = uint8x8x2_t; /**< 8 bit quantized asymmetric vector with 16 elements */

34

using qasymm8x8x3_t = uint8x8x3_t; /**< 8 bit quantized asymmetric vector with 24 elements */

35

using qasymm8x8x4_t = uint8x8x4_t; /**< 8 bit quantized asymmetric vector with 32 elements */

36

using qasymm8x16_t = uint8x16_t; /**< 8 bit quantized asymmetric vector with 16 elements */

37

Michalis Spyrou

2019-11-28 11:31:23 +0000

[diff] [blame]

38

using qasymm8x8_signed_t = int8x8_t; /**< 8 bit quantized signed asymmetric vector with 8 elements */

39

using qasymm8x8x2_signed_t = int8x8x2_t; /**< 8 bit quantized signed asymmetric vector with 16 elements */

40

using qasymm8x8x3_signed_t = int8x8x3_t; /**< 8 bit quantized signed asymmetric vector with 24 elements */

41

using qasymm8x8x4_signed_t = int8x8x4_t; /**< 8 bit quantized signed asymmetric vector with 32 elements */

42

using qasymm8x16_signed_t = int8x16_t; /**< 8 bit quantized signed asymmetric vector with 16 elements */

43

Michel Iwaniec

5dfeae6

2017-11-29 10:48:23 +0000

[diff] [blame]

44

/** Perform a multiply-accumulate on all 16 components of a QASYMM8 vector

*

* vd*vs + vo

*

* @param[in] vd Input vector value in QASYMM8 format

49

* @param[in] vs Vector multiplier in F32 format. The multiplier value must be duplicated across all four lanes.

50

* @param[in] vo Vector addend in F32 format. The addend value must be duplicated across all four lanes.

51

*

52

* @return A 16-component vector in QASYMM8 format, saturated to fit

53

*/

54

uint8x16_t vmlaq_qasymm8(qasymm8x16_t vd, float32x4_t vs, float32x4_t vo);

Georgios Pinitas

2018-01-12 16:29:45 +0000

[diff] [blame]

55

Michalis Spyrou

2019-11-28 11:31:23 +0000

[diff] [blame]

56

/** Perform a multiply-accumulate on all 16 components of a QASYMM8_SIGNED vector

*

* vd*vs + vo

*

* @param[in] vd Input vector value in QASYMM8_SIGNED format

61

* @param[in] vs Vector multiplier in F32 format. The multiplier value must be duplicated across all four lanes.

62

* @param[in] vo Vector addend in F32 format. The addend value must be duplicated across all four lanes.

63

*

64

* @return A 16-component vector in QASYMM8_SIGNED format, saturated to fit

65

*/

66

int8x16_t vmlaq_qasymm8_signed(qasymm8x16_signed_t vd, float32x4_t vs, float32x4_t vo);

67

Georgios Pinitas

2018-01-12 16:29:45 +0000

[diff] [blame]

68

/** Performs final quantization step on 16 elements

69

*

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

70

* @param[in] in_s32 Input to be quantized.

71

* @param[in] result_fixedpoint_multiplier Result multiplier parameter

72

* @param[in] result_shift Result shift parameter

73

* @param[in] result_offset_after_shift_s32 Result offset parameter

74

* @param[in] min_u8 Relu lower bound

75

* @param[in] max_u8 Relu upper bound

76

* @param[in] is_bounded_relu Specified if a fused bounded relu should be applied

Georgios Pinitas

2018-01-12 16:29:45 +0000

[diff] [blame]

77

*

78

* @return Quantized values

79

*/

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

80

inline uint8x16_t finalize_quantization(int32x4x4_t &in_s32,

81

int result_fixedpoint_multiplier,

82

int32_t result_shift,

83

int32x4_t result_offset_after_shift_s32,

84

uint8x16_t min_u8,

85

uint8x16_t max_u8,

86

bool is_bounded_relu)

Georgios Pinitas

2018-01-12 16:29:45 +0000

[diff] [blame]

87

{

88

const static int32x4_t zero_s32 = vdupq_n_s32(0);

89

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

90

if(result_shift < 0)

91

{

92

in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << (-result_shift)));

93

in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << (-result_shift)));

94

in_s32.val[2] = vmulq_n_s32(in_s32.val[2], (1 << (-result_shift)));

95

in_s32.val[3] = vmulq_n_s32(in_s32.val[3], (1 << (-result_shift)));

Georgios Pinitas

2018-01-12 16:29:45 +0000

[diff] [blame]

96

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

97

in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);

98

in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);

99

in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier);

100

in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier);

}

else

{

// Fixed point multiplication with vector saturating rounding doubling multiply high with scalar

105

in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);

106

in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);

107

in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier);

108

in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier);

109

110

// Round to the nearest division by a power-of-two using result_shift_s32

111

in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift);

112

in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift);

113

in_s32.val[2] = rounding_divide_by_pow2(in_s32.val[2], result_shift);

114

in_s32.val[3] = rounding_divide_by_pow2(in_s32.val[3], result_shift);

115

}

Georgios Pinitas

2018-01-12 16:29:45 +0000

[diff] [blame]

116

117

// Add the offset terms

118

in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32);

119

in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32);

120

in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32);

121

in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32);

122

123

// Saturate negative values

124

in_s32.val[0] = vmaxq_s32(in_s32.val[0], zero_s32);

125

in_s32.val[1] = vmaxq_s32(in_s32.val[1], zero_s32);

126

in_s32.val[2] = vmaxq_s32(in_s32.val[2], zero_s32);

127

in_s32.val[3] = vmaxq_s32(in_s32.val[3], zero_s32);

128

129

// Convert S32 to S16

130

const int16x8x2_t in_s16 =

131

{

132

{

133

vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])),

134

vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3]))

}

};

// Convert S16 to U8

uint8x16_t out_u8 = vcombine_u8(vqmovun_s16(in_s16.val[0]), vqmovun_s16(in_s16.val[1]));

if(is_bounded_relu)

{

out_u8 = vmaxq_u8(out_u8, min_u8);

144

out_u8 = vminq_u8(out_u8, max_u8);

}

return out_u8;

}

Pablo Tello

2019-02-05 16:16:19 +0000

[diff] [blame]

149

Georgios Pinitas

2019-11-21 14:10:25 +0000

[diff] [blame]

150

/** Performs final quantization step on 16 elements

151

*

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

152

* @param[in] in_s32 Input to be quantized.

153

* @param[in] result_fixedpoint_multiplier Result multiplier parameter

154

* @param[in] result_shift Result shift parameter

155

* @param[in] result_offset_after_shift_s32 Result offset parameter

156

* @param[in] min_s8 Relu lower bound

157

* @param[in] max_s8 Relu upper bound

158

* @param[in] is_bounded_relu Specified if a fused bounded relu should be applied

Georgios Pinitas

2019-11-21 14:10:25 +0000

[diff] [blame]

159

*

160

* @return Quantized values

161

*/

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

162

inline int8x16_t finalize_quantization(int32x4x4_t &in_s32,

163

int result_fixedpoint_multiplier,

164

int32_t result_shift,

165

int32x4_t result_offset_after_shift_s32,

166

int8x16_t min_s8,

167

int8x16_t max_s8,

168

bool is_bounded_relu)

Georgios Pinitas

2019-11-21 14:10:25 +0000

[diff] [blame]

169

{

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

170

if(result_shift < 0)

171

{

172

in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << (-result_shift)));

173

in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << (-result_shift)));

174

in_s32.val[2] = vmulq_n_s32(in_s32.val[2], (1 << (-result_shift)));

175

in_s32.val[3] = vmulq_n_s32(in_s32.val[3], (1 << (-result_shift)));

Georgios Pinitas

2019-11-21 14:10:25 +0000

[diff] [blame]

176

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

177

in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);

178

in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);

179

in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier);

180

in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier);

}

else

{

// Fixed point multiplication with vector saturating rounding doubling multiply high with scalar

185

in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier);

186

in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier);

187

in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier);

188

in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier);

189

190

// Round to the nearest division by a power-of-two using result_shift_s32

191

in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift);

192

in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift);

193

in_s32.val[2] = rounding_divide_by_pow2(in_s32.val[2], result_shift);

194

in_s32.val[3] = rounding_divide_by_pow2(in_s32.val[3], result_shift);

195

}

Georgios Pinitas

2019-11-21 14:10:25 +0000

[diff] [blame]

196

197

// Add the offset terms

198

in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32);

199

in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32);

200

in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32);

201

in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32);

202

203

// Convert S32 to S16

204

const int16x8x2_t in_s16 =

205

{

206

{

207

vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])),

208

vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3]))

}

};

// Convert S16 to S8

int8x16_t out_s8 = vcombine_s8(vqmovn_s16(in_s16.val[0]), vqmovn_s16(in_s16.val[1]));

if(is_bounded_relu)

{

out_s8 = vmaxq_s8(out_s8, min_s8);

218

out_s8 = vminq_s8(out_s8, max_s8);

}

return out_s8;

}

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

224

/** Performs final quantization step on 16 elements for symmetric quantization

225

*

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

226

* @param[in] in_s32 Input to be quantized.

227

* @param[in] result_fixedpoint_multiplier Result multiplier parameter

228

* @param[in] result_shift Result shift parameter

229

* @param[in] result_offset_after_shift_s32 Result offset parameter

230

* @param[in] min_s8 Relu lower bound

231

* @param[in] max_s8 Relu upper bound

232

* @param[in] is_bounded_relu Specified if a fused bounded relu should be applied

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

233

*

234

* @return Quantized values

235

*/

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

236

inline int8x16_t finalize_quantization_symm(int32x4x4_t &in_s32,

237

const int32x4x4_t &result_fixedpoint_multiplier,

238

const int32x4x4_t &result_shift,

239

const int32x4_t &result_offset_after_shift_s32,

240

const int8x16_t &min_s8,

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

241

const int8x16_t &max_s8,

242

const bool is_bounded_relu)

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

243

{

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

244

const static int32x4_t one_s32 = vdupq_n_s32(1);

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

245

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

246

// Fixed point multiplication with vector saturating rounding doubling multiply high with scalar

247

int32x4x4_t res_shift_gt0 =

248

{

249

vqrdmulhq_s32(in_s32.val[0], result_fixedpoint_multiplier.val[0]),

250

vqrdmulhq_s32(in_s32.val[1], result_fixedpoint_multiplier.val[1]),

251

vqrdmulhq_s32(in_s32.val[2], result_fixedpoint_multiplier.val[2]),

252

vqrdmulhq_s32(in_s32.val[3], result_fixedpoint_multiplier.val[3]),

253

};

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

254

// Round to the nearest division by a power-of-two using result_shift_s32

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

255

res_shift_gt0.val[0] = rounding_divide_by_pow2(res_shift_gt0.val[0], result_shift.val[0]);

256

res_shift_gt0.val[1] = rounding_divide_by_pow2(res_shift_gt0.val[1], result_shift.val[1]);

257

res_shift_gt0.val[2] = rounding_divide_by_pow2(res_shift_gt0.val[2], result_shift.val[2]);

258

res_shift_gt0.val[3] = rounding_divide_by_pow2(res_shift_gt0.val[3], result_shift.val[3]);

259

260

int32x4x4_t res_shift_lt0 =

261

{

262

vmulq_s32(in_s32.val[0], vshlq_s32(one_s32, vnegq_s32(result_shift.val[0]))),

263

vmulq_s32(in_s32.val[1], vshlq_s32(one_s32, vnegq_s32(result_shift.val[1]))),

264

vmulq_s32(in_s32.val[2], vshlq_s32(one_s32, vnegq_s32(result_shift.val[2]))),

265

vmulq_s32(in_s32.val[3], vshlq_s32(one_s32, vnegq_s32(result_shift.val[3]))),

266

};

267

res_shift_lt0.val[0] = vqrdmulhq_s32(res_shift_lt0.val[0], result_fixedpoint_multiplier.val[0]);

268

res_shift_lt0.val[1] = vqrdmulhq_s32(res_shift_lt0.val[1], result_fixedpoint_multiplier.val[1]);

269

res_shift_lt0.val[2] = vqrdmulhq_s32(res_shift_lt0.val[2], result_fixedpoint_multiplier.val[2]);

270

res_shift_lt0.val[3] = vqrdmulhq_s32(res_shift_lt0.val[3], result_fixedpoint_multiplier.val[3]);

271

272

// Select result depending on shift value

273

const uint32x4x4_t mask_lt0 =

274

{

275

#ifdef __aarch64__

276

vcltzq_s32(result_shift.val[0]),

277

vcltzq_s32(result_shift.val[1]),

278

vcltzq_s32(result_shift.val[2]),

279

vcltzq_s32(result_shift.val[3]),

280

#else //__aarch64__

281

vcltq_s32(result_shift.val[0], vdupq_n_s32(0)),

282

vcltq_s32(result_shift.val[1], vdupq_n_s32(0)),

283

vcltq_s32(result_shift.val[2], vdupq_n_s32(0)),

284

vcltq_s32(result_shift.val[3], vdupq_n_s32(0)),

#endif //__aarch64__

};

in_s32.val[0] = vbslq_s32(mask_lt0.val[0], res_shift_lt0.val[0], res_shift_gt0.val[0]);

289

in_s32.val[1] = vbslq_s32(mask_lt0.val[1], res_shift_lt0.val[1], res_shift_gt0.val[1]);

290

in_s32.val[2] = vbslq_s32(mask_lt0.val[2], res_shift_lt0.val[2], res_shift_gt0.val[2]);

291

in_s32.val[3] = vbslq_s32(mask_lt0.val[3], res_shift_lt0.val[3], res_shift_gt0.val[3]);

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

292

293

// Add the offset terms

294

in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32);

295

in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32);

296

in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32);

297

in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32);

298

299

// Convert S32 to S16

300

const int16x8x2_t in_s16 =

301

{

302

{

303

vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])),

304

vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3]))

}

};

// Convert S16 to S8

int8x16_t out_s8 = vcombine_s8(vqmovn_s16(in_s16.val[0]), vqmovn_s16(in_s16.val[1]));

if(is_bounded_relu)

{

out_s8 = vmaxq_s8(out_s8, min_s8);

314

out_s8 = vminq_s8(out_s8, max_s8);

}

return out_s8;

}

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

320

/** Performs final quantization step on single element

321

*

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

322

* @param[in] in_value Input to be quantized.

323

* @param[in] result_fixedpoint_multiplier Result multiplier parameter

324

* @param[in] result_shift Result shift parameter

325

* @param[in] result_offset_after_shift_s32 Result offset parameter

326

* @param[in] min_u8 Relu lower bound

327

* @param[in] max_u8 Relu upper bound

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

328

* @param[in] is_bounded_relu Specified if a fused bounded relu should be applied

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

329

*

330

* @return Quantized value

331

*/

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

332

inline uint8_t finalize_quantization(int32_t in_value, int result_fixedpoint_multiplier,

333

int32_t result_shift, int32_t result_offset_after_shift_s32,

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

334

uint8_t min_u8, uint8_t max_u8, bool is_bounded_relu)

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

335

{

336

int32x4_t in_s32 = vdupq_n_s32(in_value);

337

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

338

if(result_shift < 0)

339

{

340

in_value = vgetq_lane_s32(vqrdmulhq_n_s32(vmulq_n_s32(in_s32, (1 << (-result_shift))), result_fixedpoint_multiplier), 0);

}

else

{

// Fixed point multiplication with vector saturating rounding doubling multiply high with scalar

345

in_value = vgetq_lane_s32(vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier), 0);

346

// Shift value by result_shift_s32

347

in_value = rounding_divide_by_pow2(in_value, result_shift);

348

}

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

349

350

// Add the offset term

351

in_value += result_offset_after_shift_s32;

352

353

// Bound the result

Georgios Pinitas

6fa2638

2019-03-18 10:05:34 +0000

[diff] [blame]

354

uint8_t out_u8 = static_cast<uint8_t>(std::max<int32_t>(0, std::min<int32_t>(255, in_value)));

George Wort

2019-02-22 16:37:41 +0000

[diff] [blame]

355

if(is_bounded_relu)

356

{

357

out_u8 = static_cast<uint8_t>(std::max(min_u8, std::min(max_u8, out_u8)));

}

return out_u8;

}

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

363

/** Performs final quantization step on single element

364

*

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

365

* @param[in] in_value Input to be quantized.

366

* @param[in] result_fixedpoint_multiplier Result multiplier parameter

367

* @param[in] result_shift Result shift parameter

368

* @param[in] result_offset_after_shift_s32 Result offset parameter

369

* @param[in] min_s8 Relu lower bound

370

* @param[in] max_s8 Relu upper bound

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

371

* @param[in] is_bounded_relu Specified if a fused bounded relu should be applied

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

372

*

373

* @return Quantized value

374

*/

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

375

inline int8_t finalize_quantization(int32_t in_value, int result_fixedpoint_multiplier,

376

int32_t result_shift, int32_t result_offset_after_shift_s32,

Michalis Spyrou

2020-06-22 17:05:43 +0100

[diff] [blame]

377

int8_t min_s8, int8_t max_s8, bool is_bounded_relu)

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

378

{

379

int32x4_t in_s32 = vdupq_n_s32(in_value);

380

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

381

if(result_shift < 0)

382

{

383

in_value = vgetq_lane_s32(vqrdmulhq_n_s32(vmulq_n_s32(in_s32, (1 << (-result_shift))), result_fixedpoint_multiplier), 0);

}

else

{

// Fixed point multiplication with vector saturating rounding doubling multiply high with scalar

388

in_value = vgetq_lane_s32(vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier), 0);

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

389

Michele Di Giorgio

2019-10-29 10:58:13 +0000

[diff] [blame]

390

// Shift value by result_shift_s32

391

in_value = rounding_divide_by_pow2(in_value, result_shift);

392

}

Georgios Pinitas

2019-10-16 19:21:40 +0100

[diff] [blame]

393

394

// Add the offset term

395

in_value += result_offset_after_shift_s32;

396

397

// Bound the result

398

int8_t out_s8 = static_cast<int8_t>(std::max<int32_t>(-128, std::min<int32_t>(127, in_value)));

399

if(is_bounded_relu)

400

{

401

out_s8 = static_cast<int8_t>(std::max(min_s8, std::min(max_s8, out_s8)));

}

return out_s8;

}

Georgios Pinitas

2019-04-15 15:44:17 +0100

[diff] [blame]

407

/** Dequantize a neon vector holding 8 quantized values.

408

*

409

* @param[in] qv Input values to be dequantized.

410

* @param[in] qi Quantization information to be used in the computation.

411

*

412

* @return Dequantized values in a neon vector

413

*/

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

414

inline float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi)

Georgios Pinitas

2019-04-15 15:44:17 +0100

[diff] [blame]

415

{

416

const float scale = qi.scale;

417

const int offset = qi.offset;

418

const int32x4_t voffset = vdupq_n_s32(offset);

419

const float32x4_t vscale = vdupq_n_f32(scale);

420

const float32x4x2_t vdequantized_input =

421

{

422

{

423

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(qv)))), voffset)), vscale),

424

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(qv)))), voffset)), vscale),

425

}

426

};

427

return vdequantized_input;

428

}

429

Michalis Spyrou

2019-11-28 11:31:23 +0000

[diff] [blame]

430

/** Dequantize a neon vector holding 8 singed quantized values.

431

*

432

* @param[in] qv Input values to be dequantized.

433

* @param[in] qi Quantization information to be used in the computation.

434

*

435

* @return Dequantized values in a neon vector

436

*/

437

inline float32x4x2_t vdequantize(const int8x8_t &qv, const UniformQuantizationInfo &qi)

438

{

439

const float scale = qi.scale;

440

const int offset = qi.offset;

441

const int32x4_t voffset = vdupq_n_s32(offset);

442

const float32x4_t vscale = vdupq_n_f32(scale);

443

const float32x4x2_t vdequantized_input =

444

{

445

{

446

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(qv))), voffset)), vscale),

447

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(qv))), voffset)), vscale),

448

}

449

};

450

return vdequantized_input;

451

}

452

Pablo Tello

2019-02-05 16:16:19 +0000

[diff] [blame]

453

/** Dequantize a neon vector holding 16 quantized values.

454

*

Georgios Pinitas

2019-04-15 15:44:17 +0100

[diff] [blame]

455

* @param[in] qv Input values to be dequantized.

456

* @param[in] qi Quantization information to be used in the computation.

Pablo Tello

2019-02-05 16:16:19 +0000

[diff] [blame]

457

*

458

* @return Dequantized values in a neon vector

459

*/

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

460

inline float32x4x4_t vdequantize(const uint8x16_t &qv, const UniformQuantizationInfo &qi)

Pablo Tello

2019-02-05 16:16:19 +0000

[diff] [blame]

461

{

462

const float scale = qi.scale;

463

const int offset = qi.offset;

464

const int32x4_t voffset = vdupq_n_s32(offset);

465

const float32x4_t vscale = vdupq_n_f32(scale);

466

const float32x4x4_t vdequantized_input =

467

{

468

{

469

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),

470

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),

471

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),

472

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),

473

}

474

};

475

return vdequantized_input;

476

}

477

Michalis Spyrou

2019-11-28 11:31:23 +0000

[diff] [blame]

478

/** Dequantize a neon vector holding 16 signed quantized values.

479

*

480

* @param[in] qv Input values to be dequantized.

481

* @param[in] qi Quantization information to be used in the computation.

482

*

483

* @return Dequantized values in a neon vector

484

*/

485

inline float32x4x4_t vdequantize(const int8x16_t &qv, const UniformQuantizationInfo &qi)

486

{

487

const float scale = qi.scale;

488

const int offset = qi.offset;

489

const int32x4_t voffset = vdupq_n_s32(offset);

490

const float32x4_t vscale = vdupq_n_f32(scale);

491

const float32x4x4_t vdequantized_input =

492

{

493

{

494

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),

495

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),

496

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),

497

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),

498

}

499

};

500

return vdequantized_input;

501

}

502

Georgios Pinitas

3d13af8

2019-06-04 13:04:16 +0100

[diff] [blame]

503

/** Dequantize following an asymmetric quantization scheme a neon vector holding 16 quantized values.

504

*

505

* @param[in] qv Input values to be dequantized.

506

* @param[in] scale Quantization scaling factor.

507

* @param[in] offset Zero quantization offset.

508

*

509

* @return Dequantized values in a neon vector

510

*/

511

inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offset)

512

{

513

const int32x4_t voffset = vdupq_n_s32(offset);

514

const float32x4_t vscale = vdupq_n_f32(scale);

515

const float32x4x4_t vdequantized_input =

516

{

517

{

518

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),

519

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale),

520

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),

521

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale),

522

}

523

};

524

return vdequantized_input;

525

}

526

Sang-Hoon Park

d817647

2019-12-04 09:46:28 +0000

[diff] [blame]

527

/** Dequantize a vector of 16 values stored as signed asymmetric.

528

*

529

* @param[in] qv Input values to be dequantized.

530

* @param[in] scale Quantization scaling factor.

531

* @param[in] offset Zero quantization offset.

532

*

533

* @return Dequantized values in a neon vector

534

*/

535

inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale, int32_t offset)

536

{

537

const int32x4_t voffset = vdupq_n_s32(offset);

538

const float32x4_t vscale = vdupq_n_f32(scale);

539

const float32x4x4_t vdequantized_input =

540

{

541

{

542

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),

543

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale),

544

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),

545

vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale),

546

}

547

};

548

return vdequantized_input;

549

}

550

Georgios Pinitas

2019-11-11 18:24:22 +0000

[diff] [blame]

551

/** Dequantize following symmetric quantization scheme a neon vector holding 16 quantized values.

Michalis Spyrou

2019-08-22 16:52:00 +0100

[diff] [blame]

552

*

Georgios Pinitas

2019-11-11 18:24:22 +0000

[diff] [blame]

553

* @param[in] qv Input values to be dequantized.

554

* @param[in] vscale Vector containing quantization scaling factors.

Michalis Spyrou

2019-08-22 16:52:00 +0100

[diff] [blame]

555

*

556

* @return Dequantized values in a neon vector

557

*/

Georgios Pinitas

2019-11-11 18:24:22 +0000

[diff] [blame]

558

inline float32x4x4_t vdequantize(const int8x16_t &qv, const float32x4x4_t vscale)

Michalis Spyrou

2019-08-22 16:52:00 +0100

[diff] [blame]

559

{

560

const float32x4x4_t vdequantized_input =

561

{

562

{

Georgios Pinitas

2019-11-11 18:24:22 +0000

[diff] [blame]

563

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[0]),

564

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[1]),

565

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[2]),

566

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[3]),

Michalis Spyrou

2019-08-22 16:52:00 +0100

[diff] [blame]

567

}

568

};

569

return vdequantized_input;

570

}

571

Georgios Pinitas

3d13af8

2019-06-04 13:04:16 +0100

[diff] [blame]

572

/** Dequantize following a symmetric quantization scheme a neon vector holding 16 quantized values.

573

*

574

* @param[in] qv Input values to be dequantized.

575

* @param[in] scale Quantization scaling factor.

576

*

577

* @return Dequantized values in a neon vector

578

*/

579

inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale)

580

{

581

const float32x4_t vscale = vdupq_n_f32(scale);

582

const float32x4x4_t vdequantized_input =

583

{

584

{

585

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale),

586

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale),

587

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale),

588

vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale),

589

}

590

};

591

return vdequantized_input;

592

}

593

Georgios Pinitas

2019-04-15 15:44:17 +0100

[diff] [blame]

594

/** Quantize a neon vector holding 8 floating point values.

595

*

596

* @param[in] qv Input values to be quantized.

597

* @param[in] qi Quantization information to be used in the computation.

598

*

599

* @return A neon vector holding the quantized values

600

*/

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

601

inline uint8x8_t vquantize(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)

Georgios Pinitas

2019-04-15 15:44:17 +0100

[diff] [blame]

602

{

603

const float scale = qi.scale;

604

const int offset = qi.offset;

605

const float32x4_t voffset = vdupq_n_f32(offset);

606

const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);

607

const int32x4x4_t rf =

{

{

#ifdef __aarch64__

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

612

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

613

#else //__aarch64__

614

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

615

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

#endif //__aarch64__

}

};

return vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));

620

}

621

Michalis Spyrou

2019-11-28 11:31:23 +0000

[diff] [blame]

622

/** Quantize a neon vector holding 8 floating point values.

623

*

624

* @param[in] qv Input values to be quantized.

625

* @param[in] qi Quantization information to be used in the computation.

626

*

627

* @return A neon vector holding the singed quantized values

628

*/

629

inline int8x8_t vquantize_signed(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)

630

{

631

const float scale = qi.scale;

632

const int offset = qi.offset;

633

const float32x4_t voffset = vdupq_n_f32(offset);

634

const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);

635

const int32x4x4_t rf =

{

{

#ifdef __aarch64__

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

640

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

641

#else //__aarch64__

642

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

643

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

#endif //__aarch64__

}

};

return vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));

648

}

649

Pablo Tello

2019-02-05 16:16:19 +0000

[diff] [blame]

650

/** Quantize a neon vector holding 16 floating point values.

651

*

Georgios Pinitas

2019-04-15 15:44:17 +0100

[diff] [blame]

652

* @param[in] qv Input values to be quantized.

653

* @param[in] qi Quantization information to be used in the computation.

Pablo Tello

2019-02-05 16:16:19 +0000

[diff] [blame]

654

*

655

* @return A neon vector holding the quantized values

656

*/

Georgios Pinitas

2019-05-21 13:32:43 +0100

[diff] [blame]

657

inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)

Pablo Tello

2019-02-05 16:16:19 +0000

[diff] [blame]

658

{

659

const float scale = qi.scale;

660

const int offset = qi.offset;

661

const float32x4_t voffset = vdupq_n_f32(offset);

662

const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);

663

const int32x4x4_t rf =

{

{

#ifdef __aarch64__

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

668

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

669

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),

670

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),

671

#else //__aarch64__

672

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

673

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

674

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),

675

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),

#endif //__aarch64__

}

};

const uint8x8_t pa = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));

680

const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));

681

return vcombine_u8(pa, pb);

682

}

Michele Di Giorgio

d64a46c

2019-10-01 12:25:49 +0100

[diff] [blame]

683

Michalis Spyrou

2019-11-28 11:31:23 +0000

[diff] [blame]

684

/** Signed quantize a neon vector holding 16 floating point values.

685

*

686

* @param[in] qv Input values to be quantized.

687

* @param[in] qi Quantization information to be used in the computation.

688

*

689

* @return A neon vector holding the quantized values

690

*/

Michalis Spyrou

2019-11-28 11:31:23 +0000

[diff] [blame]

691

inline int8x16_t vquantize_signed(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)

692

{

693

const float scale = qi.scale;

694

const int offset = qi.offset;

695

const float32x4_t voffset = vdupq_n_f32(offset);

696

const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);

697

const int32x4x4_t rf =

{

{

#ifdef __aarch64__

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

702

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

703

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),

704

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),

705

#else //__aarch64__

706

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

707

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

708

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),

709

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),

710

#endif //__aarch64__

Michalis Spyrou

2019-11-28 11:31:23 +0000

[diff] [blame]

711

}

712

};

713

const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));

714

const int8x8_t pb = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));

715

return vcombine_s8(pa, pb);

716

}

717

Michele Di Giorgio

d64a46c

2019-10-01 12:25:49 +0100

[diff] [blame]

718

/** Quantize to QASYMM16 a neon vector holding 16 floating point values.

719

*

720

* @param[in] qv Input values to be quantized.

721

* @param[in] qi Quantization information to be used in the computation.

722

*

723

* @return A neon vector holding the quantized values

724

*/

725

inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)

726

{

727

const float scale = qi.scale;

728

const int offset = qi.offset;

729

const float32x4_t voffset = vdupq_n_f32(offset);

730

const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);

731

const int32x4x4_t rf =

{

{

#ifdef __aarch64__

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

736

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

737

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),

738

vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),

739

#else //__aarch64__

740

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),

741

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),

742

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),

743

vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),

#endif //__aarch64__

}

};

const uint16x8_t pa = vcombine_u16(vqmovun_s32(rf.val[0]), vqmovun_s32(rf.val[1]));

748

const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3]));

749

return { pa, pb };

750

}

Gian Marco