blob: 08be4a5182affc832b48caca7312c754bd4c9ab8 [file] [log] [blame]
Pablo Tello299025a2017-09-29 11:30:12 +01001/*
Gian Marco Iodicebc415af2019-06-13 15:58:32 +01002 * Copyright (c) 2017-2019 ARM Limited.
Pablo Tello299025a2017-09-29 11:30:12 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
#include "GEMMLowp.h"

#include "arm_compute/core/Types.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"

#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>
Pablo Tello299025a2017-09-29 11:30:12 +010030
31namespace arm_compute
32{
33namespace test
34{
35namespace validation
36{
37namespace reference
38{
Gian Marco6b77e912017-11-17 09:27:57 +000039namespace
40{
41template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000042void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int,
43 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +000044{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000045 const int cols_in = in->shape().x();
46 const bool is_per_channel = result_mult_int.size() > 1;
Gian Marco6b77e912017-11-17 09:27:57 +000047
48 for(int i = 0; i < in->num_elements(); ++i)
49 {
Gian Marco58c57942017-11-28 09:10:03 +000050 int32_t result = ((*in)[i] + result_offset);
Gian Marco6b77e912017-11-17 09:27:57 +000051
52 if(bias != nullptr)
53 {
54 result += (*bias)[i % cols_in];
55 }
56
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000057 result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0];
Gian Marco58c57942017-11-28 09:10:03 +000058
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000059 result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
Gian Marco6b77e912017-11-17 09:27:57 +000060
61 // Bounded ReLu
62 if(min != max)
63 {
64 result = std::max(min, std::min(max, result));
65 }
66
67 (*dst)[i] = static_cast<uint8_t>(std::max(0, std::min(255, result)));
68 }
69}
Gian Marco58c57942017-11-28 09:10:03 +000070
71template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000072void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, std::vector<int32_t> result_fixedpoint_multiplier,
73 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +000074{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000075 const int cols_in = in->shape().x();
76 const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
Gian Marco58c57942017-11-28 09:10:03 +000077
78 for(int i = 0; i < in->num_elements(); ++i)
79 {
80 int32_t result = (*in)[i];
81
82 if(bias != nullptr)
83 {
84 result += (*bias)[i % cols_in];
85 }
86
87 // Fixed point multiplication
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000088 const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0];
89 const int32_t shift = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
90
91 result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift);
Gian Marco58c57942017-11-28 09:10:03 +000092 result += result_offset_after_shift;
93
94 // Bounded ReLu
95 if(min != max)
96 {
97 result = std::max(min, std::min(max, result));
98 }
99
100 (*dst)[i] = static_cast<uint8_t>(std::max(0, std::min(255, result)));
101 }
102}
Gian Marco Iodicebc415af2019-06-13 15:58:32 +0100103
104template <typename T>
105void quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<int16_t> *dst, int32_t result_fixedpoint_multiplier, int32_t result_shift,
106 int32_t min, int32_t max)
107{
108 const int cols_in = in->shape().x();
109
110 for(int i = 0; i < in->num_elements(); ++i)
111 {
112 int32_t result = (*in)[i];
113
114 if(bias != nullptr)
115 {
116 result += (*bias)[i % cols_in];
117 }
118
119 // Fixed point multiplication
Manuel Bottini07263982019-10-17 18:37:26 +0100120 if(result_shift < 0)
121 {
122 result = asymm_int_mult(result * (1 << (-result_shift)), result_fixedpoint_multiplier);
123 }
124 else
125 {
126 result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, result_fixedpoint_multiplier), result_shift);
127 }
Gian Marco Iodicebc415af2019-06-13 15:58:32 +0100128
129 // Bounded ReLu
130 if(min != max)
131 {
132 result = std::max(min, std::min(max, result));
133 }
134
135 (*dst)[i] = static_cast<int16_t>(std::max(-32768, std::min(32767, result)));
136 }
137}
Gian Marco6b77e912017-11-17 09:27:57 +0000138} // namespace
139
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000140template <typename T_out, typename T_in, typename T_in_1>
141SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
Pablo Tello299025a2017-09-29 11:30:12 +0100142{
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000143 static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
Gian Marcoe75a02b2017-11-08 12:24:09 +0000144
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000145 DataType dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100146 SimpleTensor<T_out> c(shape_c, dt);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000147
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100148 const int K = a.shape().x();
149 const int M = a.shape().y();
150 const int N = b.shape().x();
151 const int D = a.shape().z(); // Number of matrices in a batch
152
153 const int a_stride_z = K * M;
154 // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
155 const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0;
156 const int c_stride_z = N * M;
Gian Marcoe75a02b2017-11-08 12:24:09 +0000157
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000158 std::vector<T_out> acc;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100159 acc.resize(N);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000160
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100161 for(int depth = 0; depth < D; ++depth)
Pablo Tello299025a2017-09-29 11:30:12 +0100162 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100163 const int base_addr_a = depth * a_stride_z;
164 const int base_addr_b = depth * b_stride_z;
165 const int base_addr_c = depth * c_stride_z;
166
167 for(int i = 0; i < M; ++i)
Pablo Tello299025a2017-09-29 11:30:12 +0100168 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100169 for(int j = 0; j < N; ++j)
Pablo Tello299025a2017-09-29 11:30:12 +0100170 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100171 acc[j] = 0;
Pablo Tello299025a2017-09-29 11:30:12 +0100172 }
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100173 for(int k = 0; k < K; ++k)
174 {
175 const T_out tmp_a = a_offset + static_cast<T_out>(a[base_addr_a + k + i * K]);
176 for(int j = 0; j < N; ++j)
177 {
178 const T_out tmp_b = b_offset + static_cast<T_out>(b[base_addr_b + j + k * N]);
179 const T_out mult_as_int = tmp_a * tmp_b;
180 acc[j] += mult_as_int;
181 }
182 }
183 for(int j = 0; j < N; ++j)
184 {
185 c[base_addr_c + j + i * N] = acc[j];
186 }
Pablo Tello299025a2017-09-29 11:30:12 +0100187 }
188 }
189
190 return c;
191}
192
Pablo Tello181e6512017-11-15 13:28:27 +0000193// used to validate assembly kernels which don't know anything about offsets
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000194template <typename T1, typename T2, typename T3>
195SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
Pablo Tello181e6512017-11-15 13:28:27 +0000196{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000197 return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0);
Pablo Tello181e6512017-11-15 13:28:27 +0000198}
199
Gian Marcoe75a02b2017-11-08 12:24:09 +0000200template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000201SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
202 int32_t min, int32_t max)
Gian Marcoe75a02b2017-11-08 12:24:09 +0000203{
204 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
205
Gian Marco6b77e912017-11-17 09:27:57 +0000206 quantize_down_int32_to_uint8_scale<T>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
207
208 return dst;
209}
210
211template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000212SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
213 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +0000214{
215 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
216
217 quantize_down_int32_to_uint8_scale<T>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000218
219 return dst;
220}
221
Gian Marco58c57942017-11-28 09:10:03 +0000222template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000223SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
224 int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000225{
226 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
227
228 quantize_down_int32_to_uint8_scale_by_fixedpoint<T>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
229
230 return dst;
231}
232
233template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000234SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
235 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000236{
237 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
238
239 quantize_down_int32_to_uint8_scale_by_fixedpoint<T>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
240
241 return dst;
242}
243
Gian Marco Iodicebc415af2019-06-13 15:58:32 +0100244template <typename T>
245SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min,
246 int32_t max)
247{
248 SimpleTensor<int16_t> dst(in.shape(), DataType::QSYMM16);
249
250 quantize_down_int32_to_int16_scale_by_fixedpoint<T>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, min, max);
251
252 return dst;
253}
254
255template <typename T>
256SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift,
257 int32_t min, int32_t max)
258{
259 SimpleTensor<int16_t> dst(in.shape(), DataType::QSYMM16);
260
261 quantize_down_int32_to_int16_scale_by_fixedpoint<T>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, min, max);
262
263 return dst;
264}
265
// Explicit template instantiations for the combinations exercised by the validation suite.
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
                                                                                         std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
                                                                                         std::vector<int32_t> result_fixedpoint_multiplier,
                                                                                         std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift,
                                                                                         int32_t min, int32_t max);
template SimpleTensor<int16_t> gemmlowp_quantize_down_int32_to_int16_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier,
                                                                                         int32_t result_shift, int32_t min, int32_t max);
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
                                                                           std::vector<int32_t> result_shift, int32_t min, int32_t max);
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
                                                                           std::vector<int32_t> result_shift, int32_t min, int32_t max);
// Core GEMMLowp: signed and unsigned 8-bit inputs, int32 accumulators.
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
// Offset-free variants used to validate assembly kernels (including mixed-sign inputs).
template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
Pablo Tello299025a2017-09-29 11:30:12 +0100284} // namespace reference
285} // namespace validation
286} // namespace test
287} // namespace arm_compute