blob: 36d86d1532cb0a0aff46b431110ca962bc9ccdb1 [file] [log] [blame]
Pablo Tello299025a2017-09-29 11:30:12 +01001/*
Manuel Bottini959c26d2019-12-02 16:22:35 +00002 * Copyright (c) 2017-2020 ARM Limited.
Pablo Tello299025a2017-09-29 11:30:12 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Gian Marcoe75a02b2017-11-08 12:24:09 +000024#include "GEMMLowp.h"
Pablo Tello299025a2017-09-29 11:30:12 +010025
26#include "arm_compute/core/Types.h"
Georgios Pinitas5a7e7762017-12-01 16:27:29 +000027#include "tests/validation/reference/UtilsQuantizedAsymm.h"
Gian Marco58c57942017-11-28 09:10:03 +000028
Georgios Pinitasafc630f2020-03-30 14:09:27 +010029#include "support/ToolchainSupport.h"
30
Gian Marco58c57942017-11-28 09:10:03 +000031#include <limits>
Pablo Tello299025a2017-09-29 11:30:12 +010032
33namespace arm_compute
34{
35namespace test
36{
37namespace validation
38{
39namespace reference
40{
Gian Marco6b77e912017-11-17 09:27:57 +000041namespace
42{
43template <typename T>
Georgios Pinitas448a81f2019-11-21 14:10:25 +000044struct DataTypeExtractor
45{
46 static DataType data_type()
47 {
48 DataType data_type = DataType::UNKNOWN;
49 if(std::is_same<T, int8_t>::value)
50 {
51 data_type = DataType::QASYMM8_SIGNED;
52 }
53 else if(std::is_same<T, uint8_t>::value)
54 {
55 data_type = DataType::QASYMM8;
56 }
57 else if(std::is_same<T, int16_t>::value)
58 {
59 data_type = DataType::QSYMM16;
60 }
61 return data_type;
62 }
63};
64
Manuel Bottini959c26d2019-12-02 16:22:35 +000065template <typename TIn, typename TOut>
66void quantize_down_scale(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int,
67 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +000068{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000069 const int cols_in = in->shape().x();
70 const bool is_per_channel = result_mult_int.size() > 1;
Gian Marco6b77e912017-11-17 09:27:57 +000071
72 for(int i = 0; i < in->num_elements(); ++i)
73 {
Gian Marco58c57942017-11-28 09:10:03 +000074 int32_t result = ((*in)[i] + result_offset);
Gian Marco6b77e912017-11-17 09:27:57 +000075
76 if(bias != nullptr)
77 {
78 result += (*bias)[i % cols_in];
79 }
80
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000081 result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0];
Gian Marco58c57942017-11-28 09:10:03 +000082
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000083 result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
Gian Marco6b77e912017-11-17 09:27:57 +000084
85 // Bounded ReLu
86 if(min != max)
87 {
88 result = std::max(min, std::min(max, result));
89 }
90
Manuel Bottini959c26d2019-12-02 16:22:35 +000091 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
92 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
Gian Marco6b77e912017-11-17 09:27:57 +000093 }
94}
Gian Marco58c57942017-11-28 09:10:03 +000095
Georgios Pinitas448a81f2019-11-21 14:10:25 +000096template <typename TIn, typename TOut>
97void quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<int32_t> result_fixedpoint_multiplier,
98 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +000099{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000100 const int cols_in = in->shape().x();
101 const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
Gian Marco58c57942017-11-28 09:10:03 +0000102
103 for(int i = 0; i < in->num_elements(); ++i)
104 {
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000105 TIn result = (*in)[i];
Gian Marco58c57942017-11-28 09:10:03 +0000106
107 if(bias != nullptr)
108 {
109 result += (*bias)[i % cols_in];
110 }
111
112 // Fixed point multiplication
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000113 const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0];
114 const int32_t shift = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
115
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000116 if(shift < 0)
117 {
118 result = asymm_int_mult(result * (1 << (-shift)), multiplier);
119 }
120 else
121 {
122 result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift);
123 }
Gian Marco58c57942017-11-28 09:10:03 +0000124 result += result_offset_after_shift;
125
126 // Bounded ReLu
127 if(min != max)
128 {
129 result = std::max(min, std::min(max, result));
130 }
131
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000132 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
133 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
Gian Marco Iodicebc415af2019-06-13 15:58:32 +0100134 }
135}
Sheri Zhang1b14c752020-03-09 14:29:52 +0000136
137template <typename TIn, typename TOut>
138void quantize_down_scale_by_float(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<float_t> result_real_multiplier,
139 int32_t result_offset, int32_t min, int32_t max)
140{
141 const int cols_in = in->shape().x();
142 const bool is_per_channel = result_real_multiplier.size() > 1;
143
144 for(int i = 0; i < in->num_elements(); ++i)
145 {
146 TIn result = (*in)[i];
147
148 if(bias != nullptr)
149 {
150 result += (*bias)[i % cols_in];
151 }
152
153 // Float multiplication
154 const float_t multiplier = (is_per_channel) ? result_real_multiplier[i % cols_in] : result_real_multiplier[0];
155
156 float_t result_f = static_cast<float_t>(result) * multiplier + static_cast<float_t>(result_offset);
Georgios Pinitasafc630f2020-03-30 14:09:27 +0100157 result = static_cast<TIn>(support::cpp11::round(result_f));
Sheri Zhang1b14c752020-03-09 14:29:52 +0000158
159 // Bounded ReLu
160 if(min != max)
161 {
162 result = std::max(min, std::min(max, result));
163 }
164
165 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
166 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
167 }
168}
Gian Marco6b77e912017-11-17 09:27:57 +0000169} // namespace
170
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000171template <typename T_out, typename T_in, typename T_in_1>
172SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
Pablo Tello299025a2017-09-29 11:30:12 +0100173{
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000174 static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
Gian Marcoe75a02b2017-11-08 12:24:09 +0000175
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000176 DataType dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100177 SimpleTensor<T_out> c(shape_c, dt);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000178
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100179 const int K = a.shape().x();
180 const int M = a.shape().y();
181 const int N = b.shape().x();
182 const int D = a.shape().z(); // Number of matrices in a batch
183
184 const int a_stride_z = K * M;
185 // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
186 const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0;
187 const int c_stride_z = N * M;
Gian Marcoe75a02b2017-11-08 12:24:09 +0000188
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000189 std::vector<T_out> acc;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100190 acc.resize(N);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000191
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100192 for(int depth = 0; depth < D; ++depth)
Pablo Tello299025a2017-09-29 11:30:12 +0100193 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100194 const int base_addr_a = depth * a_stride_z;
195 const int base_addr_b = depth * b_stride_z;
196 const int base_addr_c = depth * c_stride_z;
197
198 for(int i = 0; i < M; ++i)
Pablo Tello299025a2017-09-29 11:30:12 +0100199 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100200 for(int j = 0; j < N; ++j)
Pablo Tello299025a2017-09-29 11:30:12 +0100201 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100202 acc[j] = 0;
Pablo Tello299025a2017-09-29 11:30:12 +0100203 }
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100204 for(int k = 0; k < K; ++k)
205 {
206 const T_out tmp_a = a_offset + static_cast<T_out>(a[base_addr_a + k + i * K]);
207 for(int j = 0; j < N; ++j)
208 {
209 const T_out tmp_b = b_offset + static_cast<T_out>(b[base_addr_b + j + k * N]);
210 const T_out mult_as_int = tmp_a * tmp_b;
211 acc[j] += mult_as_int;
212 }
213 }
214 for(int j = 0; j < N; ++j)
215 {
216 c[base_addr_c + j + i * N] = acc[j];
217 }
Pablo Tello299025a2017-09-29 11:30:12 +0100218 }
219 }
220
221 return c;
222}
223
// Used to validate assembly kernels which don't know anything about offsets.
// Forwards to gemmlowp_matrix_multiply_core with both quantization offsets set
// to zero, i.e. a plain widening integer matrix multiplication.
template <typename T1, typename T2, typename T3>
SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
{
    return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0);
}
230
Manuel Bottini959c26d2019-12-02 16:22:35 +0000231template <typename TIn, typename TOut>
232SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
233 int32_t min, int32_t max)
Gian Marcoe75a02b2017-11-08 12:24:09 +0000234{
Manuel Bottini959c26d2019-12-02 16:22:35 +0000235 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marcoe75a02b2017-11-08 12:24:09 +0000236
Manuel Bottini959c26d2019-12-02 16:22:35 +0000237 quantize_down_scale<TIn, TOut>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marco6b77e912017-11-17 09:27:57 +0000238
239 return dst;
240}
241
Manuel Bottini959c26d2019-12-02 16:22:35 +0000242template <typename TIn, typename TOut>
243SimpleTensor<TOut> gemmlowp_quantize_down_scale(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
244 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +0000245{
Manuel Bottini959c26d2019-12-02 16:22:35 +0000246 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco6b77e912017-11-17 09:27:57 +0000247
Manuel Bottini959c26d2019-12-02 16:22:35 +0000248 quantize_down_scale<TIn, TOut>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000249
250 return dst;
251}
252
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000253template <typename TIn, typename TOut>
254SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
255 int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000256{
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000257 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco58c57942017-11-28 09:10:03 +0000258
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000259 quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
Gian Marco58c57942017-11-28 09:10:03 +0000260
261 return dst;
262}
263
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000264template <typename TIn, typename TOut>
265SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
266 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000267{
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000268 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco58c57942017-11-28 09:10:03 +0000269
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000270 quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
Gian Marco58c57942017-11-28 09:10:03 +0000271
272 return dst;
273}
274
Sheri Zhang1b14c752020-03-09 14:29:52 +0000275template <typename TIn, typename TOut>
276SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias,
277 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
278{
279 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
280
281 quantize_down_scale_by_float<TIn, TOut>(&in, &bias, &dst, result_real_multiplier, result_offset, min, max);
282
283 return dst;
284}
285
286template <typename TIn, typename TOut>
287SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> &in,
288 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max)
289{
290 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
291
292 quantize_down_scale_by_float<TIn, TOut>(&in, nullptr, &dst, result_real_multiplier, result_offset, min, max);
293
294 return dst;
295}
296
297template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
298 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
299template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
300 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
301template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
302 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
303template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<int32_t> &a,
304 std::vector<float_t> result_real_multiplier, int32_t result_offset, int32_t min, int32_t max);
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000305template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
306 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
307template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
308 std::vector<int32_t> result_fixedpoint_multiplier,
309 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
310template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
311 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
312template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
313 std::vector<int32_t> result_fixedpoint_multiplier,
314 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
315template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
316 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
317template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
318 std::vector<int32_t> result_fixedpoint_multiplier,
319 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
Manuel Bottini959c26d2019-12-02 16:22:35 +0000320template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
321 std::vector<int32_t> result_shift, int32_t min, int32_t max);
322template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
323 std::vector<int32_t> result_shift, int32_t min, int32_t max);
324template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
325 std::vector<int32_t> result_shift, int32_t min, int32_t max);
326template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
327 std::vector<int32_t> result_shift, int32_t min, int32_t max);
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100328template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
329template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000330template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
331template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
332template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
Pablo Tello299025a2017-09-29 11:30:12 +0100333} // namespace reference
334} // namespace validation
335} // namespace test
336} // namespace arm_compute