blob: 4529b91a48b59b5bc03fd2fd42476e4e8952714b [file] [log] [blame]
Pablo Tello299025a2017-09-29 11:30:12 +01001/*
Gian Marco Iodicebc415af2019-06-13 15:58:32 +01002 * Copyright (c) 2017-2019 ARM Limited.
Pablo Tello299025a2017-09-29 11:30:12 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Gian Marcoe75a02b2017-11-08 12:24:09 +000024#include "GEMMLowp.h"
Pablo Tello299025a2017-09-29 11:30:12 +010025
26#include "arm_compute/core/Types.h"
Georgios Pinitas5a7e7762017-12-01 16:27:29 +000027#include "tests/validation/reference/UtilsQuantizedAsymm.h"
Gian Marco58c57942017-11-28 09:10:03 +000028
29#include <limits>
Pablo Tello299025a2017-09-29 11:30:12 +010030
31namespace arm_compute
32{
33namespace test
34{
35namespace validation
36{
37namespace reference
38{
Gian Marco6b77e912017-11-17 09:27:57 +000039namespace
40{
41template <typename T>
Georgios Pinitas448a81f2019-11-21 14:10:25 +000042struct DataTypeExtractor
43{
44 static DataType data_type()
45 {
46 DataType data_type = DataType::UNKNOWN;
47 if(std::is_same<T, int8_t>::value)
48 {
49 data_type = DataType::QASYMM8_SIGNED;
50 }
51 else if(std::is_same<T, uint8_t>::value)
52 {
53 data_type = DataType::QASYMM8;
54 }
55 else if(std::is_same<T, int16_t>::value)
56 {
57 data_type = DataType::QSYMM16;
58 }
59 return data_type;
60 }
61};
62
63template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000064void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_offset, std::vector<int32_t> result_mult_int,
65 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +000066{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000067 const int cols_in = in->shape().x();
68 const bool is_per_channel = result_mult_int.size() > 1;
Gian Marco6b77e912017-11-17 09:27:57 +000069
70 for(int i = 0; i < in->num_elements(); ++i)
71 {
Gian Marco58c57942017-11-28 09:10:03 +000072 int32_t result = ((*in)[i] + result_offset);
Gian Marco6b77e912017-11-17 09:27:57 +000073
74 if(bias != nullptr)
75 {
76 result += (*bias)[i % cols_in];
77 }
78
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000079 result *= (is_per_channel) ? result_mult_int[i % cols_in] : result_mult_int[0];
Gian Marco58c57942017-11-28 09:10:03 +000080
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000081 result >>= (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
Gian Marco6b77e912017-11-17 09:27:57 +000082
83 // Bounded ReLu
84 if(min != max)
85 {
86 result = std::max(min, std::min(max, result));
87 }
88
89 (*dst)[i] = static_cast<uint8_t>(std::max(0, std::min(255, result)));
90 }
91}
Gian Marco58c57942017-11-28 09:10:03 +000092
Georgios Pinitas448a81f2019-11-21 14:10:25 +000093template <typename TIn, typename TOut>
94void quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> *in, const SimpleTensor<TIn> *bias, SimpleTensor<TOut> *dst, std::vector<int32_t> result_fixedpoint_multiplier,
95 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +000096{
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +000097 const int cols_in = in->shape().x();
98 const bool is_per_channel = result_fixedpoint_multiplier.size() > 1;
Gian Marco58c57942017-11-28 09:10:03 +000099
100 for(int i = 0; i < in->num_elements(); ++i)
101 {
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000102 TIn result = (*in)[i];
Gian Marco58c57942017-11-28 09:10:03 +0000103
104 if(bias != nullptr)
105 {
106 result += (*bias)[i % cols_in];
107 }
108
109 // Fixed point multiplication
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000110 const int32_t multiplier = (is_per_channel) ? result_fixedpoint_multiplier[i % cols_in] : result_fixedpoint_multiplier[0];
111 const int32_t shift = (is_per_channel) ? result_shift[i % cols_in] : result_shift[0];
112
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000113 if(shift < 0)
114 {
115 result = asymm_int_mult(result * (1 << (-shift)), multiplier);
116 }
117 else
118 {
119 result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, multiplier), shift);
120 }
Gian Marco58c57942017-11-28 09:10:03 +0000121 result += result_offset_after_shift;
122
123 // Bounded ReLu
124 if(min != max)
125 {
126 result = std::max(min, std::min(max, result));
127 }
128
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000129 (*dst)[i] = static_cast<TOut>(std::max<TIn>(std::numeric_limits<TOut>::lowest(),
130 std::min<TIn>(std::numeric_limits<TOut>::max(), result)));
Gian Marco Iodicebc415af2019-06-13 15:58:32 +0100131 }
132}
Gian Marco6b77e912017-11-17 09:27:57 +0000133} // namespace
134
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000135template <typename T_out, typename T_in, typename T_in_1>
136SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
Pablo Tello299025a2017-09-29 11:30:12 +0100137{
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000138 static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
Gian Marcoe75a02b2017-11-08 12:24:09 +0000139
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000140 DataType dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100141 SimpleTensor<T_out> c(shape_c, dt);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000142
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100143 const int K = a.shape().x();
144 const int M = a.shape().y();
145 const int N = b.shape().x();
146 const int D = a.shape().z(); // Number of matrices in a batch
147
148 const int a_stride_z = K * M;
149 // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
150 const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0;
151 const int c_stride_z = N * M;
Gian Marcoe75a02b2017-11-08 12:24:09 +0000152
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000153 std::vector<T_out> acc;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100154 acc.resize(N);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000155
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100156 for(int depth = 0; depth < D; ++depth)
Pablo Tello299025a2017-09-29 11:30:12 +0100157 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100158 const int base_addr_a = depth * a_stride_z;
159 const int base_addr_b = depth * b_stride_z;
160 const int base_addr_c = depth * c_stride_z;
161
162 for(int i = 0; i < M; ++i)
Pablo Tello299025a2017-09-29 11:30:12 +0100163 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100164 for(int j = 0; j < N; ++j)
Pablo Tello299025a2017-09-29 11:30:12 +0100165 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100166 acc[j] = 0;
Pablo Tello299025a2017-09-29 11:30:12 +0100167 }
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100168 for(int k = 0; k < K; ++k)
169 {
170 const T_out tmp_a = a_offset + static_cast<T_out>(a[base_addr_a + k + i * K]);
171 for(int j = 0; j < N; ++j)
172 {
173 const T_out tmp_b = b_offset + static_cast<T_out>(b[base_addr_b + j + k * N]);
174 const T_out mult_as_int = tmp_a * tmp_b;
175 acc[j] += mult_as_int;
176 }
177 }
178 for(int j = 0; j < N; ++j)
179 {
180 c[base_addr_c + j + i * N] = acc[j];
181 }
Pablo Tello299025a2017-09-29 11:30:12 +0100182 }
183 }
184
185 return c;
186}
187
// used to validate assembly kernels which don't know anything about offsets
/** Convenience wrapper around gemmlowp_matrix_multiply_core with both quantization offsets fixed to 0. */
template <typename T1, typename T2, typename T3>
SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
{
    return gemmlowp_matrix_multiply_core<T1, T2, T3>(a, b, shape_c, 0, 0);
}
194
Gian Marcoe75a02b2017-11-08 12:24:09 +0000195template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000196SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, std::vector<int32_t> result_mult_int, std::vector<int32_t> result_shift,
197 int32_t min, int32_t max)
Gian Marcoe75a02b2017-11-08 12:24:09 +0000198{
199 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
200
Gian Marco6b77e912017-11-17 09:27:57 +0000201 quantize_down_int32_to_uint8_scale<T>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
202
203 return dst;
204}
205
206template <typename T>
Vidhya Sudhan Loganathan951b8a42019-11-04 14:42:08 +0000207SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, std::vector<int32_t> result_mult_int,
208 std::vector<int32_t> result_shift, int32_t min, int32_t max)
Gian Marco6b77e912017-11-17 09:27:57 +0000209{
210 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
211
212 quantize_down_int32_to_uint8_scale<T>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000213
214 return dst;
215}
216
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000217template <typename TIn, typename TOut>
218SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, std::vector<int32_t> result_fixedpoint_multiplier, std::vector<int32_t> result_shift,
219 int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000220{
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000221 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco58c57942017-11-28 09:10:03 +0000222
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000223 quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
Gian Marco58c57942017-11-28 09:10:03 +0000224
225 return dst;
226}
227
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000228template <typename TIn, typename TOut>
229SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<TIn> &in, const SimpleTensor<TIn> &bias, std::vector<int32_t> result_fixedpoint_multiplier,
230 std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Gian Marco58c57942017-11-28 09:10:03 +0000231{
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000232 SimpleTensor<TOut> dst(in.shape(), DataTypeExtractor<TOut>::data_type());
Gian Marco58c57942017-11-28 09:10:03 +0000233
Georgios Pinitas448a81f2019-11-21 14:10:25 +0000234 quantize_down_scale_by_fixedpoint<TIn, TOut>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
Gian Marco58c57942017-11-28 09:10:03 +0000235
236 return dst;
237}
238
// Explicit template instantiations for the type combinations exercised by the validation suite.
template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
                                                                          std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<uint8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
                                                                          std::vector<int32_t> result_fixedpoint_multiplier,
                                                                          std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
                                                                         std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<int8_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
                                                                         std::vector<int32_t> result_fixedpoint_multiplier,
                                                                         std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, std::vector<int32_t> result_fixedpoint_multiplier,
                                                                          std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<int16_t> gemmlowp_quantize_down_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b,
                                                                          std::vector<int32_t> result_fixedpoint_multiplier,
                                                                          std::vector<int32_t> result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, std::vector<int32_t> result_mult_int,
                                                                           std::vector<int32_t> result_shift, int32_t min, int32_t max);
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, std::vector<int32_t> result_mult_int,
                                                                           std::vector<int32_t> result_shift, int32_t min, int32_t max);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
Pablo Tello299025a2017-09-29 11:30:12 +0100263} // namespace reference
264} // namespace validation
265} // namespace test
266} // namespace arm_compute