blob: 9a7e409e8a9bb6be4d7e6a8d5c8458a5278e43b2 [file] [log] [blame]
Pablo Tello299025a2017-09-29 11:30:12 +01001/*
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +01002 * Copyright (c) 2017-2018 ARM Limited.
Pablo Tello299025a2017-09-29 11:30:12 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Gian Marcoe75a02b2017-11-08 12:24:09 +000024#include "GEMMLowp.h"
Pablo Tello299025a2017-09-29 11:30:12 +010025
26#include "arm_compute/core/Types.h"
Georgios Pinitas5a7e7762017-12-01 16:27:29 +000027#include "tests/validation/reference/UtilsQuantizedAsymm.h"
Gian Marco58c57942017-11-28 09:10:03 +000028
29#include <limits>
Pablo Tello299025a2017-09-29 11:30:12 +010030
31namespace arm_compute
32{
33namespace test
34{
35namespace validation
36{
37namespace reference
38{
Gian Marco6b77e912017-11-17 09:27:57 +000039namespace
40{
41template <typename T>
42void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_offset, int32_t result_mult_int, int32_t result_shift,
43 int32_t min, int32_t max)
44{
45 const int cols_in = in->shape().x();
46
47 for(int i = 0; i < in->num_elements(); ++i)
48 {
Gian Marco58c57942017-11-28 09:10:03 +000049 int32_t result = ((*in)[i] + result_offset);
Gian Marco6b77e912017-11-17 09:27:57 +000050
51 if(bias != nullptr)
52 {
53 result += (*bias)[i % cols_in];
54 }
55
Gian Marco58c57942017-11-28 09:10:03 +000056 result *= result_mult_int;
57
Gian Marco6b77e912017-11-17 09:27:57 +000058 result >>= result_shift;
59
60 // Bounded ReLu
61 if(min != max)
62 {
63 result = std::max(min, std::min(max, result));
64 }
65
66 (*dst)[i] = static_cast<uint8_t>(std::max(0, std::min(255, result)));
67 }
68}
Gian Marco58c57942017-11-28 09:10:03 +000069
70template <typename T>
71void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_fixedpoint_multiplier, int32_t result_shift,
72 int32_t result_offset_after_shift, int32_t min, int32_t max)
73{
74 const int cols_in = in->shape().x();
75
76 for(int i = 0; i < in->num_elements(); ++i)
77 {
78 int32_t result = (*in)[i];
79
80 if(bias != nullptr)
81 {
82 result += (*bias)[i % cols_in];
83 }
84
85 // Fixed point multiplication
86 result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, result_fixedpoint_multiplier), result_shift);
87 result += result_offset_after_shift;
88
89 // Bounded ReLu
90 if(min != max)
91 {
92 result = std::max(min, std::min(max, result));
93 }
94
95 (*dst)[i] = static_cast<uint8_t>(std::max(0, std::min(255, result)));
96 }
97}
Gian Marco6b77e912017-11-17 09:27:57 +000098} // namespace
99
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000100template <typename T_out, typename T_in>
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100101SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
Pablo Tello299025a2017-09-29 11:30:12 +0100102{
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000103 static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
Gian Marcoe75a02b2017-11-08 12:24:09 +0000104
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000105 DataType dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100106 SimpleTensor<T_out> c(shape_c, dt);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000107
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100108 const int K = a.shape().x();
109 const int M = a.shape().y();
110 const int N = b.shape().x();
111 const int D = a.shape().z(); // Number of matrices in a batch
112
113 const int a_stride_z = K * M;
114 // Do not slide the matrix B along the 3rd dimension in case matrix B has less than 3 dimensions
115 const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0;
116 const int c_stride_z = N * M;
Gian Marcoe75a02b2017-11-08 12:24:09 +0000117
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000118 std::vector<T_out> acc;
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100119 acc.resize(N);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000120
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100121 for(int depth = 0; depth < D; ++depth)
Pablo Tello299025a2017-09-29 11:30:12 +0100122 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100123 const int base_addr_a = depth * a_stride_z;
124 const int base_addr_b = depth * b_stride_z;
125 const int base_addr_c = depth * c_stride_z;
126
127 for(int i = 0; i < M; ++i)
Pablo Tello299025a2017-09-29 11:30:12 +0100128 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100129 for(int j = 0; j < N; ++j)
Pablo Tello299025a2017-09-29 11:30:12 +0100130 {
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100131 acc[j] = 0;
Pablo Tello299025a2017-09-29 11:30:12 +0100132 }
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100133 for(int k = 0; k < K; ++k)
134 {
135 const T_out tmp_a = a_offset + static_cast<T_out>(a[base_addr_a + k + i * K]);
136 for(int j = 0; j < N; ++j)
137 {
138 const T_out tmp_b = b_offset + static_cast<T_out>(b[base_addr_b + j + k * N]);
139 const T_out mult_as_int = tmp_a * tmp_b;
140 acc[j] += mult_as_int;
141 }
142 }
143 for(int j = 0; j < N; ++j)
144 {
145 c[base_addr_c + j + i * N] = acc[j];
146 }
Pablo Tello299025a2017-09-29 11:30:12 +0100147 }
148 }
149
150 return c;
151}
152
Pablo Tello181e6512017-11-15 13:28:27 +0000153// used to validate assembly kernels which don't know anything about offsets
Michalis Spyrouf3dfa272017-11-21 17:52:12 +0000154template <typename T1, typename T2>
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100155SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b, TensorShape shape_c)
Pablo Tello181e6512017-11-15 13:28:27 +0000156{
Georgios Pinitasebf6b8a2018-09-24 16:31:08 +0100157 return gemmlowp_matrix_multiply_core<T1, T2>(a, b, shape_c, 0, 0);
Pablo Tello181e6512017-11-15 13:28:27 +0000158}
159
Gian Marcoe75a02b2017-11-08 12:24:09 +0000160template <typename T>
Gian Marco6b77e912017-11-17 09:27:57 +0000161SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max)
Gian Marcoe75a02b2017-11-08 12:24:09 +0000162{
163 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
164
Gian Marco6b77e912017-11-17 09:27:57 +0000165 quantize_down_int32_to_uint8_scale<T>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
166
167 return dst;
168}
169
170template <typename T>
171SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift,
172 int32_t min, int32_t max)
173{
174 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
175
176 quantize_down_int32_to_uint8_scale<T>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
Gian Marcoe75a02b2017-11-08 12:24:09 +0000177
178 return dst;
179}
180
Gian Marco58c57942017-11-28 09:10:03 +0000181template <typename T>
182SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift,
183 int32_t result_offset_after_shift, int32_t min,
184 int32_t max)
185{
186 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
187
188 quantize_down_int32_to_uint8_scale_by_fixedpoint<T>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
189
190 return dst;
191}
192
193template <typename T>
194SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift,
195 int32_t result_offset_after_shift, int32_t min, int32_t max)
196{
197 SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
198
199 quantize_down_int32_to_uint8_scale_by_fixedpoint<T>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
200
201 return dst;
202}
203
// Explicit template instantiations for the int32 accumulator type used by the tests.
// Fixed-point requantization (without and with bias):
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift,
                                                                                         int32_t result_offset_after_shift, int32_t min, int32_t max);
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier,
                                                                                         int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
// Integer-scale requantization (without and with bias):
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min,
                                                                           int32_t max);
template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, int32_t result_mult_int,
                                                                           int32_t result_shift, int32_t min, int32_t max);
// Core GEMMLowp and the offset-free wrapper, for both signed and unsigned 8-bit inputs:
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
template SimpleTensor<int32_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
Pablo Tello299025a2017-09-29 11:30:12 +0100216} // namespace reference
217} // namespace validation
218} // namespace test
219} // namespace arm_compute