//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ConvImpl.hpp"

#include <boost/assert.hpp>

#include <cmath>
#include <limits>

namespace armnn
{

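// Decomposes a real multiplier in [0, 1) into a normalised 32-bit fixed-point value and a right
// shift, such that multiplier ~= m_Multiplier * 2^-31 * 2^-m_RightShift. operator* below applies
// the decomposed multiplier to an int32 value using integer arithmetic only.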
QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
{
    BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);
    if (multiplier == 0.0f)
    {
        m_Multiplier = 0;
        m_RightShift = 0;
    }
    else
    {
        const double q = std::frexp(multiplier, &m_RightShift);
        m_RightShift = -m_RightShift;
        int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
        BOOST_ASSERT(qFixed <= (1ll << 31));
        if (qFixed == (1ll << 31))
        {
            qFixed /= 2;
            --m_RightShift;
        }
        BOOST_ASSERT(m_RightShift >= 0);
        BOOST_ASSERT(qFixed <= std::numeric_limits<int32_t>::max());
        m_Multiplier = static_cast<int32_t>(qFixed);
    }
}

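// Multiplies rhs by the stored real multiplier: take the rounded high 32 bits of the doubled
// 64-bit product, then divide by 2^m_RightShift with rounding.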
int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const
{
    int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);
    return RoundingDivideByPOT(x, m_RightShift);
}

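// Returns the rounded high 32 bits of 2 * a * b. The only case that can overflow,
// a == b == std::numeric_limits<int32_t>::min(), saturates to the int32 maximum.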
int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
    // Check for overflow.
    if (a == b && a == std::numeric_limits<int32_t>::min())
    {
        return std::numeric_limits<int32_t>::max();
    }
    int64_t a_64(a);
    int64_t b_64(b);
    int64_t ab_64 = a_64 * b_64;
    int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
    int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
    return ab_x2_high32;
}

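// Divides x by 2^exponent, rounding to nearest with ties rounded away from zero
// (a rounding arithmetic right shift).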
int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)
{
    BOOST_ASSERT(exponent >= 0 && exponent <= 31);
    int32_t mask = (1 << exponent) - 1;
    int32_t remainder = x & mask;
    int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
    return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

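// Reference implementation of 2D convolution, also covering depthwise convolution when
// 'depthwise' is set. It supports NCHW and NHWC data layouts, asymmetric padding, strides,
// dilation and an optional per-output-channel bias. All values are read and written through the
// Decoder/Encoder abstractions, so the same loop nest serves float and quantized tensors.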
void Convolve(const TensorShape& rInputShape,
              Decoder<float>& rInputDecoder,
              const TensorShape& rOutputShape,
              Encoder<float>& rOutputEncoder,
              const TensorShape& rFilterShape,
              Decoder<float>& rFilterDecoder,
              bool biasEnabled,
              Decoder<float>* pBiasDecoder,
              DataLayout dataLayout,
              unsigned int paddingTop,
              unsigned int paddingLeft,
              unsigned int xStride,
              unsigned int yStride,
              unsigned int xDilation,
              unsigned int yDilation,
              bool depthwise)
{
    if (biasEnabled && !pBiasDecoder)
    {
        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
    }
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);

    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
    const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();

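    // Filter tensor layout: for depthwise convolution the weights are interpreted as
    // [depthMultiplier, inputChannels, filterHeight, filterWidth]; otherwise dimension 0 holds
    // the output channels and the remaining dimensions follow 'dataLayout'.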
    unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;
    unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];
    unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];

    unsigned int batchSize = rOutputShape[0];
    unsigned int outputHeight = rOutputShape[heightIndex];
    unsigned int outputWidth = rOutputShape[widthIndex];
    unsigned int inputHeight = rInputShape[heightIndex];
    unsigned int inputWidth = rInputShape[widthIndex];

    unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];
    unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];

    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
    {
        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
        {
            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
            {
                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
                {
                    // This loop goes over each output element.
                    float sum = 0.0f;

                    // For depthwise convolution each output channel corresponds to exactly one
                    // input channel, so this loop runs only once; for a normal convolution it
                    // loops over every input channel.
                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
                    {
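                        // For depthwise convolution the output channel index decomposes as
                        // cOutput = cInput * depthMultiplier + depthwiseMultiplierIdx.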
                        unsigned int depthwiseMultiplierIdx = 0;
                        if (depthwise)
                        {
                            cInput = cOutput / depthMultiplier;
                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
                        }

                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
                        {
                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
                            {
                                // This loop goes over each input element for each output element.
                                unsigned int filterIndex = 0;

                                // The filter tensor layout, and therefore the index calculation,
                                // depends on whether this is a depthwise convolution.
                                if (depthwise)
                                {
                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
                                                  cInput * filterWidth * filterHeight +
                                                  yFilter * filterWidth +
                                                  xFilter;
                                }
                                else
                                {
                                    // Keep this explicit index calculation: using
                                    // DataLayoutIndexed::GetIndex here causes a significant
                                    // performance regression.
                                    if (dataLayout == DataLayout::NHWC)
                                    {
                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
                                                      yFilter * filterWidth * inputChannels +
                                                      xFilter * inputChannels +
                                                      cInput;
                                    }
                                    else
                                    {
                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
                                                      cInput * filterWidth * filterHeight +
                                                      yFilter * filterWidth +
                                                      xFilter;
                                    }
                                }

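                                // Position the filter decoder on the selected weight. The second
                                // argument is the output channel; decoders for per-axis quantized
                                // weights use it to pick the matching quantization scale.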
                                rFilterDecoder.SetIndex(filterIndex, cOutput);
                                float filterValue = rFilterDecoder.Get();

                                unsigned int yInput = yOutput * yStride + yFilter * yDilation;
                                unsigned int xInput = xOutput * xStride + xFilter * xDilation;

                                float inputValue;

                                // Check if we're in the padding.
                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
                                {
                                    inputValue = 0.0f;
                                }
                                else
                                {
                                    unsigned int inputIndex = 0;

                                    // Keep this explicit index calculation: using
                                    // DataLayoutIndexed::GetIndex here causes a significant
                                    // performance regression.
                                    if (dataLayout == DataLayout::NHWC)
                                    {
                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
                                                     (yInput - paddingTop) * inputWidth * inputChannels +
                                                     (xInput - paddingLeft) * inputChannels +
                                                     cInput;
                                    }
                                    else
                                    {
                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
                                                     inputWidth * inputHeight * cInput +
                                                     inputWidth * (yInput - paddingTop) +
                                                     xInput - paddingLeft;
                                    }

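                                    // operator[] positions the input decoder on the computed
                                    // element; Get() then returns it as a float, dequantizing
                                    // quantized data if necessary.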
                                    rInputDecoder[inputIndex];
                                    inputValue = rInputDecoder.Get();
                                }

                                sum += filterValue * inputValue;
                            }
                        }
                    }

                    if (biasEnabled)
                    {
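                        // The bias holds one value per output channel; SetIndex positions the
                        // bias decoder on channel cOutput before reading it.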
                        (*pBiasDecoder).SetIndex(cOutput, cOutput);
                        sum += pBiasDecoder->Get();
                    }

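                    // Compute the flattened offset of output element (batchIdx, cOutput, yOutput, xOutput)
                    // for the given data layout, then write the accumulated value through the encoder.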
                    unsigned int outIdx = dataLayoutIndexed.GetIndex(rOutputShape, batchIdx, cOutput, yOutput, xOutput);

                    rOutputEncoder[outIdx];
                    rOutputEncoder.Set(sum);
                }
            }
        }
    }
}

} // namespace armnn