Blame - src/backends/reference/workloads/ConvImpl.cpp - ml/armnn

2018-03-09 14:13:49 +0000

[diff] [blame]

2

David Beck

ecb56cd

2018-09-05 12:52:57 +0100

[diff] [blame]

3

// SPDX-License-Identifier: MIT

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

4

//

5

6

#include "ConvImpl.hpp"

7

Narumol Prangnawarat

2020-04-01 16:51:23 +0100

[diff] [blame]

8

#include <armnn/utility/Assert.hpp>

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

#include <cmath>

#include <limits>

namespace armnn

{

/// Decomposes a real multiplier into a 32-bit fixed-point multiplier and a right shift.
///
/// After construction, m_Multiplier holds round(mantissa * 2^31) and m_RightShift holds the
/// negated binary exponent returned by std::frexp, so that
/// multiplier ~= m_Multiplier * 2^-31 * 2^-m_RightShift.
///
/// @param multiplier  Value to quantize; asserted to lie in [0, 1).
QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
{
    ARMNN_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);

    // Zero is stored directly: a zero multiplier with no shift.
    if (multiplier == 0.0f)
    {
        m_Multiplier = 0;
        m_RightShift = 0;
        return;
    }

    const int64_t twoPow31 = 1ll << 31;

    // Split into mantissa and binary exponent; the exponent is stored negated as a right shift.
    int exponent = 0;
    const double mantissa = std::frexp(multiplier, &exponent);
    m_RightShift = -exponent;

    // Scale the mantissa to a fixed-point value with 31 fractional bits.
    int64_t fixedPoint = static_cast<int64_t>(::round(mantissa * twoPow31));
    ARMNN_ASSERT(fixedPoint <= twoPow31);

    // Rounding can push the value up to exactly 2^31, which does not fit in int32_t:
    // halve it and compensate by shifting one bit less.
    if (fixedPoint == twoPow31)
    {
        fixedPoint /= 2;
        --m_RightShift;
    }

    ARMNN_ASSERT(m_RightShift >= 0);
    ARMNN_ASSERT(fixedPoint <= std::numeric_limits<int32_t>::max());
    m_Multiplier = static_cast<int32_t>(fixedPoint);
}

int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const

42

{

43

int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);

44

return RoundingDivideByPOT(x, m_RightShift);

45

}

46

47

/// Computes (a * b + nudge) / 2^31 using a 64-bit intermediate product.
///
/// The nudge is 2^30 for a non-negative product and 1 - 2^30 for a negative one, so the
/// truncating division rounds instead of chopping. When both operands are the minimum
/// int32_t value the result is saturated to the maximum int32_t value.
///
/// @param a  First factor.
/// @param b  Second factor.
/// @return   The rounded, scaled product narrowed to 32 bits.
int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
    // Check for overflow: this is the only operand pair whose result is saturated.
    const int32_t lowest = std::numeric_limits<int32_t>::min();
    if (a == lowest && b == lowest)
    {
        return std::numeric_limits<int32_t>::max();
    }

    const int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);

    // Rounding offset whose sign follows the sign of the product.
    int32_t rounding;
    if (product >= 0)
    {
        rounding = (1 << 30);
    }
    else
    {
        rounding = (1 - (1 << 30));
    }

    return static_cast<std::int32_t>((product + rounding) / (1ll << 31));
}

int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)

63

{

Narumol Prangnawarat

2020-04-01 16:51:23 +0100

[diff] [blame]

64

ARMNN_ASSERT(exponent >= 0 && exponent <= 31);

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

65

int32_t mask = (1 << exponent) - 1;

66

int32_t remainder = x & mask;

67

int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);

68

return (x >> exponent) + (remainder > threshold ? 1 : 0);

69

}

70

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

71

void Convolve(const TensorShape& rInputShape,

72

Decoder<float>& rInputDecoder,

73

const TensorShape& rOutputShape,

74

Encoder<float>& rOutputEncoder,

75

const TensorShape& rFilterShape,

76

Decoder<float>& rFilterDecoder,

77

bool biasEnabled,

78

Decoder<float>* pBiasDecoder,

79

DataLayout dataLayout,

80

unsigned int paddingTop,

81

unsigned int paddingLeft,

82

unsigned int xStride,

83

unsigned int yStride,

84

unsigned int xDilation,

85

unsigned int yDilation,

86

bool depthwise)

87

{

88

if (biasEnabled && !pBiasDecoder)

89

{

90

throw InvalidArgumentException("Bias is enabled but the bias data is invalid");

91

}

92

const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);

93

94

const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();

95

const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();

96

const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();

97

Jan Eilers

2021-06-02 12:01:25 +0100

[diff] [blame]

98

// Weights layout:

99

// Conv2d: [O,H,W,I]

100

// Depthwise: [1,H,W,O]

101

const unsigned int inputChannels = rInputShape[channelsIndex];

102

const unsigned int outputChannels = rOutputShape[channelsIndex];

103

const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

104

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

105

const unsigned int batchSize = rOutputShape[0];

106

const unsigned int outputHeight = rOutputShape[heightIndex];

107

const unsigned int outputWidth = rOutputShape[widthIndex];

108

const unsigned int inputHeight = rInputShape[heightIndex];

109

const unsigned int inputWidth = rInputShape[widthIndex];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

110

Jan Eilers

2021-06-02 12:01:25 +0100

[diff] [blame]

111

const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];

112

const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

113

Finn Williams

ea8ce70

2020-09-29 19:54:00 +0100

[diff] [blame]

114

const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);

Jan Eilers

2021-06-02 12:01:25 +0100

[diff] [blame]

115

const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

116

Finn Williams

ea8ce70

2020-09-29 19:54:00 +0100

[diff] [blame]

117

const TensorShape biasShape{outputChannels};

118

const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

119

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

120

for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)

121

{

122

for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)

123

{

124

for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)

125

{

126

for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)

127

{

128

// This loop goes over each output element.

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

129

float sum = 0.0f;

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

130

131

// For depthwise, each output channel corresponds to exactly one input channel.

132

// For normal, must loop over each input channel.

133

for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)

134

{

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

135

for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)

136

{

137

for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)

138

{

139

// This loop goes over each input element for each output element.

140

unsigned int filterIndex = 0;

141

142

// Since dimensionality of kernel depends on depthwiseness, so does index.

143

if (depthwise)

144

{

Jan Eilers

2021-06-02 12:01:25 +0100

[diff] [blame]

145

cInput = cOutput / depthMultiplier;

146

// filterDepth = outputChannels;

147

filterIndex = xFilter * outputChannels + cOutput +

148

yFilter * filterWidth * outputChannels;

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

149

}

150

else

151

{

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

152

// Keep this implementation, as using DataLayoutIndexed::GetIndex causes great

153

// performance regression.

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

154

if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

155

{

156

filterIndex = cOutput * filterHeight * filterWidth * inputChannels +

157

yFilter * filterWidth * inputChannels +

158

xFilter * inputChannels +

cInput;

}

else

{

filterIndex = cOutput * filterWidth * filterHeight * inputChannels +

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

164

cInput * filterWidth * filterHeight +

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

165

yFilter * filterWidth +

166

xFilter;

167

}

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

168

}

Matteo Martincigh

18f2d1c

2019-06-05 13:54:25 +0100

[diff] [blame]

169

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

170

unsigned int yInput = yOutput * yStride + yFilter * yDilation;

171

unsigned int xInput = xOutput * xStride + xFilter * xDilation;

float inputValue;

// Check if we're in the padding.

176

if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

177

xInput < paddingLeft || xInput >= inputWidth + paddingLeft)

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

{

inputValue = 0.0f;

}

else

{

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

183

unsigned int inputIndex = 0;

184

185

// Keep this implementation, as using DataLayoutIndexed::GetIndex causes great

186

// performance regression.

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

187

if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

188

{

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

189

inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

190

(yInput - paddingTop) * inputWidth * inputChannels +

191

(xInput - paddingLeft) * inputChannels +

cInput;

}

else

{

inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +

197

inputWidth * inputHeight * cInput +

198

inputWidth * (yInput - paddingTop) +

199

xInput - paddingLeft;

200

}

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

201

inputValue = inputVec[inputIndex];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

202

}

Matteo Martincigh

18f2d1c

2019-06-05 13:54:25 +0100

[diff] [blame]

203

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

204

sum += filterVec[filterIndex] * inputValue;

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

}

}

}

if (biasEnabled)

{

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

211

sum += biasVec[cOutput];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

212

}

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

213

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

214

unsigned int outIdx;

215

if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)

216

{

217

outIdx = batchIdx * outputHeight * outputWidth * outputChannels +

218

yOutput * outputWidth * outputChannels +

219

xOutput * outputChannels +

cOutput;

}

else

{

outIdx = batchIdx * outputHeight * outputWidth * outputChannels +

225

cOutput * outputHeight * outputWidth +

226

yOutput * outputWidth +

227

xOutput;

228

}

Matteo Martincigh

18f2d1c

2019-06-05 13:54:25 +0100

[diff] [blame]

229

230

rOutputEncoder[outIdx];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

231

rOutputEncoder.Set(sum);

Mike Kelly