Blame - src/backends/reference/workloads/ConvImpl.cpp - ml/armnn

2018-03-09 14:13:49 +0000

[diff] [blame]

2

David Beck

ecb56cd

2018-09-05 12:52:57 +0100

[diff] [blame]

3

// SPDX-License-Identifier: MIT

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

4

//

5

6

#include "ConvImpl.hpp"

7

Narumol Prangnawarat

2020-04-01 16:51:23 +0100

[diff] [blame]

8

#include <armnn/utility/Assert.hpp>

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

#include <cmath>

#include <limits>

namespace armnn

{

/// Decomposes a real multiplier in [0, 1) into a 32-bit fixed-point mantissa
/// (m_Multiplier, scaled by 2^31) and a non-negative right shift (m_RightShift),
/// so that multiplier ~= m_Multiplier * 2^-31 * 2^-m_RightShift.
QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier)
{
    ARMNN_ASSERT(multiplier >= 0.0f && multiplier < 1.0f);

    // Zero is stored directly; there is nothing to decompose.
    if (multiplier == 0.0f)
    {
        m_Multiplier = 0;
        m_RightShift = 0;
        return;
    }

    // frexp yields multiplier == mantissa * 2^binaryExponent; the stored shift is the negated exponent.
    int binaryExponent = 0;
    const double mantissa = std::frexp(multiplier, &binaryExponent);
    m_RightShift = -binaryExponent;

    // Scale the mantissa to fixed point with 31 fractional bits.
    const int64_t fixedPointOne = 1ll << 31;
    int64_t fixedMantissa = static_cast<int64_t>(std::round(mantissa * fixedPointOne));
    ARMNN_ASSERT(fixedMantissa <= fixedPointOne);

    // Rounding may land exactly on 2^31, which does not fit in int32_t:
    // halve the mantissa and compensate by shifting one place less.
    if (fixedMantissa == fixedPointOne)
    {
        fixedMantissa /= 2;
        m_RightShift -= 1;
    }

    ARMNN_ASSERT(m_RightShift >= 0);
    ARMNN_ASSERT(fixedMantissa <= std::numeric_limits<int32_t>::max());
    m_Multiplier = static_cast<int32_t>(fixedMantissa);
}

int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const

42

{

43

int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier);

44

return RoundingDivideByPOT(x, m_RightShift);

45

}

46

47

/// Returns the high 32 bits of 2 * a * b, rounded to nearest.
/// Saturates to INT32_MAX in the single overflowing case (a == b == INT32_MIN).
int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
    const int32_t lowest = std::numeric_limits<int32_t>::min();

    // The only operand pair whose doubled product does not fit in 32 bits.
    if (a == lowest && b == lowest)
    {
        return std::numeric_limits<int32_t>::max();
    }

    const int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);

    // Bias the product before the truncating division so the result rounds to nearest;
    // the bias sign follows the sign of the product.
    const int32_t roundingBias = (product < 0) ? (1 - (1 << 30)) : (1 << 30);

    return static_cast<int32_t>((product + roundingBias) / (1ll << 31));
}

int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent)

63

{

Narumol Prangnawarat

2020-04-01 16:51:23 +0100

[diff] [blame]

64

ARMNN_ASSERT(exponent >= 0 && exponent <= 31);

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

65

int32_t mask = (1 << exponent) - 1;

66

int32_t remainder = x & mask;

67

int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);

68

return (x >> exponent) + (remainder > threshold ? 1 : 0);

69

}

70

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

71

void Convolve(const TensorShape& rInputShape,

72

Decoder<float>& rInputDecoder,

73

const TensorShape& rOutputShape,

74

Encoder<float>& rOutputEncoder,

75

const TensorShape& rFilterShape,

76

Decoder<float>& rFilterDecoder,

77

bool biasEnabled,

78

Decoder<float>* pBiasDecoder,

79

DataLayout dataLayout,

80

unsigned int paddingTop,

81

unsigned int paddingLeft,

82

unsigned int xStride,

83

unsigned int yStride,

84

unsigned int xDilation,

85

unsigned int yDilation,

86

bool depthwise)

87

{

88

if (biasEnabled && !pBiasDecoder)

89

{

90

throw InvalidArgumentException("Bias is enabled but the bias data is invalid");

91

}

92

const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);

93

94

const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();

95

const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();

96

const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();

97

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

98

const unsigned int depthMultiplier = depthwise ? rFilterShape[0] : 1;

99

const unsigned int inputChannels = depthwise ? rFilterShape[1] : rFilterShape[channelsIndex];

100

const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : rFilterShape[0];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

101

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

102

const unsigned int batchSize = rOutputShape[0];

103

const unsigned int outputHeight = rOutputShape[heightIndex];

104

const unsigned int outputWidth = rOutputShape[widthIndex];

105

const unsigned int inputHeight = rInputShape[heightIndex];

106

const unsigned int inputWidth = rInputShape[widthIndex];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

107

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

108

const unsigned int filterHeight = depthwise ? rFilterShape[2] : rFilterShape[heightIndex];

109

const unsigned int filterWidth = depthwise ? rFilterShape[3] : rFilterShape[widthIndex];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

110

Finn Williams

ea8ce70

2020-09-29 19:54:00 +0100

[diff] [blame^]

111

const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);

112

const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthMultiplier, depthwise);

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

113

Finn Williams

ea8ce70

2020-09-29 19:54:00 +0100

[diff] [blame^]

114

const TensorShape biasShape{outputChannels};

115

const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

116

117

unsigned int depthwiseMultiplierIdx = 0;

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

118

for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)

119

{

120

for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)

121

{

122

for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)

123

{

124

for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)

125

{

126

// This loop goes over each output element.

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

127

float sum = 0.0f;

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

128

129

// For depthwise, each output channel corresponds to exactly one input channel.

130

// For normal, must loop over each input channel.

131

for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)

132

{

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

133

if (depthwise)

134

{

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

135

depthwiseMultiplierIdx = 0;

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

136

cInput = cOutput / depthMultiplier;

137

depthwiseMultiplierIdx = cOutput % depthMultiplier;

138

}

139

140

for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)

141

{

142

for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)

143

{

144

// This loop goes over each input element for each output element.

145

unsigned int filterIndex = 0;

146

147

// Since dimensionality of kernel depends on depthwiseness, so does index.

148

if (depthwise)

149

{

150

filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +

151

cInput * filterWidth * filterHeight +

152

yFilter * filterWidth +

xFilter;

}

else

{

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

157

// Keep this implementation, as using DataLayoutIndexed::GetIndex causes great

158

// performance regression.

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

159

if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

160

{

161

filterIndex = cOutput * filterHeight * filterWidth * inputChannels +

162

yFilter * filterWidth * inputChannels +

163

xFilter * inputChannels +

cInput;

}

else

{

filterIndex = cOutput * filterWidth * filterHeight * inputChannels +

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

169

cInput * filterWidth * filterHeight +

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

170

yFilter * filterWidth +

171

xFilter;

172

}

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

173

}

Matteo Martincigh

18f2d1c

2019-06-05 13:54:25 +0100

[diff] [blame]

174

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

175

unsigned int yInput = yOutput * yStride + yFilter * yDilation;

176

unsigned int xInput = xOutput * xStride + xFilter * xDilation;

float inputValue;

// Check if we're in the padding.

181

if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

182

xInput < paddingLeft || xInput >= inputWidth + paddingLeft)

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

{

inputValue = 0.0f;

}

else

{

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

188

unsigned int inputIndex = 0;

189

190

// Keep this implementation, as using DataLayoutIndexed::GetIndex causes great

191

// performance regression.

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

192

if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

193

{

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

194

inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +

Matteo Martincigh

2019-06-06 15:46:22 +0100

[diff] [blame]

195

(yInput - paddingTop) * inputWidth * inputChannels +

196

(xInput - paddingLeft) * inputChannels +

cInput;

}

else

{

inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +

202

inputWidth * inputHeight * cInput +

203

inputWidth * (yInput - paddingTop) +

204

xInput - paddingLeft;

205

}

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

206

inputValue = inputVec[inputIndex];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

207

}

Matteo Martincigh

18f2d1c

2019-06-05 13:54:25 +0100

[diff] [blame]

208

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

209

sum += filterVec[filterIndex] * inputValue;

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

}

}

}

if (biasEnabled)

{

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

216

sum += biasVec[cOutput];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

217

}

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

218

Finn Williams

2020-09-17 15:58:31 +0100

[diff] [blame]

219

unsigned int outIdx;

220

if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)

221

{

222

outIdx = batchIdx * outputHeight * outputWidth * outputChannels +

223

yOutput * outputWidth * outputChannels +

224

xOutput * outputChannels +

cOutput;

}

else

{

outIdx = batchIdx * outputHeight * outputWidth * outputChannels +

230

cOutput * outputHeight * outputWidth +

231

yOutput * outputWidth +

232

xOutput;

233

}

Matteo Martincigh

18f2d1c

2019-06-05 13:54:25 +0100

[diff] [blame]

234

235

rOutputEncoder[outIdx];

Mike Kelly

2019-05-22 17:21:49 +0100

[diff] [blame]

236

rOutputEncoder.Set(sum);

Mike Kelly