//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "RefWorkloadUtils.hpp"

#include <armnn/Tensor.hpp>

#include <boost/assert.hpp>
#include <boost/numeric/conversion/cast.hpp>

#include <cmath>
#include <limits>

namespace armnn
{

/// Performs multiplication of an integer by a multiplier that is less than one,
/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
struct QuantizedMultiplierSmallerThanOne
{
public:
    /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
    /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
    /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
    QuantizedMultiplierSmallerThanOne(float multiplier);

    /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
    int32_t operator*(int32_t rhs) const;

private:
    /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
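    /// Roughly computes (a * b) / 2^31, rounded to nearest, saturating to the int32 maximum
    /// when both operands are the most negative int32 (mirroring the gemmlowp behaviour).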
    static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);

    /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
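    /// Roughly computes x / 2^exponent, rounded to nearest (gemmlowp's rounding divide by a power of two).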
    static int32_t RoundingDivideByPOT(int32_t x, int exponent);

    int32_t m_Multiplier;
    int32_t m_RightShift;
};
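
// For illustration, assuming the Android NN decomposition referenced above: a real multiplier
// m in (0, 1) is stored as m ≈ (m_Multiplier / 2^31) * 2^-m_RightShift. For example:
//
//   QuantizedMultiplierSmallerThanOne quarter(0.25f); // expected: m_Multiplier == 1 << 30, m_RightShift == 1
//   int32_t scaled = quarter * 1000;                  // approximately 250, using integer arithmetic only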

/// An implementation shared by normal and depthwise convolution.
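/// Tensors are assumed to be NCHW. For a normal convolution the filter shape is read here as
/// [channelsOut, channelsIn, filterHeight, filterWidth]; for a depthwise convolution it is read as
/// [depthMultiplier, channelsIn, filterHeight, filterWidth].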
template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
static void ConvImpl(ConvData data,
                     const InputType* inputData,
                     float inputScale,
                     int32_t inputOffset,
                     const InputType* filterData,
                     float filterScale,
                     int32_t filterOffset,
                     const BiasType* biasData,
                     InputType* outputData,
                     float outputScale,
                     int32_t outputOffset,
                     const TensorInfo& filterInfo,
                     bool depthwise = false)
{
    if (data.m_Parameters.m_BiasEnabled && !biasData)
    {
        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
    }

    const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
    const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);

    unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
    unsigned int channelsInput = filterInfo.GetShape()[1];
    unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];

    unsigned int batchSize = outputInfo0.GetShape()[0];
    unsigned int heightOutput = outputInfo0.GetShape()[2];
    unsigned int widthOutput = outputInfo0.GetShape()[3];
    unsigned int heightInput = inputInfo0.GetShape()[2];
    unsigned int widthInput = inputInfo0.GetShape()[3];

    unsigned int heightFilter = filterInfo.GetShape()[2];
    unsigned int widthFilter = filterInfo.GetShape()[3];

    unsigned int paddingTop = data.m_Parameters.m_PadTop;
    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
    unsigned int hStride = data.m_Parameters.m_StrideY;
    unsigned int xStride = data.m_Parameters.m_StrideX;

    // The world's least efficient convolution.
    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
    {
        for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
        {
            for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
            {
                for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
                {
                    // This loop goes over each output element.
                    AccumulatorType sum = AccumulatorType();

                    // For depthwise, each output channel corresponds to exactly one input channel.
                    // For a normal convolution, we must loop over every input channel.
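                    // (For example, with depthMult == 2, output channels 0 and 1 would both read
                    // input channel 0, using depthwise multiplier indices 0 and 1 respectively.)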
                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
                    {
                        unsigned int depthwiseMultiplierIdx = 0;
                        if (depthwise)
                        {
                            cInput = cOutput / depthMult;
                            depthwiseMultiplierIdx = cOutput % depthMult;
                        }

                        for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
                        {
                            for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
                            {
                                // This loop goes over each input element for each output element.

                                unsigned int filterIndex;

                                // The filter layout (and hence the flattened index) differs between
                                // depthwise and normal convolution.
                                if (depthwise)
                                {
                                    filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput +
                                                  cInput * widthFilter * heightFilter +
                                                  yFilter * widthFilter +
                                                  xFilter;
                                }
                                else
                                {
                                    filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
                                                  cInput * widthFilter * heightFilter +
                                                  yFilter * widthFilter +
                                                  xFilter;
                                }
                                AccumulatorType filterValue = filterData[filterIndex] -
                                    boost::numeric_cast<AccumulatorType>(filterOffset);

                                unsigned int yInput = yOutput * hStride + yFilter;
                                unsigned int xInput = xOutput * xStride + xFilter;

                                AccumulatorType inputValue;

                                // Check if we're in the padding.
                                if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
                                    xInput < paddingLeft || xInput >= widthInput + paddingLeft )
                                {
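                                    // A zero here corresponds to real-valued zero padding, since the
                                    // input offset is only subtracted from in-range values below.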
                                    inputValue = AccumulatorType();
                                }
                                else
                                {
                                    inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput +
                                                           widthInput * heightInput * cInput +
                                                           widthInput * (yInput - paddingTop) +
                                                           xInput - paddingLeft] -
                                        boost::numeric_cast<AccumulatorType>(inputOffset);
                                }
                                sum += filterValue * inputValue;
                            }
                        }
                    }

                    if (data.m_Parameters.m_BiasEnabled)
                    {
                        sum += biasData[cOutput];
                    }

                    if (outputScale != 0.0f)
                    {
                        float multiplier = (inputScale * filterScale) / outputScale;
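                        // For typical quantization parameters inputScale * filterScale is smaller than
                        // outputScale, so the multiplier is expected to lie in (0, 1) as the helper requires.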
                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
                        //  sum = std::round(multiplier * sum + outputOffset);
                        sum = boost::numeric_cast<AccumulatorType>(
                            QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
                            + boost::numeric_cast<AccumulatorType>(outputOffset);
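                        // Clamp to [0, 255], the representable range of the (assumed) 8-bit unsigned output type.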
                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
                    }

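                    // Write the result into the NCHW-flattened output tensor.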
                    outputData[batchIdx * widthOutput * heightOutput * channelsOutput +
                               widthOutput * heightOutput * cOutput +
                               widthOutput * yOutput +
                               xOutput] = boost::numeric_cast<InputType>(sum);
                }
            }
        }
    }
}

} //namespace armnn