Blame - src/backends/reference/workloads/ConvImpl.hpp - ml/armnn

2018-03-09 14:13:49 +0000

[diff] [blame]

1

//

2

David Beck

ecb56cd

2018-09-05 12:52:57 +0100

[diff] [blame]

3

// SPDX-License-Identifier: MIT

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

//

#pragma once

#include "RefWorkloadUtils.hpp"

9

10

#include <armnn/Tensor.hpp>

11

12

#include <boost/assert.hpp>

13

#include <boost/numeric/conversion/cast.hpp>

#include <cmath>

#include <limits>

namespace armnn

{

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

21

/// Performs multiplication of an integer with a multiplier which is less than one,

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

22

/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.

23

struct QuantizedMultiplierSmallerThanOne

24

{

25

public:

26

/// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.

27

/// This stores the appropriate integer quantities (derived from the given multiplier) for later use.

28

/// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().

/// @param multiplier The real-valued factor that operator* will later apply.
///                   NOTE(review): the type name implies this must be below one; any range check
///                   lives in the definition, which is not visible in this header - confirm.

29

QuantizedMultiplierSmallerThanOne(float multiplier);

30

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

31

/// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().

/// Multiplies an integer by the multiplier this object was constructed with.
/// @param rhs The integer value to scale.
/// @return The scaled value as a 32-bit integer. NOTE(review): the rounding/saturation behaviour
///         is decided by the out-of-view definition (presumably via the two private helpers below).

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

32

int32_t operator*(int32_t rhs) const;

33

34

private:

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

35

/// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().

/// NOTE(review): going by the name and the gemmlowp original, this presumably returns the rounded
/// high 32 bits of 2*a*b with saturation on overflow - verify against the definition.

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

36

static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);

37

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

38

/// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().

/// NOTE(review): "POT" presumably means power of two, i.e. x divided by 2^exponent with
/// rounding - verify against the definition.

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

39

static int32_t RoundingDivideByPOT(int32_t x, int exponent);

40

41

/// Integer quantity derived from the constructor's multiplier and stored for later use.
/// NOTE(review): presumably the fixed-point representation of the multiplier - confirm.

int32_t m_Multiplier;

42

/// Integer quantity derived from the constructor's multiplier and stored for later use.
/// NOTE(review): presumably the right-shift amount applied after the fixed-point multiply - confirm.

int32_t m_RightShift;

43

};

44

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

45

/// An implementation shared by normal and depthwise convolution.

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

46

template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>

47

static void ConvImpl(ConvData data,

48

const InputType* inputData,

49

float inputScale,

50

int32_t inputOffset,

51

const InputType* filterData,

52

float filterScale,

53

int32_t filterOffset,

54

const BiasType* biasData,

55

InputType* outputData,

56

float outputScale,

57

int32_t outputOffset,

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

58

const TensorInfo& filterInfo,

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

59

bool depthwise = false)

60

{

surmeh01

3537c2c

2018-05-18 16:31:43 +0100

[diff] [blame]

61

if (data.m_Parameters.m_BiasEnabled && !biasData)

62

{

63

throw InvalidArgumentException("Bias is enabled but the bias data is invalid");

64

}

65

Nikhil Raj

2018-10-18 10:11:04 +0100

[diff] [blame^]

66

const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

67

const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

68

Nikhil Raj

2018-10-18 10:11:04 +0100

[diff] [blame^]

69

const DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);

70

const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();

71

const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();

72

const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();

73

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

74

unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;

Nikhil Raj

2018-10-18 10:11:04 +0100

[diff] [blame^]

75

unsigned int channelsInput = filterInfo.GetShape()[channelsIndex];

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

76

unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];

77

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

78

unsigned int batchSize = outputInfo0.GetShape()[0];

Nikhil Raj

2018-10-18 10:11:04 +0100

[diff] [blame^]

79

unsigned int heightOutput = outputInfo0.GetShape()[heightIndex];

80

unsigned int widthOutput = outputInfo0.GetShape()[widthIndex];

81

unsigned int heightInput = inputInfo0.GetShape()[heightIndex];

82

unsigned int widthInput = inputInfo0.GetShape()[widthIndex];

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

83

Nikhil Raj

2018-10-18 10:11:04 +0100

[diff] [blame^]

84

unsigned int heightFilter = filterInfo.GetShape()[heightIndex];

85

unsigned int widthFilter = filterInfo.GetShape()[widthIndex];

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

86

87

unsigned int paddingTop = data.m_Parameters.m_PadTop;

88

unsigned int paddingLeft = data.m_Parameters.m_PadLeft;

89

unsigned int hStride = data.m_Parameters.m_StrideY;

90

unsigned int xStride = data.m_Parameters.m_StrideX;

91

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

92

// The world's least efficient convolution.

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

93

for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)

94

{

95

for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)

96

{

97

for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)

98

{

99

for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)

100

{

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

101

// This loop goes over each output element.

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

102

AccumulatorType sum = AccumulatorType();

103

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

104

// For depthwise, each output channel corresponds to exactly one input channel.

105

// For normal, must loop over each input channel.

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

106

for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)

107

{

108

unsigned int depthwiseMultiplierIdx = 0;

109

if (depthwise)

110

{

111

cInput = cOutput / depthMult;

112

depthwiseMultiplierIdx = cOutput % depthMult;

113

}

114

115

for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)

116

{

117

for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)

118

{

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

119

// This loop goes over each input element for each output element.

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

120

121

unsigned int filterIndex;

122

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

123

// The kernel's dimensionality depends on whether the convolution is depthwise, so the filter index does too.

telsoa01

2018-03-09 14:13:49 +0000

[diff] [blame]

124

if (depthwise)

125

{

126

filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput +

127

cInput * widthFilter * heightFilter +

128

yFilter * widthFilter +

xFilter;

}

else

{

filterIndex = cOutput * widthFilter * heightFilter * channelsInput +

134

cInput * widthFilter * heightFilter +

135

yFilter * widthFilter +

136

xFilter;

137

}

138

AccumulatorType filterValue = filterData[filterIndex] -

139

boost::numeric_cast<AccumulatorType>(filterOffset);

140

141

unsigned int yInput = yOutput * hStride + yFilter;

142

unsigned int xInput = xOutput * xStride + xFilter;

143

144

AccumulatorType inputValue;

145

telsoa01

2018-08-31 09:22:23 +0100

[diff] [blame]

146

// Check if we're in the padding.

telsoa01