//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "RefWorkloadUtils.hpp"
#include "TensorBufferArrayView.hpp"

#include <armnn/Tensor.hpp>

#include <boost/assert.hpp>
#include <boost/numeric/conversion/cast.hpp>

#include <cmath>
#include <limits>

namespace armnn
{

/// Performs multiplication of an integer with a multiplier which is less than one,
/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
struct QuantizedMultiplierSmallerThanOne
{
public:
    /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
    /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
    /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
    QuantizedMultiplierSmallerThanOne(float multiplier);

    /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
    int32_t operator*(int32_t rhs) const;

private:
    /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
    static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);

    /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
    static int32_t RoundingDivideByPOT(int32_t x, int exponent);

    int32_t m_Multiplier;
    int32_t m_RightShift;
};
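// A minimal sketch, assuming the AndroidNN-style decomposition the comments above refer to,
// of what the constructor stores (the real implementation lives in the corresponding .cpp
// file and may differ in detail):
//
//     int   exponent;
//     float q      = std::frexp(multiplier, &exponent);             // q is in [0.5, 1)
//     auto  qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
//     if (qFixed == (1ll << 31)) { qFixed /= 2; ++exponent; }        // keep the value in int32 range
//     m_Multiplier = static_cast<int32_t>(qFixed);
//     m_RightShift = -exponent;
//
// operator*() then applies SaturatingRoundingDoublingHighMul() with m_Multiplier followed by
// RoundingDivideByPOT() with m_RightShift, so overall it approximates std::round(multiplier * rhs).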

/// An implementation shared by normal and depthwise convolution.
template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
static void ConvImpl(ConvData data,
                     const InputType* inputData,
                     float inputScale,
                     int32_t inputOffset,
                     const InputType* filterData,
                     float filterScale,
                     int32_t filterOffset,
                     const BiasType* biasData,
                     InputType* outputData,
                     float outputScale,
                     int32_t outputOffset,
                     const TensorInfo& filterInfo,
                     bool depthwise = false)
{
    if (data.m_Parameters.m_BiasEnabled && !biasData)
    {
        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
    }

    const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
    const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);

    TensorBufferArrayView<InputType> output(outputInfo0.GetShape(),
                                            GetOutputTensorData<InputType>(0, data),
                                            data.m_Parameters.m_DataLayout);

    const DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
    const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();

    unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
    unsigned int channelsInput = filterInfo.GetShape()[channelsIndex];
    unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
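    // For example (illustrative): with an NCHW data layout, a depthwise filter of shape
    // [ 2, 3, Hf, Wf ] gives depthMult = 2 and channelsInput = 3, so channelsOutput = 6;
    // for a normal convolution, channelsOutput is simply the filter's first dimension.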

    unsigned int batchSize = outputInfo0.GetShape()[0];
    unsigned int heightOutput = outputInfo0.GetShape()[heightIndex];
    unsigned int widthOutput = outputInfo0.GetShape()[widthIndex];
    unsigned int heightInput = inputInfo0.GetShape()[heightIndex];
    unsigned int widthInput = inputInfo0.GetShape()[widthIndex];

    unsigned int heightFilter = filterInfo.GetShape()[heightIndex];
    unsigned int widthFilter = filterInfo.GetShape()[widthIndex];

    unsigned int paddingTop = data.m_Parameters.m_PadTop;
    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
    unsigned int hStride = data.m_Parameters.m_StrideY;
    unsigned int xStride = data.m_Parameters.m_StrideX;

    // The world's least efficient convolution.
    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
    {
        for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
        {
            for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
            {
                for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
                {
                    // This loop goes over each output element.
                    AccumulatorType sum = AccumulatorType();

                    // For depthwise, each output channel corresponds to exactly one input channel.
                    // For a normal convolution, we must loop over every input channel.
                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
                    {
                        unsigned int depthwiseMultiplierIdx = 0;
                        if (depthwise)
                        {
                            cInput = cOutput / depthMult;
                            depthwiseMultiplierIdx = cOutput % depthMult;
                        }
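                        // For example (illustrative): with depthMult = 2, output channel 5 maps to
                        // input channel 5 / 2 = 2 and depthwise multiplier index 5 % 2 = 1.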

                        for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
                        {
                            for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
                            {
                                // This loop goes over each input element for each output element.

                                unsigned int filterIndex;

                                // The dimensionality of the kernel depends on whether the convolution
                                // is depthwise, so the flattened index is computed differently.
                                if (depthwise)
                                {
                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        filterIndex = depthwiseMultiplierIdx * heightFilter * widthFilter
                                                      * channelsInput +
                                                      yFilter * widthFilter * channelsInput +
                                                      xFilter * channelsInput +
                                                      cInput;
                                    }
                                    else
                                    {
                                        filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter
                                                      * channelsInput +
                                                      cInput * widthFilter * heightFilter +
                                                      yFilter * widthFilter +
                                                      xFilter;
                                    }
                                }
                                else
                                {
                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        filterIndex = cOutput * heightFilter * widthFilter * channelsInput +
                                                      yFilter * widthFilter * channelsInput +
                                                      xFilter * channelsInput +
                                                      cInput;
                                    }
                                    else
                                    {
                                        filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
                                                      cInput * widthFilter * heightFilter +
                                                      yFilter * widthFilter +
                                                      xFilter;
                                    }
                                }
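                                // Worked example (illustrative, NHWC, normal convolution): with
                                // heightFilter = widthFilter = 3 and channelsInput = 8, the weight at
                                // cOutput = 0, yFilter = 1, xFilter = 2, cInput = 5 sits at
                                // flattened index 0*3*3*8 + 1*3*8 + 2*8 + 5 = 45.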

                                AccumulatorType filterValue = filterData[filterIndex] -
                                    boost::numeric_cast<AccumulatorType>(filterOffset);

                                unsigned int yInput = yOutput * hStride + yFilter;
                                unsigned int xInput = xOutput * xStride + xFilter;

                                AccumulatorType inputValue;

                                // Check if we're in the padding.
                                if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
                                    xInput < paddingLeft || xInput >= widthInput + paddingLeft)
                                {
                                    inputValue = AccumulatorType();
                                }
                                else
                                {
                                    unsigned int inputIndex;

                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        inputIndex = batchIdx * heightInput * widthInput * channelsInput +
                                                     (yInput - paddingTop) * widthInput * channelsInput +
                                                     (xInput - paddingLeft) * channelsInput +
                                                     cInput;
                                    }
                                    else
                                    {
                                        inputIndex = batchIdx * widthInput * heightInput * channelsInput +
                                                     widthInput * heightInput * cInput +
                                                     widthInput * (yInput - paddingTop) +
                                                     xInput - paddingLeft;
                                    }

                                    inputValue = inputData[inputIndex] -
                                        boost::numeric_cast<AccumulatorType>(inputOffset);
                                }
                                sum += filterValue * inputValue;
                            }
                        }
                    }

                    if (data.m_Parameters.m_BiasEnabled)
                    {
                        sum += biasData[cOutput];
                    }

                    if (outputScale != 0.0f)
                    {
                        float multiplier = (inputScale * filterScale) / outputScale;
                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
                        //  sum = std::round(multiplier * sum + outputOffset);
                        sum = boost::numeric_cast<AccumulatorType>(
                            QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
                            + boost::numeric_cast<AccumulatorType>(outputOffset);
                        // Clamp to the representable range of an unsigned 8-bit value.
                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
                    }
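                    // For example (illustrative): inputScale = 0.5f, filterScale = 0.5f and
                    // outputScale = 1.0f give multiplier = 0.25f, so an accumulated sum of 100
                    // becomes roughly round(100 * 0.25) + outputOffset = 25 + outputOffset,
                    // before being clamped to [0, 255].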

                    output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
                }
            }
        }
    }
}
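// A rough usage sketch (illustrative; the parameter sourcing below is assumed, not taken
// from this file): an 8-bit asymmetric-quantized convolution workload would be expected to
// instantiate this roughly as
//
//     ConvImpl<Convolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
//         data,
//         inputData,  inputInfo.GetQuantizationScale(),  inputInfo.GetQuantizationOffset(),
//         weightData, weightInfo.GetQuantizationScale(), weightInfo.GetQuantizationOffset(),
//         biasData,
//         outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
//         weightInfo);
//
// while a float32 workload would presumably pass an outputScale of 0.0f, which skips the
// requantization step above.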

} //namespace armnn