//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "RefWorkloadUtils.hpp"
#include "TensorBufferArrayView.hpp"

#include <armnn/Tensor.hpp>

#include <DataLayoutIndexed.hpp>

#include <boost/assert.hpp>
#include <boost/numeric/conversion/cast.hpp>

#include <cmath>
#include <limits>

namespace armnn
{

/// Performs multiplication of an integer with a multiplier which is less than one,
/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
struct QuantizedMultiplierSmallerThanOne
{
public:
    /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
    /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
    /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
    QuantizedMultiplierSmallerThanOne(float multiplier);

    /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
    int32_t operator*(int32_t rhs) const;

private:
    /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
    static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);

    /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
    static int32_t RoundingDivideByPOT(int32_t x, int exponent);

    int32_t m_Multiplier;
    int32_t m_RightShift;
};
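
// A minimal usage sketch (illustrative only, mirroring how ConvImpl below applies the multiplier):
//
//   float multiplier = (inputScale * filterScale) / outputScale;   // must be less than 1.0f
//   QuantizedMultiplierSmallerThanOne quantizedMultiplier(multiplier);
//   int32_t requantized = quantizedMultiplier * accumulatedSum;    // ~ std::round(multiplier * accumulatedSum)
//
// Here inputScale, filterScale, outputScale and accumulatedSum are assumed to come from the
// caller's quantization parameters and accumulator, as in ConvImpl below.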

/// An implementation shared by normal and depthwise convolution.
template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
static void ConvImpl(ConvData data,
                     const InputType* inputData,
                     float inputScale,
                     int32_t inputOffset,
                     const InputType* filterData,
                     float filterScale,
                     int32_t filterOffset,
                     const BiasType* biasData,
                     float outputScale,
                     int32_t outputOffset,
                     const TensorInfo& filterInfo,
                     bool depthwise = false)
{
    if (data.m_Parameters.m_BiasEnabled && !biasData)
    {
        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
    }

    const TensorInfo& inputInfo  = GetTensorInfo(data.m_Inputs[0]);
    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);

    TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
                                            GetOutputTensorData<InputType>(0, data),
                                            data.m_Parameters.m_DataLayout);

    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);

    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();

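    // Shape conventions assumed by the index arithmetic below (matching the reads of
    // filterInfo.GetShape() in this function): a depthwise filter is laid out as
    // [depthMultiplier, inputChannels, filterHeight, filterWidth], while a normal filter is
    // [outputChannels, inputChannels, filterHeight, filterWidth] for NCHW or
    // [outputChannels, filterHeight, filterWidth, inputChannels] for NHWC.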
    unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
    unsigned int inputChannels   = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
    unsigned int outputChannels  = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];

    unsigned int batchSize    = outputInfo.GetShape()[0];
    unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
    unsigned int outputWidth  = outputInfo.GetShape()[widthIndex];
    unsigned int inputHeight  = inputInfo.GetShape()[heightIndex];
    unsigned int inputWidth   = inputInfo.GetShape()[widthIndex];

    unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
    unsigned int filterWidth  = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];

    unsigned int paddingTop  = data.m_Parameters.m_PadTop;
    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
    unsigned int xStride     = data.m_Parameters.m_StrideX;
    unsigned int yStride     = data.m_Parameters.m_StrideY;

    // The world's least efficient convolution.
    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
    {
        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
        {
            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
            {
                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
                {
                    // This loop goes over each output element.
                    AccumulatorType sum = AccumulatorType();

                    // For depthwise convolution, each output channel corresponds to exactly one input
                    // channel, so the channel loop runs once; for a normal convolution it runs over
                    // every input channel.
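                    // For example (illustrative): with depthMultiplier == 2, output channels 0 and 1
                    // both read input channel 0, and output channels 2 and 3 both read input channel 1.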
                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
                    {
                        unsigned int depthwiseMultiplierIdx = 0;
                        if (depthwise)
                        {
                            cInput = cOutput / depthMultiplier;
                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
                        }

                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
                        {
                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
                            {
                                // This loop goes over each input element for each output element.

                                unsigned int filterIndex = 0;

                                // The kernel's dimension ordering depends on whether the convolution
                                // is depthwise, so the flattened index calculation does too.
                                if (depthwise)
                                {
                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
                                                  cInput * filterWidth * filterHeight +
                                                  yFilter * filterWidth +
                                                  xFilter;
                                }
                                else
                                {
                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
                                                      yFilter * filterWidth * inputChannels +
                                                      xFilter * inputChannels +
                                                      cInput;
                                    }
                                    else
                                    {
                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
                                                      cInput * filterWidth * filterHeight +
                                                      yFilter * filterWidth +
                                                      xFilter;
                                    }
                                }

                                AccumulatorType filterValue = filterData[filterIndex] -
                                    boost::numeric_cast<AccumulatorType>(filterOffset);

                                unsigned int yInput = yOutput * yStride + yFilter;
                                unsigned int xInput = xOutput * xStride + xFilter;

                                AccumulatorType inputValue;

                                // Check if we're in the padding.
                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
                                {
                                    inputValue = AccumulatorType();
                                }
                                else
                                {
                                    unsigned int inputIndex;

                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
                                                     (yInput - paddingTop) * inputWidth * inputChannels +
                                                     (xInput - paddingLeft) * inputChannels +
                                                     cInput;
                                    }
                                    else
                                    {
                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
                                                     inputWidth * inputHeight * cInput +
                                                     inputWidth * (yInput - paddingTop) +
                                                     xInput - paddingLeft;
                                    }

                                    inputValue = inputData[inputIndex] -
                                        boost::numeric_cast<AccumulatorType>(inputOffset);
                                }
                                sum += filterValue * inputValue;
                            }
                        }
                    }

                    if (data.m_Parameters.m_BiasEnabled)
                    {
                        sum += biasData[cOutput];
                    }

                    if (outputScale != 0.0f)
                    {
                        float multiplier = (inputScale * filterScale) / outputScale;
                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
                        //  sum = std::round(multiplier * sum + outputOffset);
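                        // For example (illustrative numbers only): with inputScale = filterScale = 0.05f
                        // and outputScale = 0.1f, multiplier = 0.025f, so an accumulated sum of 1000
                        // becomes roughly 0.025f * 1000 + outputOffset = 25 + outputOffset before clamping.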
                        sum = boost::numeric_cast<AccumulatorType>(
                                QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
                                + boost::numeric_cast<AccumulatorType>(outputOffset);
                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
                    }

                    output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
                }
            }
        }
    }
}
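
// A minimal calling sketch (illustrative only; inputData, filterData, biasData and m_Data are
// assumed to be the buffers and queue descriptor of the calling uint8 workload):
//
//   ConvImpl<Convolution2dQueueDescriptor, uint8_t, int32_t, int32_t>(
//       m_Data,
//       inputData,  inputInfo.GetQuantizationScale(),  inputInfo.GetQuantizationOffset(),
//       filterData, filterInfo.GetQuantizationScale(), filterInfo.GetQuantizationOffset(),
//       biasData,
//       outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
//       filterInfo);
//
// For float workloads an outputScale of 0.0f skips the requantization and clamping step above.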

} //namespace armnn