blob: 704bc368d2bbbda08d8d682c0dba57a0a14a612d [file] [log] [blame]
telsoa014fcda012018-03-09 14:13:49 +00001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
David Beckecb56cd2018-09-05 12:52:57 +01003// SPDX-License-Identifier: MIT
telsoa014fcda012018-03-09 14:13:49 +00004//
5
6#pragma once
7
8#include "RefWorkloadUtils.hpp"
narpra015f703182018-10-26 16:24:58 +01009#include "TensorBufferArrayView.hpp"
telsoa014fcda012018-03-09 14:13:49 +000010
11#include <armnn/Tensor.hpp>
12
Matteo Martincigh21350152018-11-28 16:22:22 +000013#include <DataLayoutIndexed.hpp>
Matthew Bentham8800c002018-11-19 13:19:28 +000014
telsoa014fcda012018-03-09 14:13:49 +000015#include <boost/assert.hpp>
16#include <boost/numeric/conversion/cast.hpp>
17
Matteo Martincigh46315822018-11-28 16:22:36 +000018#include <DataLayoutIndexed.hpp>
19
telsoa014fcda012018-03-09 14:13:49 +000020#include <cmath>
21#include <limits>
22
23namespace armnn
24{
25
/// Performs multiplication of an integer with a multiplier which is less than one,
/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
/// Used by ConvImpl below to rescale the int32 accumulator into the quantized output range.
struct QuantizedMultiplierSmallerThanOne
{
public:
    /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
    /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
    /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
    /// NOTE(review): non-explicit, so floats convert implicitly at call sites (see its use in ConvImpl);
    /// confirm that implicit conversion is intended before marking it explicit.
    QuantizedMultiplierSmallerThanOne(float multiplier);

    /// Multiplies rhs by the stored multiplier using integer-only arithmetic (rounding, saturating).
    /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
    int32_t operator*(int32_t rhs) const;

private:
    /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
    static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);

    /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
    static int32_t RoundingDivideByPOT(int32_t x, int exponent);

    /// Integer quantities derived from the float multiplier by the constructor (see above);
    /// presumably a fixed-point mantissa and a right-shift exponent, per the gemmlowp scheme —
    /// the defining .cpp is not in view, so verify there.
    int32_t m_Multiplier;
    int32_t m_RightShift;
};
49
/// An implementation shared by normal and depthwise convolution.
/// Reference (unoptimised) direct convolution over one input/output tensor pair.
///
/// @param data          Workload data; provides m_Parameters (stride, padding, data layout,
///                      bias flag), m_Inputs and m_Outputs.
/// @param inputData     Pointer to the input tensor values.
/// @param inputScale    Quantization scale of the input (used only on the requantization path).
/// @param inputOffset   Quantization zero point of the input; subtracted from each input value.
/// @param filterData    Pointer to the filter (weights) tensor values.
/// @param filterScale   Quantization scale of the filter.
/// @param filterOffset  Quantization zero point of the filter; subtracted from each weight.
/// @param biasData      Per-output-channel bias values; must be non-null when bias is enabled.
/// @param outputData    NOTE(review): appears unused — the output is written through the
///                      TensorBufferArrayView built from GetOutputTensorData below. Confirm
///                      before removing, as this is a widely-used template.
/// @param outputScale   Quantization scale of the output. A value of 0 skips requantization
///                      entirely — presumably the float path passes 0 here; verify at callers.
/// @param outputOffset  Quantization zero point of the output, added after rescaling.
/// @param filterInfo    TensorInfo describing the filter shape.
/// @param depthwise     True for depthwise convolution (filter shape interpreted differently
///                      and each output channel reads exactly one input channel).
///
/// @throws InvalidArgumentException if bias is enabled but biasData is null.
template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
static void ConvImpl(ConvData data,
                     const InputType* inputData,
                     float inputScale,
                     int32_t inputOffset,
                     const InputType* filterData,
                     float filterScale,
                     int32_t filterOffset,
                     const BiasType* biasData,
                     InputType* outputData,
                     float outputScale,
                     int32_t outputOffset,
                     const TensorInfo& filterInfo,
                     bool depthwise = false)
{
    if (data.m_Parameters.m_BiasEnabled && !biasData)
    {
        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
    }

    const TensorInfo& inputInfo0  = GetTensorInfo(data.m_Inputs[0]);
    const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);

    // Layout-aware view over the output buffer; Get(n, c, h, w) maps logical coordinates
    // to the correct element for either NCHW or NHWC.
    TensorBufferArrayView<InputType> output(outputInfo0.GetShape(),
                                            GetOutputTensorData<InputType>(0, data),
                                            data.m_Parameters.m_DataLayout);

    // Resolves which dimension index holds channels/height/width for the active layout.
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);

    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();

    // For depthwise, filter dimension 0 is the depth multiplier M and each input channel
    // produces M output channels; for normal convolution dimension 0 is the output channel count.
    unsigned int depthMult      = depthwise ? filterInfo.GetShape()[0] : 1;
    unsigned int channelsInput  = filterInfo.GetShape()[channelsIndex];
    unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];

    unsigned int batchSize    = outputInfo0.GetShape()[0];
    unsigned int heightOutput = outputInfo0.GetShape()[heightIndex];
    unsigned int widthOutput  = outputInfo0.GetShape()[widthIndex];
    unsigned int heightInput  = inputInfo0.GetShape()[heightIndex];
    unsigned int widthInput   = inputInfo0.GetShape()[widthIndex];

    unsigned int heightFilter = filterInfo.GetShape()[heightIndex];
    unsigned int widthFilter  = filterInfo.GetShape()[widthIndex];

    unsigned int paddingTop  = data.m_Parameters.m_PadTop;
    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
    unsigned int xStride     = data.m_Parameters.m_StrideX;
    unsigned int yStride     = data.m_Parameters.m_StrideY;

    // The world's least efficient convolution.
    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
    {
        for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
        {
            for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
            {
                for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
                {
                    // This loop goes over each output element.
                    AccumulatorType sum = AccumulatorType();

                    // For depthwise, each output channel corresponds to exactly one input channel.
                    // For normal, must loop over each input channel.
                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
                    {
                        unsigned int depthwiseMultiplierIdx = 0;
                        if (depthwise)
                        {
                            // Single-iteration loop: derive the one input channel and the
                            // depth-multiplier slot this output channel corresponds to.
                            cInput = cOutput / depthMult;
                            depthwiseMultiplierIdx = cOutput % depthMult;
                        }

                        for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
                        {
                            for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
                            {
                                // This loop goes over each input element for each output element.

                                unsigned int filterIndex;

                                // Since dimensionality of kernel depends on depthwiseness, so does index.
                                if (depthwise)
                                {
                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        // Depthwise NHWC: [M, H, W, I] flattened.
                                        filterIndex = depthwiseMultiplierIdx * heightFilter * widthFilter
                                                      * channelsInput +
                                                      yFilter * widthFilter * channelsInput +
                                                      xFilter * channelsInput +
                                                      cInput;
                                    }
                                    else
                                    {
                                        // Depthwise NCHW: [M, I, H, W] flattened.
                                        filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter
                                                      * channelsInput +
                                                      cInput * widthFilter * heightFilter +
                                                      yFilter * widthFilter +
                                                      xFilter;
                                    }
                                }
                                else
                                {
                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        // Normal NHWC: [O, H, W, I] flattened.
                                        filterIndex = cOutput * heightFilter * widthFilter * channelsInput +
                                                      yFilter * widthFilter * channelsInput +
                                                      xFilter * channelsInput +
                                                      cInput;
                                    }
                                    else
                                    {
                                        // Normal NCHW: [O, I, H, W] flattened.
                                        filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
                                                      cInput * widthFilter * heightFilter +
                                                      yFilter * widthFilter +
                                                      xFilter;
                                    }
                                }

                                // Weights are stored zero-point-offset; remove the offset so the
                                // accumulation happens in true (signed) values.
                                AccumulatorType filterValue = filterData[filterIndex] -
                                    boost::numeric_cast<AccumulatorType>(filterOffset);

                                // Input coordinates in the padded coordinate system.
                                unsigned int yInput = yOutput * yStride + yFilter;
                                unsigned int xInput = xOutput * xStride + xFilter;

                                AccumulatorType inputValue;

                                // Check if we're in the padding.
                                if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
                                    xInput < paddingLeft || xInput >= widthInput + paddingLeft )
                                {
                                    // Padding contributes zero (in offset-removed space).
                                    inputValue = AccumulatorType();
                                }
                                else
                                {
                                    unsigned int inputIndex;

                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        inputIndex = batchIdx * heightInput * widthInput * channelsInput +
                                                     (yInput - paddingTop) * widthInput * channelsInput +
                                                     (xInput - paddingLeft) * channelsInput +
                                                     cInput;

                                    }
                                    else
                                    {
                                        inputIndex = batchIdx * widthInput * heightInput * channelsInput +
                                                     widthInput * heightInput * cInput +
                                                     widthInput * (yInput - paddingTop) +
                                                     xInput - paddingLeft;
                                    }

                                    inputValue = inputData[inputIndex] -
                                        boost::numeric_cast<AccumulatorType>(inputOffset);

                                }
                                sum += filterValue * inputValue;
                            }
                        }
                    }

                    // Bias is one value per output channel.
                    if (data.m_Parameters.m_BiasEnabled)
                    {
                        sum += biasData[cOutput];
                    }

                    // Requantization path: only taken when a real output scale is supplied
                    // (outputScale == 0 skips it — presumably the float instantiation).
                    if (outputScale != 0.0f)
                    {
                        float multiplier = (inputScale * filterScale) / outputScale;
                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
                        //  sum = std::round(multiplier * sum + outputOffset);
                        sum = boost::numeric_cast<AccumulatorType>(
                                QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
                            + boost::numeric_cast<AccumulatorType>(outputOffset);
                        // NOTE(review): clamp is hard-coded to the unsigned 8-bit range [0, 255];
                        // this path therefore assumes a uint8 output type.
                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
                    }

                    output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
                }
            }
        }
    }
}
237
238} //namespace armnn