blob: 3551ba8f903acc1855a4cc1eaf8a8743843fa400 [file] [log] [blame]
telsoa014fcda012018-03-09 14:13:49 +00001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
David Beckecb56cd2018-09-05 12:52:57 +01003// SPDX-License-Identifier: MIT
telsoa014fcda012018-03-09 14:13:49 +00004//
5
6#pragma once
7
8#include "RefWorkloadUtils.hpp"
narpra015f703182018-10-26 16:24:58 +01009#include "TensorBufferArrayView.hpp"
Mike Kelly9b398322019-05-22 17:21:49 +010010#include "BaseIterator.hpp"
11#include "Decoders.hpp"
12#include "Encoders.hpp"
telsoa014fcda012018-03-09 14:13:49 +000013
14#include <armnn/Tensor.hpp>
15
Matteo Martincigh21350152018-11-28 16:22:22 +000016#include <DataLayoutIndexed.hpp>
Matthew Bentham8800c002018-11-19 13:19:28 +000017
telsoa014fcda012018-03-09 14:13:49 +000018#include <boost/assert.hpp>
19#include <boost/numeric/conversion/cast.hpp>
20
Matteo Martincigh46315822018-11-28 16:22:36 +000021#include <DataLayoutIndexed.hpp>
22
telsoa014fcda012018-03-09 14:13:49 +000023#include <cmath>
24#include <limits>
25
26namespace armnn
27{
28
/// Performs multiplication of an integer with a multiplier which is less than one,
/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
struct QuantizedMultiplierSmallerThanOne
{
public:
    /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
    /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
    /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
    /// NOTE(review): single-argument constructor is not explicit — confirm implicit conversion
    /// from float is intended.
    QuantizedMultiplierSmallerThanOne(float multiplier);

    /// Multiplies the given value by the stored multiplier using quantized integer arithmetic.
    /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
    int32_t operator*(int32_t rhs) const;

private:
    /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
    static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);

    /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
    static int32_t RoundingDivideByPOT(int32_t x, int exponent);

    int32_t m_Multiplier;   // Integer representation of the multiplier, set by the constructor.
    int32_t m_RightShift;   // Right shift applied after the integer multiplication, set by the constructor.
};
52
/// An implementation shared by normal and depthwise convolution.
/// Computes a reference (unoptimized) quantized convolution over the input tensor held in
/// data.m_Inputs[0], writing the result to data.m_Outputs[0].
/// @param data          Workload data; supplies the convolution parameters (padding, strides,
///                      dilation, data layout, bias flag) and the input/output tensor handles.
/// @param inputData     Raw pointer to the (quantized) input values.
/// @param inputScale    Quantization scale of the input tensor.
/// @param inputOffset   Quantization zero-point of the input tensor.
/// @param filterData    Raw pointer to the (quantized) filter/weight values.
/// @param filterScale   Quantization scale of the filter tensor.
/// @param filterOffset  Quantization zero-point of the filter tensor.
/// @param biasData      Per-output-channel bias values; must be non-null when
///                      data.m_Parameters.m_BiasEnabled is set (throws otherwise).
/// @param outputScale   Quantization scale of the output tensor; a value of 0.0f skips the
///                      requantization/clamping step (float path).
/// @param outputOffset  Quantization zero-point of the output tensor.
/// @param filterInfo    TensorInfo describing the filter shape. For depthwise the shape is
///                      interpreted as [depthMultiplier, inputChannels, filterHeight, filterWidth].
/// @param depthwise     True for depthwise convolution, false for normal convolution.
/// @throws InvalidArgumentException if bias is enabled but biasData is null.
template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
static void ConvImpl(ConvData data,
                     const InputType* inputData,
                     float inputScale,
                     int32_t inputOffset,
                     const InputType* filterData,
                     float filterScale,
                     int32_t filterOffset,
                     const BiasType* biasData,
                     float outputScale,
                     int32_t outputOffset,
                     const TensorInfo& filterInfo,
                     bool depthwise = false)
{
    if (data.m_Parameters.m_BiasEnabled && !biasData)
    {
        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
    }

    const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);

    // View over the raw output buffer that maps (n, c, h, w) indices according to the data layout.
    TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
                                            GetOutputTensorData<InputType>(0, data),
                                            data.m_Parameters.m_DataLayout);

    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);

    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
    const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();

    // Filter dimensions depend on depthwiseness: depthwise filters are laid out
    // [depthMultiplier, inputChannels, filterHeight, filterWidth] regardless of data layout.
    unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
    unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
    unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];

    unsigned int batchSize = outputInfo.GetShape()[0];
    unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
    unsigned int outputWidth = outputInfo.GetShape()[widthIndex];
    unsigned int inputHeight = inputInfo.GetShape()[heightIndex];
    unsigned int inputWidth = inputInfo.GetShape()[widthIndex];

    unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
    unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];

    unsigned int paddingTop = data.m_Parameters.m_PadTop;
    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
    unsigned int xStride = data.m_Parameters.m_StrideX;
    unsigned int yStride = data.m_Parameters.m_StrideY;
    unsigned int xDilation = data.m_Parameters.m_DilationX;
    unsigned int yDilation = data.m_Parameters.m_DilationY;

    // The world's least efficient convolution.
    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
    {
        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
        {
            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
            {
                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
                {
                    // This loop goes over each output element.
                    AccumulatorType sum = AccumulatorType();

                    // For depthwise, each output channel corresponds to exactly one input channel.
                    // For normal, must loop over each input channel.
                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
                    {
                        unsigned int depthwiseMultiplierIdx = 0;
                        if (depthwise)
                        {
                            // NOTE: for depthwise the loop runs exactly once and the loop variable
                            // is overwritten — the input channel is derived from the output channel.
                            cInput = cOutput / depthMultiplier;
                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
                        }

                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
                        {
                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
                            {
                                // This loop goes over each input element for each output element.

                                unsigned int filterIndex = 0;

                                // Since dimensionality of kernel depends on depthwiseness, so does index.
                                if (depthwise)
                                {
                                    // [M, I, H, W] flattened index.
                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
                                                  cInput * filterWidth * filterHeight +
                                                  yFilter * filterWidth +
                                                  xFilter;
                                }
                                else
                                {
                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        // [O, H, W, I] flattened index.
                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
                                                      yFilter * filterWidth * inputChannels +
                                                      xFilter * inputChannels +
                                                      cInput;
                                    }
                                    else
                                    {
                                        // [O, I, H, W] flattened index.
                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
                                                      cInput * filterWidth * filterHeight +
                                                      yFilter * filterWidth +
                                                      xFilter;
                                    }
                                }

                                // Subtract the zero-point so arithmetic happens on true (de-offset) values.
                                AccumulatorType filterValue = filterData[filterIndex] -
                                    boost::numeric_cast<AccumulatorType>(filterOffset);

                                // Coordinates in the (virtually) padded input, including dilation.
                                unsigned int yInput = yOutput * yStride + yFilter * yDilation;
                                unsigned int xInput = xOutput * xStride + xFilter * xDilation;

                                AccumulatorType inputValue;

                                // Check if we're in the padding.
                                // Padding contributes a zero-initialized value, i.e. zero after
                                // the offset subtraction performed on real input elements.
                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
                                {
                                    inputValue = AccumulatorType();
                                }
                                else
                                {
                                    unsigned int inputIndex;

                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                    {
                                        // [N, H, W, C] flattened index (coordinates shifted back out of the padding).
                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
                                                     (yInput - paddingTop) * inputWidth * inputChannels +
                                                     (xInput - paddingLeft) * inputChannels +
                                                     cInput;

                                    }
                                    else
                                    {
                                        // [N, C, H, W] flattened index.
                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
                                                     inputWidth * inputHeight * cInput +
                                                     inputWidth * (yInput - paddingTop) +
                                                     xInput - paddingLeft;
                                    }

                                    inputValue = inputData[inputIndex] -
                                        boost::numeric_cast<AccumulatorType>(inputOffset);

                                }
                                sum += filterValue * inputValue;
                            }
                        }
                    }

                    if (data.m_Parameters.m_BiasEnabled)
                    {
                        sum += biasData[cOutput];
                    }

                    if (outputScale != 0.0f)
                    {
                        float multiplier = (inputScale * filterScale) / outputScale;
                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
                        //  sum = std::round(multiplier * sum + outputOffset);
                        sum = boost::numeric_cast<AccumulatorType>(
                                QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
                            + boost::numeric_cast<AccumulatorType>(outputOffset);
                        // Clamp to [0, 255] — the representable range of the quantized output.
                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
                    }

                    output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
                }
            }
        }
    }
}
229
/// Reference convolution operating through Decoder/Encoder views, decoupling the algorithm
/// from the tensors' underlying data types. Declaration only — defined in the corresponding .cpp.
/// @param rInputShape    Shape of the input tensor.
/// @param rInputDecoder  Decoder yielding the input values as float.
/// @param rOutputShape   Shape of the output tensor.
/// @param rOutputEncoder Encoder receiving the computed output values.
/// @param rFilterShape   Shape of the filter (weights) tensor.
/// @param rFilterDecoder Decoder yielding the filter values as float.
/// @param biasEnabled    Whether a bias is applied.
/// @param pBiasDecoder   Decoder for the bias values; presumably only read when biasEnabled
///                       is true — confirm against the implementation.
/// @param dataLayout     Data layout of the tensors (e.g. NCHW or NHWC).
/// @param paddingTop     Top padding, in elements.
/// @param paddingLeft    Left padding, in elements.
/// @param xStride        Horizontal stride.
/// @param yStride        Vertical stride.
/// @param xDilation      Horizontal dilation.
/// @param yDilation      Vertical dilation.
/// @param depthwise      True for depthwise convolution, false for normal convolution.
void Convolve(const TensorShape& rInputShape,
              Decoder<float>& rInputDecoder,
              const TensorShape& rOutputShape,
              Encoder<float>& rOutputEncoder,
              const TensorShape& rFilterShape,
              Decoder<float>& rFilterDecoder,
              bool biasEnabled,
              Decoder<float>* pBiasDecoder,
              DataLayout dataLayout,
              unsigned int paddingTop,
              unsigned int paddingLeft,
              unsigned int xStride,
              unsigned int yStride,
              unsigned int xDilation,
              unsigned int yDilation,
              bool depthwise = false);
telsoa014fcda012018-03-09 14:13:49 +0000246} //namespace armnn