Blame - src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp - ml/armnn

Teresa Charlin

ca5a23a

2023-12-15 14:20:47 +0000

[diff] [blame]

1

//

John Mcloughlin

ceb4428

2024-04-23 16:47:04 +0100

[diff] [blame^]

2

Teresa Charlin

ca5a23a

2023-12-15 14:20:47 +0000

[diff] [blame]

3

// SPDX-License-Identifier: MIT

4

//

5

6

// SPDX-License-Identifier: Apache-2.0

7

//

8

9

#include "QuantizeOperator.hpp"

10

John Mcloughlin

ceb4428

2024-04-23 16:47:04 +0100

[diff] [blame^]

11

#include "TosaRescaleOperatorUtils.hpp"

12

Teresa Charlin

ca5a23a

2023-12-15 14:20:47 +0000

[diff] [blame]

13

// This function is paraphrased from:

14

// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp

15

/// Builds the TOSA basic block that implements an ArmNN Quantize layer.
///
/// Two lowerings are produced, selected by the input data type:
///  - FP16/FP32 input: const(zeroPoint), const(scale), MUL, ADD, CAST.
///  - any other input: a single operator created by CreateRescaleTosaOperator
///    (requantization between two quantized representations).
///
/// @param layer   Layer being converted. May be nullptr (validation only), in which case the
///                default tensor names "input0_" / "output0_" are used. When non-null, the tensor
///                names are derived from the connected layers so the graph links up correctly.
/// @param inputs  Must contain exactly one TensorInfo pointer.
/// @param outputs Must contain exactly one TensorInfo pointer.
/// @return A TosaSerializationBasicBlock allocated with new, together with the operators and
///         tensors it references. NOTE(review): ownership presumably passes to the caller - confirm.
///
/// The size checks go through ARMNN_THROW_INVALIDARG_MSG_IF_FALSE, so a wrong number of inputs or
/// outputs is reported through that macro before any tensor is touched.
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
                                                           const std::vector<const TensorInfo*>& inputs,
                                                           const std::vector<const TensorInfo*>& outputs)
{
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one output" );

    std::string inputName  = std::string("input0_");
    std::string outputName = std::string("output0_");
    std::string blockName  = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();

    // If a layer is present then the block will be used for execution, so input and output names need to be
    // determined using the previous and following layers so the graph is connected correctly.
    // For validation this doesn't matter.
    if (layer != nullptr)
    {
        // Get the layer connected to the input slot and determine a unique tensor name.
        Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
        inputName = GenerateUniqueName(connectedLayer, 0);

        // Determine unique output tensor name.
        outputName = GenerateUniqueOutputName(*layer, 0);
    }

    // Bind by reference: the tensor infos are only read, so copying them is unnecessary.
    const TensorInfo& inputInfo  = *inputs[0];
    const TensorInfo& outputInfo = *outputs[0];

    // Extract quantization detail from the output tensor.
    const float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
    // No per axis support in Tensorflow TOSA code
    float scale = outputInfo.GetQuantizationScale();

    // As per the Tensorflow quantization specification:
    // Tensorflow TOSA code calculates quantization using multiplication by scale,
    // Armnn code calculates quantization using division by scale.
    // Invert the scale factor passed from Armnn for the tf TOSA code; a zero scale is left untouched
    // to avoid dividing by zero.
    scale = (scale != 0.0f) ? (1.0f / scale) : scale;

    std::vector<TosaSerializationTensor*> tensors;

    const std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
    const DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
    // A boolean predicate: floating point inputs take the mul/add/cast path, everything else is rescaled.
    const bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;

    // Only add input tensors if connected layer is an input layer.
    // As intermediate or constant tensors will be created separately.
    // There also can't be duplicate tensor.
    if (inputName.find("input0_") != std::string::npos)
    {
        tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
    }

    const std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
    const DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());

    if (isFloatInput)
    {
        // quantize:
        //     const_zeroPoint = constant(zeroPoint)
        //     const_scale     = constant(scale)
        //     out_mul         = mul(input, const_scale)
        //     out_add         = add(out_mul, const_zeroPoint)
        //     output          = cast<output_type>(out_add)

        // The ID generator is called in the same order as before so generated names are unchanged.
        std::string outputNameScale     = std::string("input1_") + GetUniqueTosaMappingID();
        std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
        std::string outputNameMul       = std::string("intermediate0_") + GetUniqueTosaMappingID();
        std::string outputNameAdd       = std::string("intermediate1_") + GetUniqueTosaMappingID();

        // const_zeroPoint
        TosaSerializationOperator* zeroPointOp = nullptr;
        TosaSerializationTensor* zeroPointTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameZeroPoint,
                                       zeroPoint,
                                       inputDType0,
                                       inputShape0,
                                       zeroPointOp,
                                       zeroPointTensor);
        tensors.push_back(zeroPointTensor);

        // const_scale
        TosaSerializationOperator* scaleOp = nullptr;
        TosaSerializationTensor* scaleTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameScale,
                                       scale,
                                       inputDType0,
                                       inputShape0,
                                       scaleOp,
                                       scaleTensor);
        tensors.push_back(scaleTensor);

        // mul
        int32_t shift = 0;
        TosaMulAttribute mulAttribute(shift);
        TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
                                                                         Attribute_MulAttribute,
                                                                         &mulAttribute,
                                                                         {inputName, outputNameScale},
                                                                         {outputNameMul});
        tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));

        // add
        TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
                                                                         Attribute_NONE,
                                                                         nullptr,
                                                                         {outputNameMul, outputNameZeroPoint},
                                                                         {outputNameAdd});
        tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));

        // cast
        TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
                                                                          Attribute_NONE,
                                                                          nullptr,
                                                                          {outputNameAdd},
                                                                          {outputName});

        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));

        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
        return new TosaSerializationBasicBlock(blockName,                                    // name
                                               mainName,                                     // region name
                                               {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
                                               tensors,                                      // tensors
                                               {inputName},                                  // inputs
                                               {outputName});                                // outputs
    }
    else
    {
        // Quantized input: requantize from the input scale/offset to the output scale/offset.
        // The float operands are promoted to double before the division.
        const double scale_alpha = inputInfo.GetQuantizationScale() / outputInfo.GetQuantizationScale();
        const int32_t input_zp   = inputInfo.GetQuantizationOffset();
        const int32_t output_zp  = outputInfo.GetQuantizationOffset();

        TosaSerializationOperator* rescaleOp = nullptr;
        TosaSerializationTensor* rescaleTensor = nullptr;
        // The two boolean flags are forwarded unchanged; see CreateRescaleTosaOperator for their meaning.
        CreateRescaleTosaOperator(inputName,
                                  outputName,
                                  outputDType0,
                                  inputShape0,
                                  scale_alpha,
                                  input_zp,
                                  output_zp,
                                  true,
                                  true,
                                  &rescaleOp,
                                  &rescaleTensor);
        tensors.push_back(rescaleTensor);

        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
        return new TosaSerializationBasicBlock(blockName,     // name
                                               mainName,      // region name
                                               {rescaleOp},   // operators
                                               tensors,       // tensors
                                               {inputName},   // inputs
                                               {outputName}); // outputs
    }
}