Blame - src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp - ml/armnn

Teresa Charlin

ca5a23a

2023-12-15 14:20:47 +0000

[diff] [blame]

1

//

John Mcloughlin

ceb4428

2024-04-23 16:47:04 +0100

[diff] [blame]

2

Teresa Charlin

ca5a23a

2023-12-15 14:20:47 +0000

[diff] [blame]

3

// SPDX-License-Identifier: MIT

4

//

5

6

// SPDX-License-Identifier: Apache-2.0

7

//

8

9

#include "QuantizeOperator.hpp"

10

John Mcloughlin

ceb4428

2024-04-23 16:47:04 +0100

[diff] [blame]

11

#include "TosaRescaleOperatorUtils.hpp"

12

Teresa Charlin

ca5a23a

2023-12-15 14:20:47 +0000

[diff] [blame]

13

// This function is paraphrased from:

14

// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp

15

// Builds the TOSA basic block that implements an ArmNN Quantize layer.
//
// Two lowerings are emitted, selected by the input data type:
//   - FP16 / FP32 input : CONST(zeroPoint), CONST(1 / scale), MUL, ADD, CAST
//   - any other input   : a single operator created by CreateRescaleTosaOperator
//
// Parameters:
//   layer   - the layer being converted, or nullptr. When non-null the block's input/output
//             tensor names are derived from the layer's connections; when null the
//             placeholder names "input_" / "output0_" are used.
//   inputs  - must contain exactly one TensorInfo (the tensor to quantize).
//   outputs - must contain exactly one TensorInfo (carries the target scale / offset).
//
// Returns a TosaSerializationBasicBlock allocated with `new`.
// NOTE(review): ownership of the block (and of the operators / tensors handed to it)
// presumably passes to the caller / serialization handler - confirm against the call sites.
//
// Raises an invalid-argument error through ARMNN_THROW_INVALIDARG_MSG_IF_FALSE when
// `inputs` or `outputs` does not hold exactly one element.
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
                                                           const std::vector<const TensorInfo*>& inputs,
                                                           const std::vector<const TensorInfo*>& outputs)
{
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one output" );

    std::string inputName  = std::string("input_");
    std::string outputName = std::string("output0_");
    std::string blockName  = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();

    // If a layer is present then the block will be used for execution, so input and output names need to be
    // determined using the previous and following layers so the graph is connected correctly.
    // For validation this doesn't matter.
    if(layer != nullptr)
    {
        inputName  = GenerateUniqueInputName(layer->GetInputSlot(0));
        outputName = GenerateUniqueOutputName(*layer);
    }

    // Bind by reference: the infos are only read here, so copying them is unnecessary.
    const TensorInfo& inputInfo  = *inputs[0];
    const TensorInfo& outputInfo = *outputs[0];

    std::vector<TosaSerializationTensor*> tensors;

    std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
    DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
    const bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;

    // Only add input tensors if connected layer is an input layer.
    // As intermediate or constant tensors will be created separately.
    // There also can't be duplicate tensor.
    if(inputName.find("input_") != std::string::npos)
    {
        tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
    }

    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
    DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());

    if (isFloatInput)
    {
        // quantize:
        // const_zeroPoint = constant(zeroPoint)
        // const_scale = constant(scale)
        // out_mul = mul(input, const_scale)
        // out_add = add(out_mul, const_zeroPoint)
        // output = cast<output_type>(out_add)

        // Extract quantization detail from Tensor
        float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
        // No per axis support in Tensorflow TOSA code
        float scale = outputInfo.GetQuantizationScale();

        // As per the Tensorflow quantization specification
        // Tensorflow TOSA code calculates quantization using multiplication by scale
        // Armnn code calculates quantization using division by scale
        // Invert scale factor passed from Armnn for tf TOSA code
        // A zero scale is passed through unchanged to avoid dividing by zero.
        scale = (scale != 0) ? (1 / scale) : scale;

        // The order of these calls is kept as-is because each one draws a fresh mapping ID.
        std::string outputNameScale     = std::string("input1_") + GetUniqueTosaMappingID();
        std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
        std::string outputNameMul       = std::string("intermediate0_") + GetUniqueTosaMappingID();
        std::string outputNameAdd       = std::string("intermediate1_") + GetUniqueTosaMappingID();

        // const_zeroPoint
        TosaSerializationOperator* zeroPointOp = nullptr;
        TosaSerializationTensor* zeroPointTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameZeroPoint,
                                       zeroPoint,
                                       inputDType0,
                                       inputShape0,
                                       zeroPointOp,
                                       zeroPointTensor);
        tensors.push_back(zeroPointTensor);

        // const_scale
        TosaSerializationOperator* scaleOp = nullptr;
        TosaSerializationTensor* scaleTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameScale,
                                       scale,
                                       inputDType0,
                                       inputShape0,
                                       scaleOp,
                                       scaleTensor);
        tensors.push_back(scaleTensor);

        // mul
        int32_t shift = 0;
        TosaMulAttribute mulAttribute(shift);
        TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
                                                                         Attribute_MulAttribute,
                                                                         &mulAttribute,
                                                                         {inputName, outputNameScale},
                                                                         {outputNameMul});
        tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));

        // add
        TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
                                                                         Attribute_NONE,
                                                                         nullptr,
                                                                         {outputNameMul, outputNameZeroPoint},
                                                                         {outputNameAdd});
        tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));

        // cast
        TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
                                                                          Attribute_NONE,
                                                                          nullptr,
                                                                          {outputNameAdd},
                                                                          {outputName});

        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));

        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
        return new TosaSerializationBasicBlock(blockName,                                    // name
                                               mainName,                                     // region name
                                               {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
                                               tensors,                                      // tensors
                                               {inputName},                                  // inputs
                                               {outputName});                                // outputs
    }
    else
    {
        // Non-float input: express the change of quantization parameters as one rescale.
        // The ratio is evaluated in float (as both scales are float) and then widened.
        // NOTE(review): an output scale of zero is not guarded here, unlike the float branch - confirm
        // whether callers can ever supply it.
        double scale_alpha = inputInfo.GetQuantizationScale() / outputInfo.GetQuantizationScale();
        int32_t input_zp   = inputInfo.GetQuantizationOffset();
        int32_t output_zp  = outputInfo.GetQuantizationOffset();

        TosaSerializationOperator* rescaleOp = nullptr;
        // NOTE(review): the two boolean flags are presumably double_round and scale32 - confirm
        // against the declaration in TosaRescaleOperatorUtils.hpp.
        CreateRescaleTosaOperator(inputName,
                                  outputName,
                                  scale_alpha,
                                  input_zp,
                                  output_zp,
                                  true,
                                  true,
                                  &rescaleOp);

        // Describe the output tensor with the output shape, consistent with the float branch.
        tensors.push_back(new TosaSerializationTensor(outputName,
                                                      outputShape0,
                                                      outputDType0, {}));

        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
        return new TosaSerializationBasicBlock(blockName,     // name
                                               mainName,      // region name
                                               {rescaleOp},   // operators
                                               tensors,       // tensors
                                               {inputName},   // inputs
                                               {outputName}); // outputs
    }
}