Blame - src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp - ml/armnn

Teresa Charlin

ca5a23a

2023-12-15 14:20:47 +0000

[diff] [blame^]

1

//

2

3

// SPDX-License-Identifier: MIT

4

//

5

6

// SPDX-License-Identifier: Apache-2.0

7

//

8

9

#include "QuantizeOperator.hpp"

10

11

// This function is paraphrased from:

12

// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp

13

TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,

14

const std::vector<const TensorInfo*>& inputs,

15

const std::vector<const TensorInfo*>& outputs)

16

{

17

ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,

18

"ConvertQuantizeToTosaOperator: Quantize must have only one input" );

19

ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,

20

"ConvertQuantizeToTosaOperator: Quantize must have only one output" );

21

22

std::string inputName = std::string("input0_");

23

std::string outputNameZeroPoint = std::string("intermediate0_") + GetUniqueTosaMappingID();

24

std::string outputNameScale = std::string("intermediate1_") + GetUniqueTosaMappingID();

25

std::string outputNameMul = std::string("intermediate2_") + GetUniqueTosaMappingID();

26

std::string outputNameAdd = std::string("intermediate3_") + GetUniqueTosaMappingID();

27

std::string outputName = std::string("output0_");

28

std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();

29

30

// If a layer is present then the block will be used for execution, so input and output names need to be determined

31

// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.

32

if(layer != nullptr)

33

{

34

// Get the layers connected to the input slots and determine unique tensor names.

35

Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();

36

inputName = GenerateUniqueName(connectedLayer, 0);

37

38

// Determine unique output tensor name.

39

outputName = GenerateUniqueOutputName(*layer, 0);

40

}

41

42

const TensorInfo inputInfo = *inputs[0];

43

const TensorInfo outputInfo = *outputs[0];

44

45

// Extract quantization detail from Tensor

46

float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());

47

// No per axis support in Tensorflow TOSA code

48

float scale = outputInfo.GetQuantizationScale();

49

50

// As per the Tensorflow quantization specification

51

// Tensorflow TOSA code calculates quantization using multiplication by scale

52

// Armnn code calculates quantization using division by scale

53

// Invert scale factor passed from Armnn for tf TOSA code

54

scale = (scale != 0) ? (1 / scale) : scale;

55

56

std::vector<TosaSerializationTensor*> tensors;

57

58

// Only add input tensors if connected layer is an input layer.

59

// As intermediate or constant tensors will be created separately.

60

// There also can't be duplicate tensor.

61

std::vector<int32_t> inputShape0;

62

DType inputDType0 = DType::DType_UNKNOWN;

63

if(inputName.find("input0_") != std::string::npos)

64

{

65

inputShape0 = GetTosaTensorShape(inputInfo.GetShape());

66

inputDType0 = ArmNNToDType(inputInfo.GetDataType());

67

ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32,

68

"ConvertQuantizeToTosaOperator: Quantize input must be of type Float" );

69

tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));

70

}

71

72

std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());

73

DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());

74

75

// quantize:

76

// const_zeroPoint = constant(zeroPoint)

77

// const_scale = constant(scale)

78

// out_mul = mul(input, const_scale)

79

// out_add = add(out_mul, const_zeroPoint)

80

// output = cast<output_type>(out_add)

81

82

// const_zeroPoint

83

TosaSerializationOperator* zeroPointOp = nullptr;

84

TosaSerializationTensor* zeroPointTensor = nullptr;

85

CreateConstTosaOperator<float>(outputNameZeroPoint,

zeroPoint,

inputDType0,

inputShape0,

zeroPointOp,

zeroPointTensor);

tensors.push_back(zeroPointTensor);

92

93

// const_scale

94

TosaSerializationOperator *scaleOp = nullptr;

95

TosaSerializationTensor* scaleTensor = nullptr;

96

CreateConstTosaOperator<float>(outputNameScale,

scale,

inputDType0,

inputShape0,

scaleOp,

scaleTensor);

tensors.push_back(scaleTensor);

// mul

int32_t shift = 0;

TosaMulAttribute mulAttribute(shift);

107

TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,

108

Attribute_MulAttribute,

109

&mulAttribute,

110

{inputName, outputNameScale},

111

{outputNameMul});

112

tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));

113

114

// add

115

TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,

116

Attribute_NONE,

117

nullptr,

118

{outputNameMul, outputNameZeroPoint},

119

{outputNameAdd});

120

tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));

121

122

// cast

123

TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,

Attribute_NONE,

nullptr,

{outputNameAdd},

{outputName});

tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));

130

131

// operatorInputNames/operatorOutputNames ends up being the same as

132

// blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings

133

return new TosaSerializationBasicBlock(blockName, // name

134

mainName, // region name

135

{zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators

136

tensors, // tensors

137

{inputName}, // inputs

138

{outputName}); // outputs

139

}