blob: 1107add6e9ca9feb1350f5dca48726f3d5a3c249 [file] [log] [blame]
Teresa Charlinca5a23a2023-12-15 14:20:47 +00001//
2// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
6// SPDX-License-Identifier: Apache-2.0
7//
8
9#include "QuantizeOperator.hpp"
10
11// This function is paraphrased from:
12// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
13TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
14 const std::vector<const TensorInfo*>& inputs,
15 const std::vector<const TensorInfo*>& outputs)
16{
17 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
18 "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
19 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
20 "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
21
22 std::string inputName = std::string("input0_");
23 std::string outputNameZeroPoint = std::string("intermediate0_") + GetUniqueTosaMappingID();
24 std::string outputNameScale = std::string("intermediate1_") + GetUniqueTosaMappingID();
25 std::string outputNameMul = std::string("intermediate2_") + GetUniqueTosaMappingID();
26 std::string outputNameAdd = std::string("intermediate3_") + GetUniqueTosaMappingID();
27 std::string outputName = std::string("output0_");
28 std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
29
30 // If a layer is present then the block will be used for execution, so input and output names need to be determined
31 // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
32 if(layer != nullptr)
33 {
34 // Get the layers connected to the input slots and determine unique tensor names.
35 Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
36 inputName = GenerateUniqueName(connectedLayer, 0);
37
38 // Determine unique output tensor name.
39 outputName = GenerateUniqueOutputName(*layer, 0);
40 }
41
42 const TensorInfo inputInfo = *inputs[0];
43 const TensorInfo outputInfo = *outputs[0];
44
45 // Extract quantization detail from Tensor
46 float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
47 // No per axis support in Tensorflow TOSA code
48 float scale = outputInfo.GetQuantizationScale();
49
50 // As per the Tensorflow quantization specification
51 // Tensorflow TOSA code calculates quantization using multiplication by scale
52 // Armnn code calculates quantization using division by scale
53 // Invert scale factor passed from Armnn for tf TOSA code
54 scale = (scale != 0) ? (1 / scale) : scale;
55
56 std::vector<TosaSerializationTensor*> tensors;
57
58 // Only add input tensors if connected layer is an input layer.
59 // As intermediate or constant tensors will be created separately.
60 // There also can't be duplicate tensor.
61 std::vector<int32_t> inputShape0;
62 DType inputDType0 = DType::DType_UNKNOWN;
63 if(inputName.find("input0_") != std::string::npos)
64 {
65 inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
66 inputDType0 = ArmNNToDType(inputInfo.GetDataType());
67 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32,
68 "ConvertQuantizeToTosaOperator: Quantize input must be of type Float" );
69 tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
70 }
71
72 std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
73 DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
74
75 // quantize:
76 // const_zeroPoint = constant(zeroPoint)
77 // const_scale = constant(scale)
78 // out_mul = mul(input, const_scale)
79 // out_add = add(out_mul, const_zeroPoint)
80 // output = cast<output_type>(out_add)
81
82 // const_zeroPoint
83 TosaSerializationOperator* zeroPointOp = nullptr;
84 TosaSerializationTensor* zeroPointTensor = nullptr;
85 CreateConstTosaOperator<float>(outputNameZeroPoint,
86 zeroPoint,
87 inputDType0,
88 inputShape0,
89 zeroPointOp,
90 zeroPointTensor);
91 tensors.push_back(zeroPointTensor);
92
93 // const_scale
94 TosaSerializationOperator *scaleOp = nullptr;
95 TosaSerializationTensor* scaleTensor = nullptr;
96 CreateConstTosaOperator<float>(outputNameScale,
97 scale,
98 inputDType0,
99 inputShape0,
100 scaleOp,
101 scaleTensor);
102 tensors.push_back(scaleTensor);
103
104 // mul
105 int32_t shift = 0;
106 TosaMulAttribute mulAttribute(shift);
107 TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
108 Attribute_MulAttribute,
109 &mulAttribute,
110 {inputName, outputNameScale},
111 {outputNameMul});
112 tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
113
114 // add
115 TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
116 Attribute_NONE,
117 nullptr,
118 {outputNameMul, outputNameZeroPoint},
119 {outputNameAdd});
120 tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
121
122 // cast
123 TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
124 Attribute_NONE,
125 nullptr,
126 {outputNameAdd},
127 {outputName});
128
129 tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
130
131 // operatorInputNames/operatorOutputNames ends up being the same as
132 // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
133 return new TosaSerializationBasicBlock(blockName, // name
134 mainName, // region name
135 {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
136 tensors, // tensors
137 {inputName}, // inputs
138 {outputName}); // outputs
139}