blob: a4d7d0ed281927ee2eeae80a8a2f0c3895749887 [file] [log] [blame]
//
// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
8
9#include "QuantizeOperator.hpp"
10
John Mcloughlinceb44282024-04-23 16:47:04 +010011#include "TosaRescaleOperatorUtils.hpp"
12
Teresa Charlinca5a23a2023-12-15 14:20:47 +000013// This function is paraphrased from:
14// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
15TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
16 const std::vector<const TensorInfo*>& inputs,
17 const std::vector<const TensorInfo*>& outputs)
18{
19 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
20 "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
21 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
22 "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
23
Teresa Charlin8cfd0592024-04-23 16:22:47 +010024 std::string inputName = std::string("input_");
Teresa Charlinca5a23a2023-12-15 14:20:47 +000025 std::string outputName = std::string("output0_");
26 std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
27
28 // If a layer is present then the block will be used for execution, so input and output names need to be determined
29 // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
30 if(layer != nullptr)
31 {
Teresa Charlin8cfd0592024-04-23 16:22:47 +010032 inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
33 outputName = GenerateUniqueOutputName(*layer);
Teresa Charlinca5a23a2023-12-15 14:20:47 +000034 }
35
36 const TensorInfo inputInfo = *inputs[0];
37 const TensorInfo outputInfo = *outputs[0];
38
39 // Extract quantization detail from Tensor
40 float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
41 // No per axis support in Tensorflow TOSA code
42 float scale = outputInfo.GetQuantizationScale();
43
44 // As per the Tensorflow quantization specification
45 // Tensorflow TOSA code calculates quantization using multiplication by scale
46 // Armnn code calculates quantization using division by scale
47 // Invert scale factor passed from Armnn for tf TOSA code
48 scale = (scale != 0) ? (1 / scale) : scale;
49
50 std::vector<TosaSerializationTensor*> tensors;
51
John Mcloughlinceb44282024-04-23 16:47:04 +010052 std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
53 DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
Teresa Charlince48d1d2024-04-24 13:30:58 +010054 bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
John Mcloughlinceb44282024-04-23 16:47:04 +010055
Teresa Charlinca5a23a2023-12-15 14:20:47 +000056 // Only add input tensors if connected layer is an input layer.
57 // As intermediate or constant tensors will be created separately.
58 // There also can't be duplicate tensor.
Teresa Charlin8cfd0592024-04-23 16:22:47 +010059 if(inputName.find("input_") != std::string::npos)
Teresa Charlinca5a23a2023-12-15 14:20:47 +000060 {
Teresa Charlinca5a23a2023-12-15 14:20:47 +000061 tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
62 }
63
64 std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
65 DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
66
John Mcloughlinceb44282024-04-23 16:47:04 +010067 if (isFloatInput)
68 {
69 // quantize:
70 // const_zeroPoint = constant(zeroPoint)
71 // const_scale = constant(scale)
72 // out_mul = mul(input, const_scale)
73 // out_add = add(out_mul, const_zeroPoint)
74 // output = cast<output_type>(out_add)
Teresa Charlinca5a23a2023-12-15 14:20:47 +000075
John Mcloughlinceb44282024-04-23 16:47:04 +010076 std::string outputNameScale = std::string("input1_") + GetUniqueTosaMappingID();
77 std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
78 std::string outputNameMul = std::string("intermediate0_") + GetUniqueTosaMappingID();
79 std::string outputNameAdd = std::string("intermediate1_") + GetUniqueTosaMappingID();
Teresa Charlinca5a23a2023-12-15 14:20:47 +000080
John Mcloughlinceb44282024-04-23 16:47:04 +010081 // const_zeroPoint
82 TosaSerializationOperator* zeroPointOp = nullptr;
83 TosaSerializationTensor* zeroPointTensor = nullptr;
84 CreateConstTosaOperator<float>(outputNameZeroPoint,
85 zeroPoint,
86 inputDType0,
87 inputShape0,
88 zeroPointOp,
89 zeroPointTensor);
90 tensors.push_back(zeroPointTensor);
Teresa Charlinca5a23a2023-12-15 14:20:47 +000091
John Mcloughlinceb44282024-04-23 16:47:04 +010092 // const_scale
93 TosaSerializationOperator *scaleOp = nullptr;
94 TosaSerializationTensor* scaleTensor = nullptr;
95 CreateConstTosaOperator<float>(outputNameScale,
96 scale,
97 inputDType0,
98 inputShape0,
99 scaleOp,
100 scaleTensor);
101 tensors.push_back(scaleTensor);
Teresa Charlinca5a23a2023-12-15 14:20:47 +0000102
John Mcloughlinceb44282024-04-23 16:47:04 +0100103 // mul
104 int32_t shift = 0;
105 TosaMulAttribute mulAttribute(shift);
106 TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
107 Attribute_MulAttribute,
108 &mulAttribute,
109 {inputName, outputNameScale},
110 {outputNameMul});
111 tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
Teresa Charlinca5a23a2023-12-15 14:20:47 +0000112
John Mcloughlinceb44282024-04-23 16:47:04 +0100113 // add
114 TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
115 Attribute_NONE,
116 nullptr,
117 {outputNameMul, outputNameZeroPoint},
118 {outputNameAdd});
119 tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
Teresa Charlinca5a23a2023-12-15 14:20:47 +0000120
John Mcloughlinceb44282024-04-23 16:47:04 +0100121 // cast
122 TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
123 Attribute_NONE,
124 nullptr,
125 {outputNameAdd},
126 {outputName});
Teresa Charlinca5a23a2023-12-15 14:20:47 +0000127
John Mcloughlinceb44282024-04-23 16:47:04 +0100128 tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
129
130 // operatorInputNames/operatorOutputNames ends up being the same as
131 // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
132 return new TosaSerializationBasicBlock(blockName, // name
133 mainName, // region name
134 {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
135 tensors, // tensors
136 {inputName}, // inputs
137 {outputName}); // outputs
138 }
139 else
140 {
141 double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
142 int32_t input_zp = inputs[0]->GetQuantizationOffset();
143 int32_t output_zp = outputs[0]->GetQuantizationOffset();
144
145 TosaSerializationOperator* rescaleOp = nullptr;
John Mcloughlinceb44282024-04-23 16:47:04 +0100146 CreateRescaleTosaOperator(inputName,
147 outputName,
John Mcloughlinceb44282024-04-23 16:47:04 +0100148 scale_alpha,
149 input_zp,
150 output_zp,
151 true,
152 true,
Teresa Charlince48d1d2024-04-24 13:30:58 +0100153 &rescaleOp);
154 tensors.push_back(new TosaSerializationTensor(outputName,
155 inputShape0,
156 outputDType0, {}));
John Mcloughlinceb44282024-04-23 16:47:04 +0100157
158 // operatorInputNames/operatorOutputNames ends up being the same as
159 // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
160 return new TosaSerializationBasicBlock(blockName, // name
161 mainName, // region name
162 {rescaleOp}, // operators
163 tensors, // tensors
164 {inputName}, // inputs
165 {outputName}); // outputs
166 }
167}