blob: 1a104d84233bf293ef6a08f7f9218d4373439e36 [file] [log] [blame]
//
// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
8
9#include "QuantizeOperator.hpp"
10
John Mcloughlinceb44282024-04-23 16:47:04 +010011#include "TosaRescaleOperatorUtils.hpp"
12
Teresa Charlinca5a23a2023-12-15 14:20:47 +000013// This function is paraphrased from:
14// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
15TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
16 const std::vector<const TensorInfo*>& inputs,
17 const std::vector<const TensorInfo*>& outputs)
18{
19 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
20 "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
21 ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
22 "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
23
24 std::string inputName = std::string("input0_");
Teresa Charlinca5a23a2023-12-15 14:20:47 +000025 std::string outputName = std::string("output0_");
26 std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
27
28 // If a layer is present then the block will be used for execution, so input and output names need to be determined
29 // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
30 if(layer != nullptr)
31 {
32 // Get the layers connected to the input slots and determine unique tensor names.
33 Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
34 inputName = GenerateUniqueName(connectedLayer, 0);
35
36 // Determine unique output tensor name.
37 outputName = GenerateUniqueOutputName(*layer, 0);
38 }
39
40 const TensorInfo inputInfo = *inputs[0];
41 const TensorInfo outputInfo = *outputs[0];
42
43 // Extract quantization detail from Tensor
44 float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
45 // No per axis support in Tensorflow TOSA code
46 float scale = outputInfo.GetQuantizationScale();
47
48 // As per the Tensorflow quantization specification
49 // Tensorflow TOSA code calculates quantization using multiplication by scale
50 // Armnn code calculates quantization using division by scale
51 // Invert scale factor passed from Armnn for tf TOSA code
52 scale = (scale != 0) ? (1 / scale) : scale;
53
54 std::vector<TosaSerializationTensor*> tensors;
55
John Mcloughlinceb44282024-04-23 16:47:04 +010056 std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
57 DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
58 float isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
59
Teresa Charlinca5a23a2023-12-15 14:20:47 +000060 // Only add input tensors if connected layer is an input layer.
61 // As intermediate or constant tensors will be created separately.
62 // There also can't be duplicate tensor.
Teresa Charlinca5a23a2023-12-15 14:20:47 +000063 if(inputName.find("input0_") != std::string::npos)
64 {
Teresa Charlinca5a23a2023-12-15 14:20:47 +000065 tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
66 }
67
68 std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
69 DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
70
John Mcloughlinceb44282024-04-23 16:47:04 +010071 if (isFloatInput)
72 {
73 // quantize:
74 // const_zeroPoint = constant(zeroPoint)
75 // const_scale = constant(scale)
76 // out_mul = mul(input, const_scale)
77 // out_add = add(out_mul, const_zeroPoint)
78 // output = cast<output_type>(out_add)
Teresa Charlinca5a23a2023-12-15 14:20:47 +000079
John Mcloughlinceb44282024-04-23 16:47:04 +010080 std::string outputNameScale = std::string("input1_") + GetUniqueTosaMappingID();
81 std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
82 std::string outputNameMul = std::string("intermediate0_") + GetUniqueTosaMappingID();
83 std::string outputNameAdd = std::string("intermediate1_") + GetUniqueTosaMappingID();
Teresa Charlinca5a23a2023-12-15 14:20:47 +000084
John Mcloughlinceb44282024-04-23 16:47:04 +010085 // const_zeroPoint
86 TosaSerializationOperator* zeroPointOp = nullptr;
87 TosaSerializationTensor* zeroPointTensor = nullptr;
88 CreateConstTosaOperator<float>(outputNameZeroPoint,
89 zeroPoint,
90 inputDType0,
91 inputShape0,
92 zeroPointOp,
93 zeroPointTensor);
94 tensors.push_back(zeroPointTensor);
Teresa Charlinca5a23a2023-12-15 14:20:47 +000095
John Mcloughlinceb44282024-04-23 16:47:04 +010096 // const_scale
97 TosaSerializationOperator *scaleOp = nullptr;
98 TosaSerializationTensor* scaleTensor = nullptr;
99 CreateConstTosaOperator<float>(outputNameScale,
100 scale,
101 inputDType0,
102 inputShape0,
103 scaleOp,
104 scaleTensor);
105 tensors.push_back(scaleTensor);
Teresa Charlinca5a23a2023-12-15 14:20:47 +0000106
John Mcloughlinceb44282024-04-23 16:47:04 +0100107 // mul
108 int32_t shift = 0;
109 TosaMulAttribute mulAttribute(shift);
110 TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
111 Attribute_MulAttribute,
112 &mulAttribute,
113 {inputName, outputNameScale},
114 {outputNameMul});
115 tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
Teresa Charlinca5a23a2023-12-15 14:20:47 +0000116
John Mcloughlinceb44282024-04-23 16:47:04 +0100117 // add
118 TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
119 Attribute_NONE,
120 nullptr,
121 {outputNameMul, outputNameZeroPoint},
122 {outputNameAdd});
123 tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
Teresa Charlinca5a23a2023-12-15 14:20:47 +0000124
John Mcloughlinceb44282024-04-23 16:47:04 +0100125 // cast
126 TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
127 Attribute_NONE,
128 nullptr,
129 {outputNameAdd},
130 {outputName});
Teresa Charlinca5a23a2023-12-15 14:20:47 +0000131
John Mcloughlinceb44282024-04-23 16:47:04 +0100132 tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
133
134 // operatorInputNames/operatorOutputNames ends up being the same as
135 // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
136 return new TosaSerializationBasicBlock(blockName, // name
137 mainName, // region name
138 {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
139 tensors, // tensors
140 {inputName}, // inputs
141 {outputName}); // outputs
142 }
143 else
144 {
145 double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
146 int32_t input_zp = inputs[0]->GetQuantizationOffset();
147 int32_t output_zp = outputs[0]->GetQuantizationOffset();
148
149 TosaSerializationOperator* rescaleOp = nullptr;
150 TosaSerializationTensor* rescaleTensor = nullptr;
151 CreateRescaleTosaOperator(inputName,
152 outputName,
153 outputDType0,
154 inputShape0,
155 scale_alpha,
156 input_zp,
157 output_zp,
158 true,
159 true,
160 &rescaleOp,
161 &rescaleTensor);
162 tensors.push_back(rescaleTensor);
163
164 // operatorInputNames/operatorOutputNames ends up being the same as
165 // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
166 return new TosaSerializationBasicBlock(blockName, // name
167 mainName, // region name
168 {rescaleOp}, // operators
169 tensors, // tensors
170 {inputName}, // inputs
171 {outputName}); // outputs
172 }
173}