Add Quantize Support to TOSA Ref Backend * Adding a one to many tosa mapping for Quantize * Added tests * Resolves IVGCVSW-7175 Signed-off-by: John Mcloughlin <john.mcloughlin@arm.com> Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com> Change-Id: Ia0852fefb618b4a29c2601b9de8b6b2731229801

commit: ca5a23a7cbe46b8da8de432d80889c47a745ca4c [log] [tgz]
author: Teresa Charlin <teresa.charlinreyes@arm.com> Fri Dec 15 14:20:47 2023 +0000
committer: john.mcloughlin <john.mcloughlin@arm.com> Thu Dec 21 11:14:54 2023 +0000
tree: c375efe1f4ecdad708d6e8e771a13c07ddbc0257
parent: 0587dd01272199a36210bd0ccc266185b113df75 [diff] [blame]
diff --git a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
new file mode 100644
index 0000000..1107add
--- /dev/null
+++ b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp

@@ -0,0 +1,139 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "QuantizeOperator.hpp"
+
+// This function is paraphrased from:
+// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
+TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
+                                                           const std::vector<const TensorInfo*>& inputs,
+                                                           const std::vector<const TensorInfo*>& outputs)
+{
+    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
+                                         "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
+    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
+                                         "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
+
+    std::string inputName           = std::string("input0_");
+    std::string outputNameZeroPoint = std::string("intermediate0_") + GetUniqueTosaMappingID();
+    std::string outputNameScale     = std::string("intermediate1_") + GetUniqueTosaMappingID();
+    std::string outputNameMul       = std::string("intermediate2_") + GetUniqueTosaMappingID();
+    std::string outputNameAdd       = std::string("intermediate3_") + GetUniqueTosaMappingID();
+    std::string outputName          = std::string("output0_");
+    std::string blockName           = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
+
+    // If a layer is present then the block will be used for execution, so input and output names need to be determined
+    // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
+    if(layer != nullptr)
+    {
+        // Get the layers connected to the input slots and determine unique tensor names.
+        Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
+        inputName = GenerateUniqueName(connectedLayer, 0);
+
+        // Determine unique output tensor name.
+        outputName = GenerateUniqueOutputName(*layer, 0);
+    }
+
+    const TensorInfo inputInfo = *inputs[0];
+    const TensorInfo outputInfo = *outputs[0];
+
+    // Extract quantization detail from Tensor
+    float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
+    // No per axis support in Tensorflow TOSA code
+    float scale = outputInfo.GetQuantizationScale();
+
+    // As per the Tensorflow quantization specification
+    // Tensorflow TOSA code calculates quantization using multiplication by scale
+    // Armnn code calculates quantization using division by scale
+    // Invert scale factor passed from Armnn for tf TOSA code
+    scale = (scale != 0) ?  (1 / scale) : scale;
+
+    std::vector<TosaSerializationTensor*> tensors;
+
+    // Only add input tensors if connected layer is an input layer.
+    // As intermediate or constant tensors will be created separately.
+    // There also can't be duplicate tensor.
+    std::vector<int32_t> inputShape0;
+    DType inputDType0 =  DType::DType_UNKNOWN;
+    if(inputName.find("input0_") != std::string::npos)
+    {
+        inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
+        inputDType0 = ArmNNToDType(inputInfo.GetDataType());
+        ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32,
+                                             "ConvertQuantizeToTosaOperator: Quantize input must be of type Float" );
+        tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
+    }
+
+    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
+    DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
+
+    // quantize:
+    // const_zeroPoint = constant(zeroPoint)
+    // const_scale = constant(scale)
+    // out_mul = mul(input, const_scale)
+    // out_add = add(out_mul, const_zeroPoint)
+    // output = cast<output_type>(out_add)
+
+    // const_zeroPoint
+    TosaSerializationOperator* zeroPointOp = nullptr;
+    TosaSerializationTensor* zeroPointTensor = nullptr;
+    CreateConstTosaOperator<float>(outputNameZeroPoint,
+                                   zeroPoint,
+                                   inputDType0,
+                                   inputShape0,
+                                   zeroPointOp,
+                                   zeroPointTensor);
+    tensors.push_back(zeroPointTensor);
+
+    // const_scale
+    TosaSerializationOperator *scaleOp = nullptr;
+    TosaSerializationTensor* scaleTensor = nullptr;
+    CreateConstTosaOperator<float>(outputNameScale,
+                                   scale,
+                                   inputDType0,
+                                   inputShape0,
+                                   scaleOp,
+                                   scaleTensor);
+    tensors.push_back(scaleTensor);
+
+    // mul
+    int32_t shift = 0;
+    TosaMulAttribute mulAttribute(shift);
+    TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
+                                                                     Attribute_MulAttribute,
+                                                                     &mulAttribute,
+                                                                     {inputName, outputNameScale},
+                                                                     {outputNameMul});
+    tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
+
+    // add
+    TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
+                                                                     Attribute_NONE,
+                                                                     nullptr,
+                                                                     {outputNameMul, outputNameZeroPoint},
+                                                                     {outputNameAdd});
+    tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
+
+    // cast
+    TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
+                                                                      Attribute_NONE,
+                                                                      nullptr,
+                                                                      {outputNameAdd},
+                                                                      {outputName});
+
+    tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+
+    // operatorInputNames/operatorOutputNames ends up being the same as
+    // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+    return new TosaSerializationBasicBlock(blockName,                                       // name
+                                           mainName,                                        // region name
+                                           {zeroPointOp, scaleOp, mulOp, addOp, castOp},    // operators
+                                           tensors,                                         // tensors
+                                           {inputName},                                     // inputs
+                                           {outputName});                                   // outputs
+}
commit	ca5a23a7cbe46b8da8de432d80889c47a745ca4c	[log] [tgz]
author	Teresa Charlin <teresa.charlinreyes@arm.com>	Fri Dec 15 14:20:47 2023 +0000
committer	john.mcloughlin <john.mcloughlin@arm.com>	Thu Dec 21 11:14:54 2023 +0000
tree	c375efe1f4ecdad708d6e8e771a13c07ddbc0257
parent	0587dd01272199a36210bd0ccc266185b113df75 [diff] [blame]