//
// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
#include "QuantizeOperator.hpp"
#include "TosaRescaleOperatorUtils.hpp"
// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
                                                           const std::vector<const TensorInfo*>& inputs,
                                                           const std::vector<const TensorInfo*>& outputs)
{
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one output" );
    std::string inputName  = std::string("input0_");
    std::string outputName = std::string("output0_");
    std::string blockName  = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();

    // If a layer is present then the block will be used for execution, so the input and output names need to be
    // determined using the previous and following layers so that the graph is connected correctly.
    // For validation this doesn't matter.
    if (layer != nullptr)
    {
        // Get the layer connected to the input slot and determine a unique input tensor name.
        Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
        inputName = GenerateUniqueName(connectedLayer, 0);

        // Determine a unique output tensor name.
        outputName = GenerateUniqueOutputName(*layer, 0);
    }
    const TensorInfo inputInfo  = *inputs[0];
    const TensorInfo outputInfo = *outputs[0];

    // Extract the quantization parameters from the output tensor.
    // Note: there is no per-axis quantization support in the TensorFlow TOSA code.
    float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
    float scale     = outputInfo.GetQuantizationScale();

    // As per the TensorFlow quantization specification, the TensorFlow TOSA code quantizes by
    // multiplying by the scale, whereas ArmNN quantizes by dividing by the scale.
    // Invert the scale factor passed from ArmNN so it can be used as a multiplier by the TOSA code.
    scale = (scale != 0.0f) ? (1.0f / scale) : scale;
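
    // For example, an output with scale 0.5 and zeroPoint 10 (illustrative values) means ArmNN
    // computes q = round(x / 0.5) + 10; after inverting the scale, the TOSA graph built below
    // computes the equivalent q = cast(x * 2.0f + 10.0f).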
    std::vector<TosaSerializationTensor*> tensors;

    std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
    DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
    bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;

    // Only add the input tensor if the connected layer is an input layer, as intermediate or
    // constant tensors will be created separately. There also can't be duplicate tensors.
    if (inputName.find("input0_") != std::string::npos)
    {
        tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
    }

    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
    DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
    if (isFloatInput)
    {
        // quantize:
        //   const_zeroPoint = constant(zeroPoint)
        //   const_scale     = constant(scale)
        //   out_mul         = mul(input, const_scale)
        //   out_add         = add(out_mul, const_zeroPoint)
        //   output          = cast<output_type>(out_add)
        std::string outputNameScale     = std::string("input1_") + GetUniqueTosaMappingID();
        std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
        std::string outputNameMul       = std::string("intermediate0_") + GetUniqueTosaMappingID();
        std::string outputNameAdd       = std::string("intermediate1_") + GetUniqueTosaMappingID();
        // const_zeroPoint
        TosaSerializationOperator* zeroPointOp = nullptr;
        TosaSerializationTensor* zeroPointTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameZeroPoint,
                                       zeroPoint,
                                       inputDType0,
                                       inputShape0,
                                       zeroPointOp,
                                       zeroPointTensor);
        tensors.push_back(zeroPointTensor);

        // const_scale
        TosaSerializationOperator* scaleOp = nullptr;
        TosaSerializationTensor* scaleTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameScale,
                                       scale,
                                       inputDType0,
                                       inputShape0,
                                       scaleOp,
                                       scaleTensor);
        tensors.push_back(scaleTensor);
        // mul
        // Note: the shift attribute applies only to integer MUL, so it is 0 for this float multiply.
        int32_t shift = 0;
        TosaMulAttribute mulAttribute(shift);
        TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
                                                                         Attribute_MulAttribute,
                                                                         &mulAttribute,
                                                                         {inputName, outputNameScale},
                                                                         {outputNameMul});
        tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));

        // add
        TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
                                                                         Attribute_NONE,
                                                                         nullptr,
                                                                         {outputNameMul, outputNameZeroPoint},
                                                                         {outputNameAdd});
        tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));

        // cast
        TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
                                                                          Attribute_NONE,
                                                                          nullptr,
                                                                          {outputNameAdd},
                                                                          {outputName});
        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings.
        return new TosaSerializationBasicBlock(blockName,                                    // name
                                               mainName,                                     // region name
                                               {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
                                               tensors,                                      // tensors
                                               {inputName},                                  // inputs
                                               {outputName});                                // outputs
    }
    else
    {
        // Integer input: requantize with a single RESCALE, scaling by inputScale / outputScale
        // and shifting from the input zero point to the output zero point.
        double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
        int32_t input_zp   = inputs[0]->GetQuantizationOffset();
        int32_t output_zp  = outputs[0]->GetQuantizationOffset();
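
        // For example, requantizing from (scale 0.1, zeroPoint 0) to (scale 0.05, zeroPoint 128)
        // gives scale_alpha = 2.0, so the RESCALE effectively computes
        // q_out = 2 * (q_in - 0) + 128 (illustrative values, not taken from a real model).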
        TosaSerializationOperator* rescaleOp = nullptr;
        TosaSerializationTensor* rescaleTensor = nullptr;
        CreateRescaleTosaOperator(inputName,
                                  outputName,
                                  outputDType0,
                                  inputShape0,
                                  scale_alpha,
                                  input_zp,
                                  output_zp,
                                  true,
                                  true,
                                  &rescaleOp,
                                  &rescaleTensor);
        tensors.push_back(rescaleTensor);
        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings.
        return new TosaSerializationBasicBlock(blockName,     // name
                                               mainName,      // region name
                                               {rescaleOp},   // operators
                                               tensors,       // tensors
                                               {inputName},   // inputs
                                               {outputName}); // outputs
    }
}